0. How to use MeCab with python

First, install MeCab. **If you are using a 64-bit version of Python (3.7 or above) on Windows, don't be fooled by outdated information.** Installing MeCab via mecab-python-windows doesn't work. Instead, `pip install mecab` worked fine on the first try.
Then `import MeCab`. Note that it is not `import Mecab`.
Use it like `print(MeCab.Tagger('-Owakati').parse('すもももももももものうち'))`.

What you get is a "long string", not a list or tuple. A little inconvenient.

1. What I did

I wrote a class in Python. The constructor calls `MeCab.Tagger('-Odump')`, so all the information MeCab produces is stored in a field; each method then extracts only the information it needs from that field with a regular expression and returns it.

The code is as follows.

`MeCab_handler.py`


import re, MeCab
import numpy as np
import jaconv
from pykakasi import kakasi

class MeCab_handler:
    """Expose MeCab's ``-Odump`` analysis of one sentence as 1-D ndarrays.

    The constructor runs ``MeCab.Tagger('-Odump').parse(sentence)`` once and
    stores the raw dump text in ``parse_result``. Every ``get_*`` method
    re-reads that text and returns one value per token.

    Each dump line is expected to look like
    ``<id> <surface> <comma-separated feature> <other columns...>``.
    Only lines preceded by a newline are matched, so the very first line
    (presumably the BOS node -- confirm against real MeCab output) is
    skipped, and the last matched line (EOS) is dropped.

    When a feature string has fewer fields than a method needs, that method
    reports ``'*'`` for the token (``None`` in ``get_conjugation_type``) so
    that all result arrays stay aligned token by token.
    """

    # Captures (surface, feature) of one dump line. Neither group may cross
    # a space or a newline, so a short feature string can never make a match
    # run on into the following line.
    _NODE_RE = re.compile(r'\n[0-9]+ ([^ \n]*) ([^ \n]*)')
    # Part of speech followed by up to three sub-classifications, stopping
    # at the first '*' placeholder.
    _POS_DETAIL_RE = re.compile(r'[^,]*(?:,[^*,]+(?:,[^*,]+(?:,[^*,]+)?)?)?')

    # Positions inside the comma-separated feature string.
    _POS = 0
    _CONJUGATION_TYPE = 4
    _CONJUGATION_FORM = 5
    _BASIC_FORM = 6
    _KATAKANA = 7
    _PRONUNCIATION = 8
    # Placeholder used when a feature string is too short.
    _MISSING = '*'

    def __init__(self, sentence):
        """Analyse ``sentence`` with MeCab and keep the raw dump output."""
        self.parse_result = MeCab.Tagger('-Odump').parse(sentence)

    def _tokens(self):
        """Return ``(surface, feature)`` pairs for every token, EOS cut."""
        return self._NODE_RE.findall(self.parse_result)[:-1]

    def _field(self, index):
        """Return the ``index``-th feature field of every token as a list."""
        values = []
        for _, feature in self._tokens():
            # maxsplit keeps everything after the 8th comma in one piece, so
            # the last field is "the rest of the feature string".
            fields = feature.split(',', self._PRONUNCIATION)
            values.append(
                fields[index] if index < len(fields) else self._MISSING
            )
        return values

    def get_separated(self) -> np.ndarray:
        """Return the surface form of every token (word segmentation)."""
        return np.array([surface for surface, _ in self._tokens()])

    def get_words_basic(self) -> np.ndarray:
        """Return the uninflected (dictionary) form of every token."""
        return np.array(self._field(self._BASIC_FORM))

    def get_POS(self, need_detail=False) -> np.ndarray:
        """Return the part of speech of every token.

        Args:
            need_detail: When True, append the sub-classifications (up to
                three levels, comma separated) that are present, i.e. up to
                the first ``'*'`` placeholder.
        """
        if not need_detail:
            return np.array(self._field(self._POS))
        # Every part of the pattern is optional, so match() never fails.
        return np.array([
            self._POS_DETAIL_RE.match(feature).group(0)
            for _, feature in self._tokens()
        ])

    def get_conjugation_type(self) -> np.ndarray:
        """Return the conjugation type of every token.

        The result is an object array; tokens without a conjugation type
        (``'*'`` in the dump) are reported as ``None``.
        """
        return np.array(
            [
                None if value == '*' else value
                for value in self._field(self._CONJUGATION_TYPE)
            ],
            dtype='object',
        )

    def get_conjugation_form(self) -> np.ndarray:
        """Return the conjugation (inflected) form of every token."""
        return np.array(self._field(self._CONJUGATION_FORM))

    def get_katakana(self) -> np.ndarray:
        """Return the katakana reading of every token."""
        return np.array(self._field(self._KATAKANA))

    def get_hiragana(self) -> np.ndarray:
        """Return the reading of every token converted to hiragana."""
        return np.array(
            [jaconv.kata2hira(katakana) for katakana in self.get_katakana()]
        )

    def get_how_to_speak(self) -> np.ndarray:
        """Return the pronunciation of every token in Hepburn romaji.

        This reads the pronunciation field, which may differ from the
        reading returned by ``get_katakana`` / ``get_hiragana``.
        The result is an object array.
        """
        # NOTE(review): setMode/getConverter/do look like pykakasi's legacy
        # interface -- confirm against the installed pykakasi version.
        kakac = kakasi()
        kakac.setMode("K", "a")  # Katakana to ascii
        kakac.setMode("r", "Hepburn")  # Hepburn is adopted for Romaji
        conv = kakac.getConverter()

        return np.array(
            [conv.do(katakana) for katakana in self._field(self._PRONUNCIATION)],
            dtype='object',
        )

Each method's purpose is described in its docstring in the source code; the table below summarizes them with example output.

Method	Example (`print(MeCab_handler('全米が泣いた。映画ドラえもん「のび太の理論と実践」').method())`)
get_separated()	`['全米' 'が' '泣い' 'た' '。' '映画' 'ドラえもん' '「' 'のび太' 'の' '理論' 'と' '実践' '」']`
get_words_basic()	`['全米' 'が' '泣く' 'た' '。' '映画' 'ドラえもん' '「' 'のび太' 'の' '理論' 'と' '実践' '」']`
get_POS()	`['名詞' '助詞' '動詞' '助動詞' '記号' '名詞' '名詞' '記号' '名詞' '助詞' '名詞' '助詞' '名詞' '記号']`
get_POS (True)	`['noun, proper noun, region, one'" particle, case particle, general'' verb, independence''particle'' symbol, punctuation''noun, general' '''Noun, proper noun, person's name, first name'' Noun, generalization''Noun, general''' Noun, case particle, general''Noun, Sahen connection''Noun, parenthesis closing']`
get_conjugation_type()	`[None None '五段・カ行イ音便' '特殊・タ' None None None None None None None None None None]`
get_conjugation_form()	`['*' '*' '連用タ接続' '基本形' '*' '*' '*' '*' '*' '*' '*' '*' '*' '*']`
get_katakana()	`['ゼンベイ' 'ガ' 'ナイ' 'タ' '。' 'エイガ' 'ドラエモン' '「' 'ノビタ' 'ノ' 'リロン' 'ト' 'ジッセン' '」']`
get_hiragana()	`['ぜんべい' 'が' 'ない' 'た' '。' 'えいが' 'どらえもん' '「' 'のびた' 'の' 'りろん' 'と' 'じっせん' '」']`
get_how_to_speak()	`['zenbei' 'ga' 'nai' 'ta' '。' 'eiga' 'doraemon' '「' 'nobita' 'no' 'riron' 'to' 'jissen' '」']`

I made a class to get the analysis result by MeCab in ndarray with python

0. How to use MeCab with python

1. What I did

MeCab_handler.py

`MeCab_handler.py`