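Run the following commands to install MeCab and the mecab-ipadic-NEologd dictionary (the leading `!` runs each line as a shell command from a Jupyter/Colab notebook cell).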
# Install MeCab, its development headers, the base UTF-8 IPA dictionary and the
# tools the NEologd installer needs (git, curl, file, sudo).
# The Python 2 binding `python-mecab` is intentionally not requested: the
# Python 3 binding is installed with pip below, and an unavailable package name
# would make this whole apt-get invocation fail.
!apt-get -q -y install sudo file mecab libmecab-dev mecab-ipadic-utf8 git curl > /dev/null
# Shallow-clone the NEologd dictionary sources (history is not needed).
!git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git > /dev/null
# Build and install the dictionary; `echo yes` answers the installer's
# confirmation prompt and `-n` asks it to fetch the newest dictionary data.
!echo yes | mecab-ipadic-neologd/bin/install-mecab-ipadic-neologd -n > /dev/null 2>&1
# Python 3 binding that provides the `MeCab` module.
!pip install mecab-python3 > /dev/null
# mecab-python3 looks for its config under /usr/local/etc; link the system one.
# -f makes the step repeatable: re-running the cell no longer fails with
# "File exists".
!ln -sf /etc/mecabrc /usr/local/etc/mecabrc
# Print the directory where the NEologd dictionary was installed; pass this
# path to MeCab.Tagger with "-d".
!echo `mecab-config --dicdir`"/mecab-ipadic-neologd"
import MeCab

# "-d <dir>" selects the dictionary directory. This hard-coded value should
# correspond to the path printed by `mecab-config --dicdir` with
# "/mecab-ipadic-neologd" appended; adjust it if your system prints another path.
path = "-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd"
tagger = MeCab.Tagger(path)

# Double quotes are required here: the sentence contains an apostrophe, which
# would terminate a single-quoted literal and cause a SyntaxError.
# NOTE(review): the sample output shown after this snippet contains Japanese
# punctuation, so it was presumably produced from a Japanese sentence - confirm
# the intended input text.
text = "Devil's blade is in vogue."

# parseToNode() gives the first node; each node links to the following one via
# `.next`, and the loop stops when there is no further node.
node = tagger.parseToNode(text)
while node:
    # `feature` holds the comma-separated analysis details for this token.
    print(node.feature)
    node = node.next
# Output of the loop above (one feature string per node):
# BOS/EOS,*,*,*,*,*,*,*,*
#noun,Proprietary noun,General,*,*,*,Devil's Blade,Kimetsu no Yaiba,Kimetsu no Yaiba
#symbol,Comma,*,*,*,*,、,、,、
#verb,Independence,*,*,Five steps, La line,Continuous connection,Popular,Haya,Haya
#verb,Non-independent,*,*,One step,Uninflected word,Teru,Teru,Teru
#symbol,Punctuation,*,*,*,*,。,。,。
# BOS/EOS,*,*,*,*,*,*,*,*
The following information can be obtained from each node:
- surface: the split word (token text) - posid: part-of-speech ID - feature: detailed information (the comma-separated string printed above)
Recommended Posts