- Download the case data (PDF): http://www.courts.go.jp/app/hanrei_jp/search1
- Convert the PDF to text using Automator: http://qiita.com/yuki_bg/items/2e6efe93992d83752312
- After that, install MeCab, wordcloud, etc.
  - You may need to install Anaconda.
  - Clone the enhanced MeCab dictionary (neologd).
(zsh)
# Install MeCab and its IPA dictionary with Homebrew.
brew install mecab mecab-ipadic
# Install the Python packages used by the script.
pip3.5 install mecab-python3
pip3.5 install wordcloud
pip3.5 install numpy Pillow matplotlib # libraries required to use wordcloud
# Attempts that did not work, kept for reference:
#brew install numpy # fails with an error
#brew install homebrew/python/numpy # something goes wrong
#sudo xcode-select --install # does not work
### Get "mecab-ipadic-neologd", a MeCab dictionary with new words added
# Clone mecab-ipadic-neologd under /usr/local/lib/mecab/dic.
cd /usr/local/lib/mecab/dic
git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git
# The installer script lives inside the cloned repository, so enter it first;
# from the parent directory ./bin/install-mecab-ipadic-neologd does not exist.
cd mecab-ipadic-neologd
./bin/install-mecab-ipadic-neologd -n
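wordcloud.py (note: a script saved under this exact name shadows the installed wordcloud package, so `from wordcloud import WordCloud` imports the script itself; a different file name such as hanrei_wordcloud.py avoids the clash)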
import MeCab
from os import path
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Ids compared against each MeCab node's ``posid``; only tokens whose id
# appears here are kept for the word cloud.
pos_list = [10, 11, 31, 32, 34, *range(36, 50), 59, 60, 62, 67]
def create_mecab_list(text):
    """Return the surface forms of the tokens in *text* worth plotting.

    The text is tokenized by MeCab with the mecab-ipadic-neologd
    dictionary. A token is kept when its surface form is longer than one
    character and its ``posid`` is listed in the module-level ``pos_list``.

    Args:
        text: The string to tokenize.

    Returns:
        A list of surface strings in the order they occur in *text*.
    """
    mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
    # NOTE(review): parsing an empty string first is presumably the usual
    # mecab-python3 workaround that keeps node.surface readable during
    # parseToNode() -- confirm before removing.
    mecab.parse("")

    mecab_list = []
    node = mecab.parseToNode(text)
    while node:
        if len(node.surface) > 1 and node.posid in pos_list:
            mecab_list.append(node.surface)
        # Advance on every iteration, whether or not the token was kept.
        node = node.next
    return mecab_list
# Read the court-decision text. The file name says it is UTF-8, so state the
# encoding explicitly instead of relying on the platform default.
with open("./086064_hanrei_utf8.txt", "r", encoding="utf-8") as file:
    hanrei = file.read()

# Join the selected words into one space-separated string for WordCloud.
string = " ".join(create_mecab_list(hanrei))

# Font file handed to WordCloud through font_path.
fpath = "/Library/Fonts/Hiragino Maru Go ProN W4.ttc"

wordcloud = WordCloud(
    # Optional settings, left disabled:
    # background_color="white",
    max_font_size=40,
    relative_scaling=.5,
    # width=900,
    # height=500,
    font_path=fpath
).generate(string)

# Show the generated image without axes.
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
(zsh)
python3 wordcloud.py
Recommended Posts