--Create a word cloud in as few steps as possible. (Handy when you only need to make one once.)
--Perform morphological analysis with janome
--(With MeCab, additional setup is needed so that it can be found from Python.)
--Font settings for Word Cloud
Library setup: if you already have Anaconda, installing the following should be enough.
pip install janome
pip install wordcloud
import pandas as pd
from janome.tokenizer import Tokenizer
import matplotlib.pyplot as plt
from wordcloud import WordCloud
--Prepare data like the following (sample.csv). Although it is called a CSV, it contains no commas: each line is simply one sentence.
sample.csv
Lion may be a panda
No, that's not the case
:
## Read the data: sample.csv has no header row and one sentence per line
df = pd.read_csv('sample.csv', header=None)
## Name the single column so it can be selected later as df['sentences'].
## (The attribute must be spelled `columns`; a misspelled attribute would
## leave the column labelled 0 and make df['sentences'] raise KeyError.)
df.columns = ['sentences']
##Function group definition
def get_nouns(sentence, noun_list, tokenizer=None):
    """Append the general nouns found in ``sentence`` to ``noun_list``.

    Args:
        sentence: Text to run morphological analysis on.
        noun_list: List that is extended in place with the surface form of
            every token tagged as a general noun.
        tokenizer: Object exposing a janome-style ``tokenize(text)`` method.
            When omitted, the module-level tokenizer ``t`` is used.

    Returns:
        None. ``noun_list`` is mutated in place.
    """
    if tokenizer is None:
        # Fall back to the shared module-level tokenizer so existing
        # two-argument calls keep working.
        tokenizer = t
    for token in tokenizer.tokenize(sentence):
        # janome reports the part of speech as comma-separated Japanese
        # tags, e.g. "名詞,一般,*,*" (noun, general).
        pos_tags = token.part_of_speech.split(',')
        ## Extract general nouns
        # Slicing keeps the check safe even if fewer than two tags come back.
        if pos_tags[:2] == ['名詞', '一般']:
            noun_list.append(token.surface)
def depict_word_cloud(noun_list, font_path=r"C:/WINDOWS/Fonts/msgothic.ttc"):
    """Render the given words as a word cloud and show it with matplotlib.

    Args:
        noun_list: Iterable of words to draw; each element is converted
            with ``str`` before being handed to ``WordCloud``.
        font_path: Font file passed to ``WordCloud``. The default is a
            Windows font file (msgothic.ttc); on other systems pass the
            path of a font that contains the glyphs of your text.

    Raises:
        ValueError: If ``noun_list`` contains no words.
    """
    words = [str(word) for word in noun_list]
    if not words:
        # Fail early with a message that points at the real cause instead
        # of letting the error surface from inside the rendering step.
        raise ValueError(
            "noun_list is empty: there are no words to draw in the word cloud"
        )
    ## Space-separate noun list elements (word_cloud specification)
    noun_space = ' '.join(words)
    ## word cloud settings (font settings)
    wc = WordCloud(
        background_color="white",
        font_path=font_path,
        width=300,
        height=300,
    )
    wc.generate(noun_space)
    ## Specify the size of the output figure
    plt.figure(figsize=(5, 5))
    ## Hide the axis tick labels and tick marks
    plt.tick_params(
        labelbottom=False,
        labelleft=False,
        labelright=False,
        labeltop=False,
        length=0,
    )
    ## Display the word cloud
    plt.imshow(wc)
    plt.show()
## Set up morphological analysis.
## get_nouns looks the tokenizer up through the module-level name `t`,
## so it has to be bound before the loop below runs.
t = Tokenizer()
## Accumulator that get_nouns appends to, one sentence at a time
noun_list = []
## Walk the 'sentences' column directly and collect nouns from each row
for sentence in df['sentences']:
    get_nouns(sentence, noun_list)
## Draw everything that was collected
depict_word_cloud(noun_list)
If a word cloud like the following is displayed, it worked:
Recommended Posts