Hit the Twitter API with Python 3, run morphological analysis with MeCab, and create a word cloud
- Install Anaconda
- Install MeCab and wordcloud
- Twitter API keys
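To confirm the environment before running the scripts, a quick import check can help (a minimal sketch; it assumes the MeCab Python binding, wordcloud and requests_oauthlib are installed in the active environment):

# sanity check: these imports fail if the setup above is incomplete
import MeCab
import wordcloud
import requests_oauthlib
print("MeCab, wordcloud and requests_oauthlib are available")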
get_tweets.py
# coding: utf-8
import requests
from requests_oauthlib import OAuth1Session
import json
import csv
# Replace these placeholders with your own credentials
CK = "hhhhhhhhhhhhhhhhhhhhhh"  # consumer key
CS = "oooooooooooooooooooooooooooooooooooooooooo"  # consumer secret
AT = "gggggggggggggggggggggggggggggggggggggggggggggggggg"  # access token
AS = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"  # access token secret
oauth = OAuth1Session(CK, CS, AT, AS)
# url = "https://stream.twitter.com/1.1/statuses/filter.json" # get stream tweets #This may not have worked
# url = "https://stream.twitter.com/1.1/statuses/sample.json" # get sample tweets #This may not have worked
# url = "https://api.twitter.com/1.1/statuses/update.json" # post a tweet
# url = "https://api.twitter.com/1.1/search/tweets.json?" # search tweets
#You should be able to get the tweet of a specific public user by changing the username below(I wonder if I have to follow?)
url = "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=username" # @Is unnecessary
params = {
	# "track": "a"
	# "lang": "ja"
	"count": "200"
	# "status": "Hello, world!"
}
req = oauth.get(
	url,
	# stream = True,
	params = params
	)
twitter = json.loads(req.text)
maxid = twitter[0]["id"] + 1  # start paging from the newest tweet
c = 0
tweets_list = []
for i in range(3):  # fetch up to three pages of 200 tweets each
	print(i)
	params = {
		"count": "200",
		"max_id": maxid
	}
	req = oauth.get(
		url,
		# max_id = maxid,
		params = params
		)
	twitter = json.loads(req.text)
	for tweet in twitter:
		tweets_list.append([
			c,
			tweet["id"],
			tweet["created_at"],
			tweet["text"]
			])
		maxid = tweet["id"] - 1  # the next request continues below the oldest tweet seen
		c += 1
with open("tweets.csv", "w") as f:
	writer = csv.writer(f, lineterminator="\n")
	writer.writerow(tweets_list)
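The script assumes every request succeeds. A minimal sketch of a status check that could follow each oauth.get call (hypothetical error handling, not part of the original script):

# optional: stop early if the API call failed (invalid keys, rate limit, protected account, ...)
if req.status_code != 200:
	raise RuntimeError("Twitter API returned HTTP {}: {}".format(req.status_code, req.text))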
Replies and retweets are filtered out simply by checking whether "@" or "RT" appears in the tweet text. If you do not need this filtering, the reading step can be written as a list comprehension, as sketched below.
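A minimal sketch of that comprehension, assuming tweets.csv was written by get_tweets.py above with the tweet text in the fourth column:

import csv
# collect every tweet text without the @/RT filtering
with open("tweets.csv", "r") as f:
	text_tweet = [row[3] for row in csv.reader(f)]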
wordcloud_tweets.py
import MeCab
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import csv
# stopwords excluded from the word cloud; tune this list to your own tweets
stop_words = ["To do", "Absent", "Become", "Already", "Shiyo", "Can", "Became", "Ku", "Finally", "is there", "May", "think", "today"]
# MeCab posid values of the parts of speech to keep
pos_list = [10, 11, 31, 32, 34]
pos_list.extend(list(range(36,50)))
pos_list.extend([59, 60, 62, 67])
def create_mecab_list(text):
	# tokenize with MeCab (NEologd dictionary) and keep only the parts of speech in pos_list
	mecab_list = []
	mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
	mecab.parse("")  # work around a mecab-python3 issue where node.surface can come back garbled
	node = mecab.parseToNode(text)
	while node:
		# drop one-character tokens and anything whose part of speech is not in pos_list
		if len(node.surface) > 1 and node.posid in pos_list:
			mecab_list.append(node.surface)
		node = node.next
	return mecab_list
text_tweet = []
with open("./tweets.csv", "r") as file:
	reader = csv.reader(file)
	for ele in reader:
		# skip replies and retweets
		if "@" in ele[3] or "RT" in ele[3]:
			continue
		text_tweet.append(ele[3])
text = "".join(text_tweet)
string = " ".join(create_mecab_list(text))
fpath = "/Library/Fonts/Hiragino Maru Go ProN W4.ttc"  # Japanese font on macOS; point this at a font available on your system
wordcloud = WordCloud(
	background_color="black",
	stopwords=set(stop_words),
	max_font_size=56,
	relative_scaling=.4,
	width=500,
	height=300,
	font_path=fpath
	).generate(string)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file("./wordcloud.png")
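To sanity-check the tokenizer on its own, create_mecab_list can be called directly (a sketch; the exact tokens depend on the installed dictionary and on pos_list):

# quick check: tokenize a short Japanese sentence and print what survives the filters
print(create_mecab_list("今日はいい天気なので散歩に行きました"))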