Collect tweets that match an arbitrary keyword using Twitter's Streaming API. With home_timeline you can only see tweets from the accounts you follow, so collecting data more broadly is not possible unless you use the Streaming API. It has been talked about a lot, and now I have finally tried the Streaming API myself.
stream.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from requests_oauthlib import OAuth1Session
import json
import requests
import time, calendar
import sys, codecs
# Keyword to track, read from standard input when the module is executed
# (``raw_input`` is the Python 2 builtin). It is passed to the streaming
# filter request as the ``track`` parameter in the ``__main__`` section.
word = raw_input(u"input: ")
# OAuth 1.0a credentials handed to OAuth1Session by Client_key().
# NOTE(review): the asterisks look like redacted placeholders -- presumably
# they must be replaced with real application credentials before running.
C_KEY = "*************************************"  # client key (first positional argument)
C_SECRET = "*************************************"  # passed as client_secret
A_TOKEN = "*************************************"  # passed as resource_owner_key
T_SECRET = "*************************************"  # passed as resource_owner_secret
# Endpoint that Response() POSTs the filter parameters to.
URL = "https://stream.twitter.com/1.1/statuses/filter.json"
def Client_key():
    """Build the OAuth 1 session used to sign requests.

    Returns:
        An ``OAuth1Session`` constructed from the module-level credentials
        ``C_KEY``, ``C_SECRET``, ``A_TOKEN`` and ``T_SECRET``.
    """
    token_settings = dict(
        client_secret=C_SECRET,
        resource_owner_key=A_TOKEN,
        resource_owner_secret=T_SECRET,
    )
    return OAuth1Session(C_KEY, **token_settings)
def Response(client, **filter_data):
    """POST the filter parameters to ``URL`` and return the streaming response.

    Args:
        client: Object exposing ``post(url, data=..., stream=...)``; the
            script passes the session returned by ``Client_key()``.
        **filter_data: Form fields for the request body (the script sends
            ``track=<keyword>``).

    Returns:
        Whatever ``client.post`` returns; ``stream=True`` is requested so the
        body can be consumed incrementally.
    """
    post_options = {"data": filter_data, "stream": True}
    return client.post(URL, **post_options)
def YmdHMS(created_at):
    """Convert a ``created_at`` timestamp to a local-time integer.

    Args:
        created_at: Timestamp text in the form
            ``"Wed Aug 27 13:08:45 +0000 2008"`` (the offset must be the
            literal ``+0000``).

    Returns:
        The same instant expressed in the machine's local time zone, as an
        ``int`` whose digits read ``YYYYmmddHHMMSS``.

    Raises:
        ValueError: If ``created_at`` does not match the expected format.
    """
    parsed_utc = time.strptime(created_at, '%a %b %d %H:%M:%S +0000 %Y')
    # timegm() interprets the struct as UTC; localtime() then shifts the epoch
    # seconds into the local zone.
    local_struct = time.localtime(calendar.timegm(parsed_utc))
    digits = time.strftime("%Y%m%d%H%M%S", local_struct)
    return int(digits)
def _console_text(value):
    """Return *value* in a form ``print`` can emit safely on this interpreter."""
    # On Python 2 (where ``str`` is ``bytes``) unicode text is encoded to
    # UTF-8 so printing never depends on the console's default codec; on
    # Python 3 the text is printed unchanged.
    if str is bytes and isinstance(value, type(u"")):
        return value.encode("utf-8")
    return value


def Print_l(r):
    """Print every Japanese-language tweet arriving on a streaming response.

    Args:
        r: Object exposing ``iter_lines()`` that yields one raw message per
            line (the script passes the response returned by ``Response()``).

    Returns:
        None. For each message whose ``user.lang`` is ``"ja"`` the screen
        name, display name, text and local timestamp (from ``YmdHMS``) are
        printed, followed by a separator line.

    Lines that are blank, are not valid JSON, or are not complete tweets
    (missing ``user``, ``text`` or a parsable ``created_at``) are skipped
    instead of aborting the stream. ``KeyboardInterrupt`` is not intercepted,
    so Ctrl+C still stops the loop.
    """
    for line in r.iter_lines():
        # The stream contains blank keep-alive lines; feeding them to
        # json.loads would raise ValueError.
        if not line:
            continue
        try:
            tweet = json.loads(line)
        except ValueError:
            # Truncated or non-JSON payload: nothing to print for this line.
            continue
        if not isinstance(tweet, dict):
            continue

        # Filter on language first so non-tweet messages (no "user" object)
        # and other languages are dropped before any field is required.
        user = tweet.get("user")
        if not isinstance(user, dict) or user.get("lang") != "ja":
            continue

        try:
            created_at = YmdHMS(tweet["created_at"])
            screen_name = _console_text(user["screen_name"])
            display_name = _console_text(user["name"])
            text = _console_text(tweet["text"])
        except (KeyError, TypeError, ValueError):
            # Incomplete tweet or unparsable timestamp: skip this message.
            continue

        # Single-argument print(...) parses on Python 2 and 3 alike. The two
        # spaces after the colon reproduce the output of the Python 2
        # statement ``print "ID: ", value``.
        print("ID:  %s" % (screen_name,))
        print("username:  %s" % (display_name,))
        print("Text:  %s" % (text,))
        print(created_at)
        print("==" * 40)
if __name__ == "__main__":
    # Open the filtered stream for the keyword read into ``word`` and print
    # matching tweets until the user interrupts the script.
    client = Client_key()
    r = Response(client, track=word)
    try:
        # Surface HTTP errors (bad credentials, rate limiting, ...) right away
        # instead of trying to parse an error body as tweets.
        r.raise_for_status()
        Print_l(r)
    except KeyboardInterrupt:
        # Ctrl+C is the intended way to stop the script; exit without a
        # traceback.
        pass
    finally:
        # Always release the streaming connection.
        r.close()
Because the stream delivers tweets in real time, an unusual keyword may match nothing for a while, and the script then fails with a "no JSON object" error, which is frustrating. It is more reliable to track a #hashtag instead.
For converting created_at to Japan time, I referred to http://blog.unfindable.net/archives/4302.
End the script with Ctrl+C.
Recommended Posts