In this article, we use tweepy to collect a set consisting of a specific tweet plus the replies to that tweet, and describe the code for doing so. As an example, the code below collects tweets that received 100 or more replies, together with information about those replies.
The collected tweets and replies can be linked to each other by matching the "id" contained in each tweet's status against the "in_reply_to_status_id" contained in each reply's status, which identifies the tweet the reply was addressed to.
Please note that the free version of the Twitter API has a limitation that tweets older than 7 days cannot be collected.
gather.py
# coding:utf-8
import tweepy
import csv
import time
from datetime import datetime, date, timedelta
import re
# Reference timestamp for this run; the date windows below and the CSV file names derive from it.
# NOTE(review): datetime.today() is naive local time while the strings below are labelled
# "_JST" -- confirm the host clock is set to JST.
today = datetime.today()

# Posting-time window of the tweets to collect (example: from 2 days ago through today)
tweet_begin_date = (today - timedelta(days=2)).strftime('%Y-%m-%d_00:00:00_JST')
tweet_end_date = today.strftime('%Y-%m-%d_23:59:00_JST')

# Posting-time window of the replies to collect (example: from 2 days ago through today)
reply_begin_date = (today - timedelta(days=2)).strftime('%Y-%m-%d_00:00:00_JST')
reply_end_date = today.strftime('%Y-%m-%d_23:59:00_JST')

# Directory the result CSV files are written to (file names are appended directly to it)
csv_dir = '/hoge/'

# Twitter API credentials (placeholders; replace with real values)
Consumer_key = 'xxxx'
Consumer_secret = 'xxxx'
Access_token = 'xxxx'
Access_secret = 'xxxx'
# Twitter API authentication
def authTwitter():
    """Return a tweepy API client authenticated with the module-level keys.

    The client is created with retry settings (retry_count=3, retry_delay=40,
    and a fixed set of HTTP status codes passed as retry_errors) and with
    rate-limit waiting enabled.

    NOTE(review): wait_on_rate_limit_notify appears to be a tweepy 3.x
    option -- confirm against the installed tweepy version.
    """
    handler = tweepy.OAuthHandler(Consumer_key, Consumer_secret)
    handler.set_access_token(Access_token, Access_secret)

    # HTTP status codes handed to tweepy as retry_errors
    retryable_statuses = {401, 404, 500, 502, 503, 504}

    return tweepy.API(
        handler,
        retry_count=3,
        retry_delay=40,
        retry_errors=retryable_statuses,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
    )
# Tweet and reply data acquisition
def gather_tweet_and_reply(s, t):
    """Collect tweets matching ``s`` plus the replies to them, and write both to CSV.

    Args:
        s: Search query used to find the parent tweets.
        t: Search query used to find replies; ``" to:<screen_name>"`` is
            appended to it for each author of a collected tweet.

    The tweet search is limited to ``tweet_begin_date``..``tweet_end_date`` and
    the reply search to ``reply_begin_date``..``reply_end_date`` (module-level
    values). A reply is kept only when its ``in_reply_to_status_id`` is the id
    of one of the collected tweets.

    Two files are written under ``csv_dir``: ``tweet_<timestamp>.csv`` and
    ``reply_<timestamp>.csv``, where the timestamp comes from the module-level
    ``today``. Nothing is returned.
    """
    api = authTwitter()  # Authentication

    tweet_list = []
    reply_list = []

    tweets = tweepy.Cursor(
        api.search,
        q=s,                        # Search string
        include_entities=True,      # Get all omitted links
        tweet_mode='extended',      # Get the full, untruncated tweet text
        since=tweet_begin_date,     # Collection start date and time
        until=tweet_end_date,       # Collection end date and time
        lang='ja',                  # Get only Japanese tweets
    ).items()

    # Store the searched tweets in a list (line breaks are stripped from the text)
    for tweet in tweets:
        tweet_list.append([
            tweet.id,
            tweet.user.screen_name,
            tweet.created_at,
            tweet.full_text.replace('\n', ''),
            tweet.favorite_count,
            tweet.retweet_count,
        ])

    # A set gives constant-time membership tests when filtering replies below.
    tweet_ids = {row[0] for row in tweet_list}

    # Search each author only once, in first-seen order. Searching once per
    # tweet would re-run the same "to:<user>" query for authors with several
    # matching tweets and append the same replies to reply_list repeatedly.
    screen_names = list(dict.fromkeys(row[1] for row in tweet_list))

    # Search for the replies addressed to each collected author
    for user_id in screen_names:
        replies = tweepy.Cursor(
            api.search,
            q=t + " to:" + str(user_id),   # Search string
            include_entities=True,         # Get all omitted links
            tweet_mode='extended',         # Get the full, untruncated tweet text
            since=reply_begin_date,        # Reply collection start date and time
            until=reply_end_date,          # Reply collection end date and time
            lang='ja',                     # Get only Japanese tweets
        ).items()

        # Countermeasure against session disconnection due to mass transmission of requests
        time.sleep(5)

        # Keep only the replies whose destination is one of the collected tweets
        for reply in replies:
            if reply.in_reply_to_status_id in tweet_ids:
                reply_list.append([
                    reply.id,
                    reply.in_reply_to_status_id,
                    reply.user.screen_name,
                    reply.created_at,
                    reply.full_text.replace('\n', ''),
                    reply.favorite_count,
                    reply.retweet_count,
                ])

    # Output the results as CSV
    timestamp = today.strftime('%Y%m%d_%H%M%S')

    with open(csv_dir + 'tweet_' + timestamp + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["id", "user", "created_at", "text", "fav", "RT"])
        writer.writerows(tweet_list)

    with open(csv_dir + 'reply_' + timestamp + '.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["id", "to_id", "user", "created_at", "text", "fav", "RT"])
        writer.writerows(reply_list)
def main():
    """Run the collection with the example queries for tweets and for replies."""
    # Query for the parent tweets
    tweet_query = "lang:ja exclude:retweets min_replies:100"
    # Query for the replies; the destination user is appended by the collector
    reply_query = "lang:ja filter:replies exclude:retweets"
    gather_tweet_and_reply(tweet_query, reply_query)


# Run the collection only when this file is executed as a script
if __name__ == "__main__":
    main()
References: ・ Summary of the procedure from Twitter API registration (how to apply for an account) to approval (information as of August 2019) ・ I didn't know what could be obtained from a Tweepy status object, so I dumped everything out
Recommended Posts