As the title suggests, this is a script that uses Twitter's Search API to download images matching specific keywords.
Reference: https://github.com/Code-Hex/twippai
Execution environment: OS: OS X El Capitan (10.11.5), Python: 2.7.10
In addition to the standard library, you need to install these packages: requests, requests_oauthlib
#!/usr/bin/env python
# coding: utf-8
import os
import json
import time
import requests
from hashlib import md5
from requests_oauthlib import OAuth1
def md5hex(str):
    """Return the hexadecimal MD5 digest of ``str``.

    Args:
        str: Byte string or text to hash. Text is encoded as UTF-8 before
            hashing because ``hashlib`` only accepts bytes. (The parameter
            name shadows the builtin; it is kept so existing callers keep
            working.)

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    payload = str
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # detects text on both without needing the (shadowed) builtin name.
    if isinstance(payload, type(u'')):
        payload = payload.encode('utf-8')
    return md5(payload).hexdigest()
# Directory that receives the downloaded images.
folder = './data'

# os.path.exists() is True when the path is already present;
# the directory is only created when it is missing.
path = os.path.exists(folder)
if not path:
    os.mkdir(folder)

# Twitter API credentials -- fill in your own values.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# Search endpoint. For a description of the Twitter API see:
# https://syncer.jp/twitter-api-matome/get/search/tweets
url = 'https://api.twitter.com/1.1/search/tweets.json'

# No signature method (e.g. 'HMAC-SHA1') is passed explicitly;
# the request worked without specifying one.
oauth = OAuth1(
    client_key=consumer_key,
    client_secret=consumer_secret,
    resource_owner_key=access_token,
    resource_owner_secret=access_token_secret,
)

# Paging state shared with the download loop: the ID sent as ``max_id``
# (empty at the start) and the loop counter.
idnum = ''
count = 1
# Walk through at most 10 result pages, saving the first image attached to
# every tweet that carries media. ``count`` is advanced once per page.
while count <= 10:
    # Twitter search operators can be used inside the query string.
    query_form = {
        'q': u'Ditto filter:images min_retweets:1',
        'lang': 'ja',
        'count': 100,
        'result_type': 'recent',
    }
    if idnum:
        # Only constrain the page once a previous page has supplied an ID;
        # the first request asks for the newest results.
        query_form['max_id'] = idnum

    response = requests.get(url, auth=oauth, params=query_form)
    statuses = json.loads(response.content)['statuses']
    if not statuses:
        # No (more) results: nothing to download and no ID to page from.
        break

    for data in statuses:
        if 'media' not in data['entities']:
            continue
        # URL of the first image attached to the tweet.
        media_url = data['entities']['media'][0]['media_url']
        image_bytes = requests.get(media_url).content
        print(media_url + " " + str([count]))
        # The MD5 hex digest of the URL is used as the file name.
        filename = '%s.jpg' % md5hex(media_url)
        filepath = '%s/%s' % (folder, filename)
        # ``with`` closes the file even if the write fails.
        with open(filepath, 'wb') as image_file:
            image_file.write(image_bytes)

    count += 1
    # ``max_id`` is inclusive, so continue from just below the oldest tweet
    # seen on this page; otherwise that tweet is returned again.
    idnum = min(status['id'] for status in statuses) - 1
    # Pause between pages to stay gentle on the API.
    time.sleep(1)
Recommended Posts