This code uses Beautiful Soup to extract the Twitter handles from the Business Insider article "The 100 Most Influential Tech People On Twitter", and then fetches each account's information with the Twitter REST API.
The 100 Most Influential Tech People On Twitter
The article page looks like this.
# coding: utf-8
from bs4 import BeautifulSoup
import requests,json
from requests_oauthlib import OAuth1Session
from requests.exceptions import ConnectionError, ReadTimeout, SSLError
# Download the Business Insider article and parse its HTML.
res = requests.get("http://www.businessinsider.com/100-influential-tech-people-on-twitter-2014-4?op=1")
soup = BeautifulSoup(res.text)

# Walk the page text line by line and collect one dict per listed person.
# Ranks are assigned by counting down from 100 each time an "Occupation:"
# line is seen.
count = 100
user_list = []
user = {}
for line in soup.body.get_text().split('\n'):
    if 'Occupation:' in line:
        # An "Occupation:" line opens a new entry, so store the previous
        # entry (if any) before starting a fresh one.
        if user:
            user_list.append(user)
            user = {}
        print('%s %s' % (count, line))
        user['rank'] = count
        user['occupation'] = line.replace('Occupation:', '').replace(u'\xa0', '')
        count -= 1
    if any(c in line for c in ('@', 'Why:', 'Tech PI:', 'PI:')):
        print('  %s' % line)
        # 'Tech PI:' must be tested before 'PI:' because it contains it.
        if 'Tech PI:' in line:
            user['tech_pi'] = line.replace('Tech PI:', '').replace(u'\xa0', '')
        elif 'PI:' in line:
            user['pi'] = line.replace('PI:', '').replace(u'\xa0', '')
        # 'Why:' is tested before '@' so that a description which mentions
        # another @account is not mistaken for the handle line.
        elif 'Why:' in line:
            user['Why:'] = line.replace('Why:', '').replace(u'\xa0', '')
        else:
            # Only '@' can have matched here: treat it as the handle line.
            # Strip whitespace so the value is a clean screen name.
            user['handle'] = (line.replace('Handle:', '')
                                  .replace(u'\xa0', '')
                                  .replace(u'@', '')
                                  .strip())

# The loop only stores an entry when the *next* "Occupation:" line shows up,
# so the final entry has to be stored explicitly.
if user:
    user_list.append(user)

# Entries without a handle are skipped instead of raising KeyError.
handle_list = [d['handle'] for d in user_list if d.get('handle')]
# Twitter API credentials: replace the placeholders with the keys issued
# for your own account/application.
KEYS = {
    'consumer_key': '**********',
    'consumer_secret': '**********',
    'access_token': '**********',
    'access_secret': '**********',
}

# OAuth1-signed session used for the REST API call below.
twitter = OAuth1Session(KEYS['consumer_key'], KEYS['consumer_secret'],
                        KEYS['access_token'], KEYS['access_secret'])

# Look up all collected handles in one request; the screen names are sent
# as a single comma-separated value.
url = 'https://api.twitter.com/1.1/users/lookup.json?'
params = {'screen_name': ','.join(handle_list)}
req = twitter.get(url, params=params)

# Decode the response body and pretty-print each returned item as JSON.
user_list = json.loads(req.text)
for u in user_list:
    d_data = json.dumps(u, sort_keys=True, indent=4)
    print(d_data)
The output looks like this.
100 Occupation: CEO/founder of News Corporation; Creator of FOX Broadcasting
Handle: @rupertmurdoch
Why: See how tech fits into the greater news cycle from Rupert himself. Yeah, he writes his own tweets.
Tech PI: 83
PI: 86
99 Occupation: Assistant professor at the University of North Carolina, Chapel Hill with her own tech site at www.technosociology.org
Handle: @zeynep
Why: Catch Zeynep's musings on everything ranging from international Web policies to social justice.
Tech PI: 84
PI: 77
98 Occupation: Data Scientist in Residence at Accel, Scientist Emeritus at bitly, co-founder of HackNY, co-host of DataGotham, and member of NYCResistor
Handle: @hmason
Why: Hilary is on top of the chatter when it comes to today's tech news.
Tech PI: 84
PI: 77
・
・
・
The account information fetched through the REST API is held as JSON, like this:
"contributors_enabled": false,
"created_at": "Sat Dec 31 18:29:24 +0000 2011",
"default_profile": true,
"default_profile_image": false,
"description": "",
"entities": {
"description": {
"urls": []
}
},
"favourites_count": 13,
"follow_request_sent": false,
"followers_count": 570445,
"following": false,
"friends_count": 96,
"geo_enabled": false,
"id": 451586190,
"id_str": "451586190",
"is_translation_enabled": false,
"is_translator": false,
"lang": "en",
"listed_count": 7145,
"location": "",
"name": "Rupert Murdoch ",
"notifications": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png ",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png ",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/1732184156/Twitter_normal.jpg ",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1732184156/Twitter_normal.jpg ",
"profile_link_color": "0084B4",
"profile_location": null,
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "rupertmurdoch",
"status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 10 12:33:22 +0000 2015",
"entities": {
"hashtags": [],
"symbols": [],
"urls": [],
"user_mentions": []
},
"favorite_count": 63,
"favorited": false,
"geo": null,
"id": 586507259578032128,
"id_str": "586507259578032128",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"lang": "en",
"place": null,
"retweet_count": 89,
"retweeted": false,
"source": "<a href=\"http://twitter.com/#!/download/ipad\" rel=\"nofollow\">Twitter for iPad</a>",
"text": "Guardian today suggests my dad's expose of Gallipoli fiasco led to my anti-establishment views. Maybe, but confirmed by many later \nevents.",
"truncated": false
},
"statuses_count": 1423,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": true
・
・
・
Recommended Posts