I want to use the Qiita API to fetch information about my Qiita articles and check things such as which articles and which tags are viewed most often. As a first step, I will try registering the information obtained from the API in MongoDB.
The code in this article is written in Python. For fetching the article information, I referred to the following article:
Try using Qiita API from Python
import requests
import logging
import json
# Log record layout: timestamp, logger name (padded to 12 characters),
# level name (padded to 8 characters), then the message itself.
formatter = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'

# Root logger configuration. With the threshold at WARNING, the INFO-level
# "GET <url>" messages emitted by this module are suppressed unless the
# level is lowered.
logging.basicConfig(format=formatter, level=logging.WARNING)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class GetQiitaInfo(object):
    """Fetch the authenticated user's Qiita articles through the Qiita API v2.

    Each article returned by get_items() is enriched with its view count,
    its tag names (tag1..tag5 and tag_list) and its stock count.
    """

    def __init__(self, token='your token'):
        """Remember the access token used for the Authorization header.

        :param token: Qiita API access token. The default is a placeholder
            and must be replaced with a real token before calling the API.
        """
        self.token = token

    def get_next_url(self, response):
        """Return the URL of the next page, or None when there is none.

        When a next page exists, the Link response header contains an entry
        marked with rel="next", for example:

            link: <https://qiita.com/api/v2/authenticated_user/items?page=1>;
            rel="first", <https://qiita.com/api/v2/authenticated_user/items?page=2>;
            rel="next", <https://qiita.com/api/v2/authenticated_user/items?page=4>;
            rel="last"

        :param response: Response object exposing a dict-like ``headers``.
        :return: Next URL, or None if the header is missing or has no
            rel="next" entry.
        """
        # .get() instead of ['link']: a response without a Link header must
        # yield None rather than raise KeyError.
        link_header = response.headers.get('link')
        if not link_header:
            return None
        for part in link_header.split(','):
            if 'rel="next"' in part:
                return part[part.find('<') + 1:part.find('>')]
        return None

    def _get_all_pages(self, url, headers):
        """Follow rel="next" links from ``url`` and return all JSON entries."""
        entries = []
        while url is not None:
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            entries.extend(json.loads(response.text))
            logger.info('GET %s', url)
            # Continue with the following page, if the API advertises one
            url = self.get_next_url(response)
        return entries

    def get_items(self):
        """Return every article of the authenticated user, with extra fields.

        The list endpoint is paginated, so all pages are followed. The list
        does not carry usable view or stock counts, so each article is
        queried individually and these keys are added to it:
        ``page_views_count``, ``tag1`` .. ``tag5`` ('' when the article has
        fewer tags), ``tag_list`` and ``stocks_count``.

        :return: List of article dicts.
        :raises requests.HTTPError: If any API call returns an error status.
        """
        url = 'https://qiita.com/api/v2/authenticated_user/items'
        headers = {'Authorization': 'Bearer {}'.format(self.token)}
        items = self._get_all_pages(url, headers)

        # Get and add view and stock information for each article.
        # page_views_count has a field in the list API but null is returned.
        for item in items:
            # Number of views and tags
            url = 'https://qiita.com/api/v2/items/{}'.format(item['id'])
            logger.info('GET %s', url)
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            detail = json.loads(response.text)
            item['page_views_count'] = detail['page_views_count']

            tag_names = [tag['name'] for tag in detail['tags']]
            # tag1..tag5 are always present; missing positions become ''
            # (this also covers an article without any tag).
            for position in range(5):
                key = 'tag{}'.format(position + 1)
                item[key] = tag_names[position] if position < len(tag_names) else ''
            item['tag_list'] = tag_names

            # Stock quantity: the stockers endpoint is paginated as well, so
            # every page is read; counting one page would cap the total.
            url = 'https://qiita.com/api/v2/items/{}/stockers'.format(item['id'])
            users = self._get_all_pages(url, headers)
            for user in users:
                logger.info({
                    'id': user['id'],
                    'name': user['name']
                })
            item['stocks_count'] = len(users)
        return items
For my own study, I made two changes to the code from the article I referred to: (1) I wrapped it in a class, and (2) I added the fields tag1 to tag5 and tag_list.
The MongoDB code is unchanged from an article I wrote earlier about operating MongoDB.
Reference: Operating MongoDB with Python - Part 4: insert
from pymongo import MongoClient
class MongoSample(object):
    """Small convenience wrapper around one MongoDB collection."""

    def __init__(self, dbName, collectionName):
        """Create a client with MongoClient() defaults and pick a collection.

        :param dbName: Name of the database to use.
        :param collectionName: Name of the collection inside that database.
        """
        client = MongoClient()
        database = client[dbName]  # Set DB name
        self.client = client
        self.db = database
        self.collection = database.get_collection(collectionName)

    def find_one(self, projection=None, filter=None, sort=None):
        """Forward to the collection's find_one() and return its result."""
        query = {'projection': projection, 'filter': filter, 'sort': sort}
        return self.collection.find_one(**query)

    def find(self, projection=None, filter=None, sort=None):
        """Forward to the collection's find() and return its result."""
        query = {'projection': projection, 'filter': filter, 'sort': sort}
        return self.collection.find(**query)

    def insert_one(self, document):
        """Insert a single document and return the collection's result."""
        outcome = self.collection.insert_one(document)
        return outcome

    def insert_many(self, documents):
        """Insert several documents and return the collection's result."""
        outcome = self.collection.insert_many(documents)
        return outcome
from get_qiita_info import GetQiitaInfo
from mongo_sample import MongoSample
# Get Qiita article information
qiita = GetQiitaInfo()
items = qiita.get_items()

# arg1: DB name
# arg2: collection name
mongo = MongoSample("db", "qiita")

# rendered_body / body are unnecessary, so delete them before registering.
# pop() with a default does not raise when a key is already absent.
for item in items:
    item.pop("rendered_body", None)
    item.pop("body", None)

# Register every article exactly once, in bulk. Inserting the same dicts a
# second time (e.g. insert_many followed by insert_one per item) would fail
# with duplicate keys, because the first insert adds an "_id" to each dict.
# To register one by one instead, replace this with a loop calling
# mongo.insert_one(item). insert_many rejects an empty list, hence the guard.
if items:
    mongo.insert_many(items)

# Read one document back to confirm the registration worked
result = mongo.find_one()
print(result)
Let's take a look at mongoDB after executing the above code.
> db.qiita.findOne()
{
"_id" : ObjectId("5e38ff43c92e7c532aeffb47"),
"coediting" : false,
"comments_count" : 0,
"created_at" : "2020-02-04T13:37:44+09:00",
"group" : null,
"id" : "331ae2289a95f5a9b901",
"likes_count" : 0,
"private" : false,
"reactions_count" : 0,
"tags" : [
{
"name" : "Python",
"versions" : [ ]
},
{
"name" : "Python3",
"versions" : [ ]
}
],
"title": "[Python] No value for argument'self' in unbound method call",
"updated_at" : "2020-02-04T13:37:44+09:00",
"url" : "https://qiita.com/bc_yuuuuuki/items/331ae2289a95f5a9b901",
"user" : {
"description": "I'm learning blockchain / AI / Python / Golang / MongoDB, etc. \ r \ nThe content posted on this site is my own opinion, not necessarily my position and strategy in my organization / company. , Not representative of opinion. ",,
"facebook_id" : "",
"followees_count" : 0,
"followers_count" : 2,
"github_login_name" : null,
"id" : "bc_yuuuuuki",
"items_count" : 28,
"linkedin_id" : "",
"location" : "",
"name" : "",
"organization" : "",
"permanent_id" : 476876,
"profile_image_url" : "https://pbs.twimg.com/profile_images/1157834557783072768/ktpc9kGV_bigger.jpg ",
"team_only" : false,
"twitter_screen_name" : "bc_yuuuuuki",
"website_url" : ""
},
"page_views_count" : 54,
"tag1" : "Python",
"tag2" : "Python3",
"tag_list" : [
"Python",
"Python3"
],
"stocks_count" : 0
}
I was able to confirm that it was registered.
In this code the result returned by the API is modified slightly before it is stored, but it is convenient that JSON obtained from an API can be stored as-is and then searched and aggregated without having to design a schema first.