This script fetches the tag search results of Nico Nico Douga in XML format. A search API already exists, but it requires login and is awkward to use (→ Reference article: Search API specifications for Nico Nico Douga), so I wrote Python code that works without logging in.
nc2xml.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# nc2xml.py
# Copyright (c) 2014 nezuq
# This software is released under the MIT License.
# http://opensource.org/licenses/mit-license.php
import sys
import time
import datetime
import urllib
import urllib2
from lxml import etree
# Command line: nc2xml.py [KEYWORD [SORT_TYPE [MIN_PAGENUMBER [MAX_PAGENUMBER]]]]
# Every argument is optional; an omitted one keeps the default shown below.
argvs = sys.argv
argc = len(argvs)

# Search keyword. A keyword given on the command line is decoded as UTF-8.
KEYWORD = argvs[1].decode('utf-8') if argc > 1 else u'Kimashitawa'

# Sort criteria:
#   'n'  latest comment posting date and time
#   'v'  views
#   'm'  my list
#   'r'  number of comments
#   'f'  post date and time (default)
#   'l'  playback time
SORT_TYPE = argvs[2] if argc > 2 else 'f'

# Target page number (ID acquisition start page).
MIN_PAGENUMBER = int(argvs[3]) if argc > 3 else 1

# Target page number (ID acquisition end page).
MAX_PAGENUMBER = int(argvs[4]) if argc > 4 else 3

# Retrieval method: 'tag' means tag search.
SEARCH_TYPE = 'tag'
def _fetch_page_ids(uri):
    """Download one RSS result page and return the video IDs it lists.

    The ID of an item is the text that follows the last '/' of its
    <link> element.
    """
    res = urllib2.urlopen(uri)
    try:
        payload = res.read()
    finally:
        # Release the connection even when read() raises.
        res.close()
    rss = etree.fromstring(payload)
    return [link.text.rsplit('/', 1)[1]
            for link in rss.findall('./channel/item/link')]


def main():
    """Collect video IDs from the search result pages and save their details.

    Pages MIN_PAGENUMBER..MAX_PAGENUMBER of the RSS search results for
    KEYWORD are fetched in order; fetching stops early at the first page
    that yields no IDs. The collected IDs are then sent in one request to
    the video.array endpoint and the response is stored in the current
    directory as '<KEYWORD>_<yymmddHHMMSS>.xml'.

    Nothing is downloaded when no IDs were found.
    """
    keyword = urllib2.quote(KEYWORD.encode("utf-8"))
    ids = []
    for npage in range(MIN_PAGENUMBER, MAX_PAGENUMBER + 1):
        uri = 'http://www.nicovideo.jp/%s/%s?sort=%s&rss=2.0&page=%d' % (
            SEARCH_TYPE, keyword, SORT_TYPE, npage)
        print(u'Inquiring about video ID:' + uri)
        # Wait before every request to keep the load on the server low.
        time.sleep(1)
        page_ids = _fetch_page_ids(uri)
        if not page_ids:
            # An empty page means the results are exhausted.
            break
        ids += page_ids

    if not ids:
        # Querying video.array with an empty ID list would only produce a
        # useless request and an empty output file.
        print('No video IDs found; skipping the video data query.')
        return

    query = 'http://i.nicovideo.jp/v3/video.array?v=' + ','.join(ids)
    print(u'Acquiring video data:' + query)
    stamp = datetime.datetime.today().strftime('%y%m%d%H%M%S')
    # * If the inquiry takes more than 20 seconds, it times out and the
    #   result count will be 0.
    # * Do not issue load-intensive queries: they may lead to an IP block
    #   or to the API being shut down.
    time.sleep(1)
    urllib.urlretrieve(query, u'%s_%s.xml' % (KEYWORD, stamp))
    print('end!')


if __name__ == '__main__':
    main()
Run nc2xml.py.
python nc2xml.py "lily" m 1 3
http://i.nicovideo.jp/v3/video.array?v=sm9720246,sm19673281
I have to analyze the data for the development of Yuri culture ... (sense of mission)
-> Nico Nico Douga search API specifications organized -> Comparison of i.nicovideo.jp API and getthumbinfo API -> I searched for Nico Nico Douga search API, so make a note -> Get the tag search results of Nico Nico Douga by RSS
Recommended Posts