The e-Stat [API specification](http://www.e-stat.go.jp/api/api-spec/) has been upgraded.
Referring to an earlier post by another author, I wrote scripts (for Python 2.7) that work with version 2 of the API.
Let's use the API of the official statistics counter (e-Stat)
The original script took the API ID as a command-line argument, but entering it every time is tedious, so I hard-coded it in the script instead.
The government statistics code is passed in as stats_code.
getStatsListSample2015.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib
import urllib2
from lxml import etree
import sys
import codecs
def main(argvs, argc):
    """
    Print the statistical tables that e-Stat lists for a statistics code.

    Queries the getStatsList endpoint of the e-Stat REST API 2.0 and writes
    every element of each TABLE_INF entry to standard output as
    "TAG [attribute] text", where the attribute is the element's id, code or
    no value (the first one present). Entries are separated by a dashed line.

    argvs -- argument vector: [script name, search_kind, stats_code]
    argc  -- length of argvs
    Returns 1 after a usage error or when the response carries no
    DATALIST_INF element; returns None on success.
    """
    if argc != 3:
        # The API ID is hard-coded below, so it is not a command-line argument.
        print("Usage #python %s search_kind stats_code" % argvs[0])
        return 1
    api_key = '' #Enter the obtained API ID
    # Command-line arguments are already byte strings under Python 2.7, so
    # they are percent-quoted directly; calling .encode() on them would
    # raise for non-ASCII input.
    search_kind = urllib.quote(argvs[1])
    stats_code = urllib.quote(argvs[2])
    url = ('http://api.e-stat.go.jp/rest/2.0/app/getStatsList?appId=%s&lang=J&statsCode=%s&searchKind=%s' % (api_key, stats_code, search_kind))
    conn = urllib2.urlopen(url)
    try:
        cont = conn.read()
    finally:
        conn.close()
    # recover=True lets lxml build a tree even from slightly malformed XML.
    root = etree.fromstring(cont, etree.XMLParser(recover=True))
    data_list = root.find('DATALIST_INF') if root is not None else None
    if data_list is None:
        # Without DATALIST_INF there is nothing to list; show whatever the
        # RESULT element contains instead of failing with AttributeError.
        sys.stderr.write('No DATALIST_INF element in the response\n')
        result = root.find('RESULT') if root is not None else None
        if result is not None:
            for child in result:
                sys.stderr.write('%s %s\n' % (child.tag, (child.text or u'').encode('utf-8')))
        return 1
    for table_inf in data_list.xpath('./TABLE_INF'):
        print('--------------')
        for element in table_inf.iter():
            if element.text is None:
                continue
            fields = [element.tag.encode('utf-8')]
            # Show at most one identifying attribute, preferring id, then
            # code, then no.
            for attr_name in ('id', 'code', 'no'):
                attr_value = element.get(attr_name)
                if attr_value is not None:
                    fields.append(attr_value.encode('utf-8'))
                    break
            fields.append(element.text.encode('utf-8'))
            print(' '.join(fields))
if __name__ == '__main__':
    # Script entry point: hand the argument vector and its length to main()
    # and use main()'s return value as the process exit status.
    argvs = sys.argv
    argc = len(argvs)
    sys.exit(main(argvs, argc))
Example of use:
python getStatsListSample2015.py 1 00200521 > gSLS00200521.dat
result:
gSLS00200521.dat
--------------
TABLE_INF 0000030001
STAT_NAME 00200521 Census
GOV_ORG 00200 Ministry of Internal Affairs and Communications
STATISTICS_NAME 1980 National Census 1st Basic Total National Edition
TITLE 00101 Gender (gender) (3), age 5 years (23), population nationwide / city / county / prefecture (47), whole area / densely inhabited area
CYCLE -
SURVEY_DATE 198010
OPEN_DATE 2007-10-05
SMALL_AREA 0
MAIN_CATEGORY 02 Population / Household
SUB_CATEGORY 01 Population
OVERALL_TOTAL_NUMBER 3651
UPDATED_DATE 2008-03-19
--------------
TABLE_INF 0000030002
STAT_NAME 00200521 Census
GOV_ORG 00200 Ministry of Internal Affairs and Communications
STATISTICS_NAME 1980 National Census 1st Basic Total National Edition
TITLE 00102 Gender (gender) (3), age group (103), population nationwide / city / county / prefecture (47), whole area / densely inhabited area
CYCLE -
SURVEY_DATE 198010
OPEN_DATE 2007-10-05
SMALL_AREA 0
MAIN_CATEGORY 02 Population / Household
SUB_CATEGORY 01 Population
OVERALL_TOTAL_NUMBER 16365
UPDATED_DATE 2008-03-19
--------------
・
・
・
export_csv2015.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import urllib2
from lxml import etree
import csv
def _to_csv_bytes(text):
    """Return text as a UTF-8 byte string for the Python 2 csv writer ('' for None)."""
    if text is None:
        return ''
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def export_statical_data(writer, api_key, stats_data_id, class_object, start_position):
    """
    Export stats

    Downloads the VALUE records of one statistical table from the e-Stat
    getStatsData endpoint (REST API 2.0), up to 10000 per request, and writes
    one CSV row per record. While the response contains a non-empty NEXT_KEY,
    the following page is requested from that position.

    writer         -- object with a writerow(list) method (e.g. csv.writer)
    api_key        -- e-Stat application ID
    stats_data_id  -- ID of the statistical table to export
    class_object   -- mapping: VALUE attribute name -> {'objects': {code:
                      {'name': ..., 'level': ...}}}; its iteration order
                      determines the column order
    start_position -- position of the first record to fetch; only sent to the
                      API when greater than 0
    """
    # Pages are fetched in a loop rather than by recursion so that very large
    # tables cannot exhaust the interpreter's recursion limit.
    while True:
        url = ('http://api.e-stat.go.jp/rest/2.0/app/getStatsData?limit=10000&appId=%s&lang=J&statsDataId=%s&metaGetFlg=N&cntGetFlg=N' % (api_key, stats_data_id))
        if start_position > 0:
            url = url + ('&startPosition=%d' % start_position)
        conn = urllib2.urlopen(url)
        try:
            cont = conn.read()
        finally:
            conn.close()
        root = etree.fromstring(cont, etree.XMLParser(recover=True))
        for value_tag in root.xpath('//STATISTICAL_DATA/DATA_INF/VALUE'):
            row = []
            for key in class_object:
                val = value_tag.get(key)
                class_item = class_object[key]['objects'].get(val)
                if class_item is None:
                    # Unknown (or missing) code: emit the raw attribute value.
                    row.append(_to_csv_bytes(val))
                    continue
                # Indent the name by one space per hierarchy level below the
                # first; a missing or non-numeric level means no indent.
                level = class_item.get('level')
                indent = ''
                if level and level.isdigit():
                    indent = ' ' * (int(level) - 1)
                row.append(_to_csv_bytes("%s%s" % (indent, class_item['name'])))
            row.append(_to_csv_bytes(value_tag.text))
            writer.writerow(row)
        next_tags = root.xpath('//STATISTICAL_DATA/TABLE_INF/NEXT_KEY')
        if not (next_tags and next_tags[0].text):
            break
        start_position = int(next_tags[0].text)
def get_meta_data(api_key, stats_data_id):
    """
    Get meta information

    Fetches the getMetaInfo response for one statistical table from the
    e-Stat REST API 2.0 and collects its CLASS_OBJ / CLASS elements.

    api_key       -- e-Stat application ID
    stats_data_id -- ID of the statistical table

    Returns a dict keyed by CLASS_OBJ id. Each value is
    {'id': ..., 'name': ..., 'objects': {...}}, where 'objects' maps a CLASS
    code to {'code', 'name', 'level', 'unit'} taken from that CLASS element's
    attributes (None for an attribute that is absent).
    """
    url = ('http://api.e-stat.go.jp/rest/2.0/app/getMetaInfo?appId=%s&lang=J&statsDataId=%s' % (api_key, stats_data_id))
    conn = urllib2.urlopen(url)
    try:
        cont = conn.read()
    finally:
        conn.close()
    # recover=True lets lxml build a tree even from slightly malformed XML.
    root = etree.fromstring(cont, etree.XMLParser(recover=True))
    class_object = {}
    for class_object_tag in root.xpath('//METADATA_INF/CLASS_INF/CLASS_OBJ'):
        class_object_id = class_object_tag.get('id')
        objects = {}
        for class_tag in class_object_tag.xpath('.//CLASS'):
            code = class_tag.get('code')
            objects[code] = {
                'code' : code,
                'name' : class_tag.get('name'),
                'level' : class_tag.get('level'),
                'unit' : class_tag.get('unit'),
            }
        class_object[class_object_id] = {
            'id' : class_object_id,
            'name' : class_object_tag.get('name'),
            'objects' : objects,
        }
    return class_object
def export_csv(api_key, stats_data_id, output_path):
    """
    Export specified statistics to CSV.

    Writes a header row made of the classification names returned by
    get_meta_data plus a final 'VALUE' column, then lets
    export_statical_data append the data rows starting at position 1.

    api_key       -- e-Stat application ID
    stats_data_id -- ID of the statistical table to export
    output_path   -- path of the CSV file to create (overwritten if present)
    """
    class_object = get_meta_data(api_key, stats_data_id)
    # 'wb' is the mode the Python 2 csv module expects; the with-block makes
    # sure the file is flushed and closed even if the export fails midway.
    with open(output_path, 'wb') as output_file:
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)
        header = [class_object[key]['name'].encode('utf-8') for key in class_object]
        header.append('VALUE')
        writer.writerow(header)
        export_statical_data(writer, api_key, stats_data_id, class_object, 1)
def main(argvs, argc):
    """
    Export one e-Stat statistical table to 'e-stat<stats_data_id>.csv'.

    argvs -- argument vector: [script name, stats_data_id]
    argc  -- length of argvs
    Returns 1 after a usage error; returns None on success.
    """
    if argc != 2:
        # The API ID is hard-coded below and the output path is derived from
        # the table ID, so stats_data_id is the only argument.
        print("Usage #python %s stats_data_id" % argvs[0])
        return 1
    api_key = '' #Enter the obtained API ID
    stats_data_id = argvs[1]
    output_path = 'e-stat' + stats_data_id + '.csv'
    export_csv(api_key, stats_data_id, output_path)
if __name__ == '__main__':
    # Script entry point: hand the argument vector and its length to main()
    # and use main()'s return value as the process exit status.
    argvs = sys.argv
    argc = len(argvs)
    sys.exit(main(argvs, argc))
Example of use:
python export_csv2015.py 0000030001
result:
e-stat0000030001.csv
"Area / concentration 030002","Age 5 years old Class A030002","Men and women A030001","Time axis(Annual)","All prefectures 030001","VALUE"
"Whole area","Total number","男女Total number","1980","Nationwide","117060396"
"Whole area","Total number","男女Total number","1980","National city","89187409"
"Whole area","Total number","男女Total number","1980","National counties","27872987"
"Whole area","Total number","男女Total number","1980","Hokkaido","5575989"
"Whole area","Total number","男女Total number","1980","Aomori Prefecture","1523907"
"Whole area","Total number","男女Total number","1980","Iwate Prefecture","1421927"
"Whole area","Total number","男女Total number","1980","Miyagi Prefecture","2082320"
"Whole area","Total number","男女Total number","1980","Akita","1256745"
"Whole area","Total number","男女Total number","1980","Yamagata Prefecture","1251917"
"Whole area","Total number","男女Total number","1980","Fukushima Prefecture","2035272"
"Whole area","Total number","男女Total number","1980","Ibaraki Prefecture","2558007"
"Whole area","Total number","男女Total number","1980","Tochigi Prefecture","1792201"
"Whole area","Total number","男女Total number","1980","Gunma Prefecture","1848562"
"Whole area","Total number","男女Total number","1980","Saitama","5420480"
"Whole area","Total number","男女Total number","1980","Chiba","4735424"
"Whole area","Total number","男女Total number","1980","Tokyo","11618281"
"Whole area","Total number","男女Total number","1980","Kanagawa Prefecture","6924348"
"Whole area","Total number","男女Total number","1980","Niigata Prefecture","2451357"
"Whole area","Total number","男女Total number","1980","Toyama Prefecture","1103459"
・
・
・
These scripts only run on Python 2.7, so if you only have Python 3 installed, set up a Python 2.7 environment first.
Building Python 3.x environment with Pyenv (CentOS, Ubuntu)
Recommended Posts