About e-Stat API (ver.2)

The e-Stat [API specifications](http://www.e-stat.go.jp/api/api-spec/) have been upgraded.

Creating a python script

I wrote scripts for ver.2 (Python 2.7), based on an earlier post by another author.

Let's use the API of the official statistics counter (e-Stat)

The original script took the API ID as a command-line argument, but entering it every time is tedious, so I hard-coded it in the script instead.

Also, the government statistics code is passed in as stats_code.

Search for statistical table information

`getStatsListSample2015.py`


#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib
import urllib2
from lxml import etree
import sys
import codecs

def main(argvs, argc):
    """Query the e-Stat getStatsList API (ver.2) and print each table's metadata.

    For every TABLE_INF element in the response, a separator line is printed,
    followed by one line per element below it: the tag name, the first
    available of its ``id`` / ``code`` / ``no`` attributes, and its text.

    Args:
        argvs: Command-line vector ``[script, search_kind, stats_code]``.
        argc: Number of entries in ``argvs``; must be 3.

    Returns:
        1 on a usage error, when the API ID has not been filled in, or when
        the response contains no DATALIST_INF element; 0 otherwise.
    """
    if argc != 3:
        # The API ID is embedded below, so it is not a command-line argument.
        print("Usage #python %s search_kind stats_code" % argvs[0])
        return 1
    api_key = '' #Enter the obtained API ID
    if not api_key:
        sys.stderr.write("API ID is not set: edit api_key in %s\n" % argvs[0])
        return 1
    # Under Python 2 argv entries are already byte strings, so they can be
    # percent-quoted directly.
    search_kind = urllib.quote(argvs[1])
    stats_code = urllib.quote(argvs[2])

    url = ('http://api.e-stat.go.jp/rest/2.0/app/getStatsList?appId=%s&lang=J&statsCode=%s&searchKind=%s' % (api_key, stats_code, search_kind))
    conn = urllib2.build_opener().open(urllib2.Request(url))
    try:
        cont = conn.read()
    finally:
        conn.close()
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(cont, parser)

    # NOTE(review): the recovering parser may hand back None for unusable input.
    data_list = root.find('DATALIST_INF') if root is not None else None
    if data_list is None:
        # NOTE(review): RESULT/ERROR_MSG is assumed from the e-Stat API
        # specification - confirm. If absent, a generic message is shown.
        message = None
        if root is not None:
            message = root.findtext('RESULT/ERROR_MSG')
        if not message:
            message = u'unknown error'
        sys.stderr.write('No DATALIST_INF in response: %s\n' % message.encode('utf-8'))
        return 1

    for table_inf in data_list.xpath('./TABLE_INF'):
        print('--------------')
        for element in table_inf.iter():
            # Build the line from the current element only, so a tag without
            # text never reuses the previous element's tag or text.
            fields = [element.tag.encode('utf-8')]
            for attr_name in ('id', 'code', 'no'):
                attr_value = element.get(attr_name)
                if attr_value is not None:
                    fields.append(attr_value.encode('utf-8'))
                    break
            fields.append((element.text or u'').encode('utf-8'))
            print(' '.join(fields))
    return 0


# Script entry point: hand the raw command line and its length to main()
# and use main()'s return value as the process exit status.
if __name__ == '__main__':
    argvs = sys.argv
    argc = len(argvs)
    exit_status = main(argvs, argc)
    sys.exit(exit_status)

Example of use:

python getStatsListSample2015.py 1 00200521 > gSLS00200521.dat

result:

`gSLS00200521.dat`


--------------
TABLE_INF 0000030001 
            
STAT_NAME 00200521 Census
GOV_ORG 00200 Ministry of Internal Affairs and Communications
STATISTICS_NAME 1980 National Census 1st Basic Total National Edition
TITLE 00101 Gender (gender) (3), age 5 years (23), population nationwide / city / county / prefecture (47), whole area / densely inhabited area
CYCLE -
SURVEY_DATE 198010
OPEN_DATE 2007-10-05
SMALL_AREA 0
MAIN_CATEGORY 02 Population / Household
SUB_CATEGORY 01 Population
OVERALL_TOTAL_NUMBER 3651
UPDATED_DATE 2008-03-19
--------------
TABLE_INF 0000030002 
            
STAT_NAME 00200521 Census
GOV_ORG 00200 Ministry of Internal Affairs and Communications
STATISTICS_NAME 1980 National Census 1st Basic Total National Edition
TITLE 00102 Gender (gender) (3), age group (103), population nationwide / city / county / prefecture (47), whole area / densely inhabited area
CYCLE -
SURVEY_DATE 198010
OPEN_DATE 2007-10-05
SMALL_AREA 0
MAIN_CATEGORY 02 Population / Household
SUB_CATEGORY 01 Population
OVERALL_TOTAL_NUMBER 16365
UPDATED_DATE 2008-03-19
--------------
・
・
・

Output statistical table as csv

`export_csv2015.py`


#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import urllib2
from lxml import etree
import csv

def export_statical_data(writer, api_key, stats_data_id, class_object, start_position):
    """
    Write the VALUE rows of one statistical table to a CSV writer.

    Requests getStatsData in pages of up to 10000 values, beginning at
    ``start_position``, and keeps requesting while the response carries a
    non-empty NEXT_KEY.

    Args:
        writer: Object with a ``writerow(list)`` method (e.g. ``csv.writer``).
        api_key: e-Stat application ID, sent as ``appId``.
        stats_data_id: Statistical table ID, sent as ``statsDataId``.
        class_object: Mapping as built by ``get_meta_data()``: VALUE attribute
            name -> ``{'objects': {code: {'name': ..., 'level': ...}}}``.
        start_position: Position of the first value to fetch (``export_csv``
            passes 1); values <= 0 omit the ``startPosition`` parameter.

    Each written row holds one cell per key of ``class_object`` (in that
    dict's iteration order) followed by the text of the VALUE element.
    """
    base_url = ('http://api.e-stat.go.jp/rest/2.0/app/getStatsData?limit=10000&appId=%s&lang=J&statsDataId=%s&metaGetFlg=N&cntGetFlg=N' % (api_key, stats_data_id))
    parser = etree.XMLParser(recover=True)
    position = start_position

    # Pages are fetched in a loop instead of by recursion, so the number of
    # pages is not bounded by the interpreter's recursion limit.
    while True:
        url = base_url
        if position > 0:
            url = url + ('&startPosition=%d' % position)

        conn = urllib2.build_opener().open(urllib2.Request(url))
        try:
            cont = conn.read()
        finally:
            conn.close()
        root = etree.fromstring(cont, parser)
        if root is None:
            # NOTE(review): the recovering parser may return None for
            # unusable input; there is nothing to export in that case.
            return

        for value_tag in root.xpath('//STATISTICAL_DATA/DATA_INF/VALUE'):
            row = []
            for key in class_object:
                val = value_tag.get(key)
                class_item = class_object[key]['objects'].get(val)
                name = class_item.get('name') if class_item is not None else None
                if name is None:
                    # Unknown code, unnamed class or missing attribute:
                    # fall back to the raw attribute value (empty if absent).
                    raw = val if val is not None else u''
                    row.append(raw.encode('utf-8'))
                    continue
                # Indent the label by one space per hierarchy level above 1.
                indent = ''
                level = class_item.get('level')
                if level and level.isdigit():
                    indent = ' ' * (int(level) - 1)
                row.append(("%s%s" % (indent, name)).encode('utf-8'))
            # The Python 2 csv module needs byte strings, so the value text
            # is encoded like every other cell.
            row.append((value_tag.text or u'').encode('utf-8'))
            writer.writerow(row)

        # NOTE(review): per the e-Stat API 2.0 specification NEXT_KEY is
        # expected under RESULT_INF - confirm. The TABLE_INF location used by
        # the earlier script is kept as a fallback.
        next_tags = root.xpath('//STATISTICAL_DATA/RESULT_INF/NEXT_KEY'
                               ' | //STATISTICAL_DATA/TABLE_INF/NEXT_KEY')
        next_text = next_tags[0].text if next_tags else None
        if not next_text or not next_text.strip():
            return
        next_position = int(next_text)
        if next_position <= position:
            # A key that does not advance would fetch the same page forever.
            return
        position = next_position

def get_meta_data(api_key, stats_data_id):
    """
    Fetch the classification metadata of a statistical table.

    Calls getMetaInfo and returns a dict keyed by the ``id`` attribute of each
    CLASS_OBJ element. Every entry has the keys ``id``, ``name`` and
    ``objects``; ``objects`` maps the ``code`` attribute of each CLASS element
    below it to a dict of that element's ``code``, ``name``, ``level`` and
    ``unit`` attributes (None where the attribute is absent).
    """
    request_url = ('http://api.e-stat.go.jp/rest/2.0/app/getMetaInfo?appId=%s&lang=J&statsDataId=%s' % (api_key, stats_data_id))
    response = urllib2.build_opener().open(urllib2.Request(request_url))
    # recover=True lets the parser continue past malformed markup.
    document = etree.fromstring(response.read(), etree.XMLParser(recover=True))

    catalogue = {}
    for obj_tag in document.xpath('//METADATA_INF/CLASS_INF/CLASS_OBJ'):
        obj_id = obj_tag.get('id')
        members = {}
        for member_tag in obj_tag.xpath('.//CLASS'):
            code = member_tag.get('code')
            members[code] = {
                'code' : code,
                'name' : member_tag.get('name'),
                'level' : member_tag.get('level'),
                'unit' : member_tag.get('unit')
            }
        catalogue[obj_id] = {
            'id' : obj_id,
            'name' : obj_tag.get('name'),
            'objects' : members
        }
    return catalogue

def export_csv(api_key, stats_data_id, output_path):
    """
    Export the specified statistical table to a CSV file.

    The first row is a header with the name of every classification returned
    by ``get_meta_data()`` followed by ``VALUE``; the data rows are written by
    ``export_statical_data()`` starting at position 1.

    Args:
        api_key: e-Stat application ID.
        stats_data_id: Statistical table ID.
        output_path: Path of the CSV file to create (overwritten if present).
    """
    # Fetch the metadata before opening the output, so a failed request does
    # not leave an empty file behind.
    class_object = get_meta_data(api_key, stats_data_id)

    # The header iterates the same dict object that export_statical_data()
    # iterates for each row, which keeps the columns aligned.
    header = [(class_object[key]['name'] or u'').encode('utf-8')
              for key in class_object]
    header.append('VALUE')

    # 'wb' is the mode the Python 2 csv module expects; the with-block makes
    # sure the file is flushed and closed even if the export fails midway.
    with open(output_path, 'wb') as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        export_statical_data(writer, api_key, stats_data_id, class_object, 1)

def main(argvs, argc):
    """
    Export one statistical table to ``e-stat<stats_data_id>.csv``.

    Args:
        argvs: Command-line vector ``[script, stats_data_id]``.
        argc: Number of entries in ``argvs``; must be 2.

    Returns:
        1 on a usage error or when the API ID has not been filled in,
        0 after the export finished.
    """
    if argc != 2:
        # The API ID is embedded below and the output path is derived from
        # the table ID, so neither is a command-line argument.
        print("Usage #python %s stats_data_id" % argvs[0])
        return 1
    api_key = '' #Enter the obtained API ID
    if not api_key:
        sys.stderr.write("API ID is not set: edit api_key in %s\n" % argvs[0])
        return 1
    stats_data_id = argvs[1]
    output_path = 'e-stat' + stats_data_id + '.csv'
    export_csv(api_key, stats_data_id, output_path)
    return 0

# Script entry point: hand the raw command line and its length to main()
# and use main()'s return value as the process exit status.
if __name__ == '__main__':
    argvs = sys.argv
    argc = len(argvs)
    exit_status = main(argvs, argc)
    sys.exit(exit_status)

Example of use:

python export_csv2015.py 0000030001

result:

`e-stat0000030001.csv`


"Area / concentration 030002","Age 5 years old Class A030002","Men and women A030001","Time axis(Annual)","All prefectures 030001","VALUE"
"Whole area","Total number","男女Total number","1980","Nationwide","117060396"
"Whole area","Total number","男女Total number","1980","National city","89187409"
"Whole area","Total number","男女Total number","1980","National counties","27872987"
"Whole area","Total number","男女Total number","1980","Hokkaido","5575989"
"Whole area","Total number","男女Total number","1980","Aomori Prefecture","1523907"
"Whole area","Total number","男女Total number","1980","Iwate Prefecture","1421927"
"Whole area","Total number","男女Total number","1980","Miyagi Prefecture","2082320"
"Whole area","Total number","男女Total number","1980","Akita","1256745"
"Whole area","Total number","男女Total number","1980","Yamagata Prefecture","1251917"
"Whole area","Total number","男女Total number","1980","Fukushima Prefecture","2035272"
"Whole area","Total number","男女Total number","1980","Ibaraki Prefecture","2558007"
"Whole area","Total number","男女Total number","1980","Tochigi Prefecture","1792201"
"Whole area","Total number","男女Total number","1980","Gunma Prefecture","1848562"
"Whole area","Total number","男女Total number","1980","Saitama","5420480"
"Whole area","Total number","男女Total number","1980","Chiba","4735424"
"Whole area","Total number","男女Total number","1980","Tokyo","11618281"
"Whole area","Total number","男女Total number","1980","Kanagawa Prefecture","6924348"
"Whole area","Total number","男女Total number","1980","Niigata Prefecture","2451357"
"Whole area","Total number","男女Total number","1980","Toyama Prefecture","1103459"
・
・
・

Those who only have Python 3

These scripts only run on Python 2.7, so if you only have Python 3, set up a separate Python environment first.

Building Python 3.x environment with Pyenv (CentOS, Ubuntu)

Creating a Python script that supports the e-Stat API (ver.2)

About e-Stat API (ver.2)

Creating a python script

Search for statistical table information

getStatsListSample2015.py

gSLS00200521.dat