Regional mesh statistics divide the area into mesh areas without gaps based on latitude and longitude, and organize the statistical data into each area.
demo: http://needtec.sakura.ne.jp/estat/population http://needtec.sakura.ne.jp/tokuraku/passenger.html
The information on which this data is based is obtained from the Official Statistics Office (estat). I used to draw with matplotlib in Python, but now I can display it in a browser.
** Let's use the API of the official statistics counter (e-Stat) ** http://qiita.com/mima_ita/items/44f358dc1bc4000d365d
Please refer to the following for details on how to obtain the regional mesh. http://www.stat.go.jp/data/mesh/pdf/gaiyo1.pdf
It is too late to contact the official statistics counter every time you draw, so the procedure up to data drawing is divided as follows.
Perform these steps in Python on the server side and JavaScript on the client side.
The actual code can be obtained from the following. https://github.com/mima3/estat
Please refer to the following for the explanation of spatialite. ** Try using SpatiaLite to store spatial information such as maps in SQLite ** http://qiita.com/mima_ita/items/64f6c2b8bb47c4b5b391
The implementation of the database that stores the statistics introduced here is the following code. https://github.com/mima3/estat/blob/master/estat_db.py
The table for storing the estat regional mesh is as follows.
** Table name: Stat ** A table that stores information about statistics
Column name | Mold | Description |
---|---|---|
stat_id | TextField | Primary key. Statistics ID |
stat_name | TextField | Government statistics name |
stat_name_code | TextField | Government statistics name code |
gov_org | TextField | Name of creator |
gov_org_code | TextField | Creation institution name code |
survey_date | TextField | Survey date |
title | TextField | Statistical table title |
** Table name: StatValue ** Store the value of each statistic
Column name | Mold | Description |
---|---|---|
id | PrimaryKeyField | Auto-incrementing ID |
stat_id | TextField | Statistics ID |
value | TextField | Statistics value |
** Table name: StatValueAttr ** Stores the attribute value associated with the value of each statistic
Column name | Mold | Description |
---|---|---|
id | PrimaryKeyField | Auto-incrementing ID |
stat_id | TextField | Statistics ID |
stat_value_id | INT | Foreign key for StatValue id |
attr_name | TextField | Attribute name |
attr_value | TextField | Attribute value |
** Table name: MapArea ** Store Polygon in attribute area
Column name | Mold | Description |
---|---|---|
id | PrimaryKeyField | Auto-incrementing ID |
stat_id | TextField | Statistics ID |
stat_val_attr_id | INT | ID of StatValueAttr |
geometry | Polygon | Geometry information |
The actual code when creating with peewee in python is as follows.
estat_db.py
# -*- coding: utf-8 -*-
import os
from peewee import *
from playhouse.sqlite_ext import SqliteExtDatabase
from estat_go_jp import *
import jpgrid
database_proxy = Proxy() # Create a proxy for our db.
SRID = 4326
class PolygonField(Field):
db_field = 'polygon'
class PointField(Field):
db_field = 'point'
class LineStringField(Field):
db_field = 'linestring'
class MultiPolygonField(Field):
db_field = 'multipolygon'
class MultiPointField(Field):
db_field = 'multipoint'
class MultiLineStringField(Field):
db_field = 'multilinestring'
class Stat(Model):
"""
Model for storing statistics
"""
stat_id = TextField(primary_key=True)
stat_name = TextField()
stat_name_code = TextField()
gov_org = TextField()
gov_org_code = TextField()
#statistics_name = TextField()
#cycle = TextField()
survey_date = TextField()
#open_date = TextField()
#small_area = TextField()
title = TextField()
class Meta:
database = database_proxy
class StatValue(Model):
"""
A model for storing statistical values
"""
id = PrimaryKeyField()
stat_id = TextField(index=True, unique=False)
value = TextField()
class Meta:
database = database_proxy
class StatValueAttr(Model):
"""
Model for storing statistical attribute values
"""
id = PrimaryKeyField()
stat_id = TextField()
stat_value = ForeignKeyField(
db_column='stat_value_id',
rel_model=StatValue,
to_field='id'
)
attr_name = TextField()
attr_value = TextField()
class Meta:
database = database_proxy
indexes = (
(('stat_id', 'stat_value'), False),
(('stat_id', 'stat_value', 'attr_name'), False),
)
class MapArea(Model):
"""
Model to store Polygon in the attribute value area of statistics
"""
id = PrimaryKeyField()
stat_id = TextField()
stat_val_attr = ForeignKeyField(
db_column='stat_val_attr_id',
rel_model=StatValueAttr,
to_field='id'
)
geometry = PolygonField()
class Meta:
database = database_proxy
indexes = (
(('stat_id', 'stat_val_attr'), True),
)
class idx_MapArea_Geometry(Model):
pkid = PrimaryKeyField()
xmin = FloatField()
xmax = FloatField()
ymin = FloatField()
ymax = FloatField()
class Meta:
database = database_proxy
def connect(path, spatialite_path, evn_sep=';'):
"""
Connect to database
@param path sqlite path
@param spatialite_path mod_Path to spatialite
@param env_sep Environment variable PATH connection character WINDOWS;LINUX:
"""
os.environ["PATH"] = os.environ["PATH"] + evn_sep + os.path.dirname(spatialite_path)
db = SqliteExtDatabase(path)
database_proxy.initialize(db)
db.field_overrides = {
'polygon': 'POLYGON',
'point': 'POINT',
'linestring': 'LINESTRING',
'multipolygon': 'MULTIPOLYGON',
'multipoint': 'MULTIPOINT',
'multilinestring': 'MULTILINESTRING',
}
db.load_extension(os.path.basename(spatialite_path))
def setup(path, spatialite_path, evn_sep=';'):
"""
Creating a database
@param path sqlite path
@param spatialite_path mod_Path to spatialite
@param env_sep Environment variable PATH connection character WINDOWS;LINUX:
"""
connect(path, spatialite_path, evn_sep)
database_proxy.create_tables([Stat, StatValue, StatValueAttr], True)
database_proxy.get_conn().execute('SELECT InitSpatialMetaData()')
#Geometry table needs to be implemented directly.
database_proxy.get_conn().execute("""
CREATE TABLE IF NOT EXISTS "MapArea" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"stat_id" TEXT,
"stat_val_attr_id" INTEGER ,
FOREIGN KEY(stat_val_attr_id) REFERENCES StatValueAttr(id));
""")
database_proxy.get_conn().execute("""
CREATE INDEX IF NOT EXISTS "ix_MapArea_stat_id" ON MapArea(stat_id);
""")
database_proxy.get_conn().execute("""
CREATE INDEX IF NOT EXISTS "ix_MapArea_stat_id_stat_val_attr_id" ON MapArea(stat_id, stat_val_attr_id);
""")
database_proxy.get_conn().execute("""
Select AddGeometryColumn ("MapArea", "Geometry", ?, "POLYGON", 2);
""", (SRID,))
database_proxy.get_conn().execute("""
SELECT CreateSpatialIndex("MapArea", "geometry")
""")
I am creating a table with the setup function. If you don't include geometry, peewee will generate a table with the following code.
database_proxy.create_tables([Stat, StatValue, StatValueAttr], True)
But if you want to include a column of geometry, you have to implement it yourself, like this:
#Geometry table needs to be implemented directly.
database_proxy.get_conn().execute("""
CREATE TABLE IF NOT EXISTS "MapArea" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"stat_id" TEXT,
"stat_val_attr_id" INTEGER ,
FOREIGN KEY(stat_val_attr_id) REFERENCES StatValueAttr(id));
""")
database_proxy.get_conn().execute("""
CREATE INDEX IF NOT EXISTS "ix_MapArea_stat_id" ON MapArea(stat_id);
""")
database_proxy.get_conn().execute("""
CREATE INDEX IF NOT EXISTS "ix_MapArea_stat_id_stat_val_attr_id" ON MapArea(stat_id, stat_val_attr_id);
""")
database_proxy.get_conn().execute("""
Select AddGeometryColumn ("MapArea", "Geometry", ?, "POLYGON", 2);
""", (SRID,))
database_proxy.get_conn().execute("""
SELECT CreateSpatialIndex("MapArea", "geometry")
""")
In this code, after creating a table containing columns other than geometry, add a column of geometry with "AddGeometryColumn" and index it with "Create SpatialIndex".
The index created for the geometry is a virtual table and cannot be used by Peewee. When using the index, you need to write the SQL yourself.
In the eStat region mesh, one statistic is divided for each primary mesh and has a statistic ID. For example, the following statistical IDs can be obtained from the statistical information of "2010 Census-World Geodetic System (1KM mesh) 20101001".
・ T000608M3622 ・ T000608M3623 ・ T000608M3624 ・ T000608M3653 Abbreviation
This ID is created from "T000608" and the primary mesh code.
In other words, the following steps are required to obtain regional mesh statistics.
The function to execute the getStatesList API to get all the statistical IDs related to "2010 Census-World Geodetic System (1KM Mesh) 20101001" is implemented as follows.
Reference source: https://github.com/mima3/estat/blob/master/estat_go_jp.py
estat_go_jp.py
def get_stats_list(api_key, search_kind, key_word):
"""
Search for statistics
@param api_key API key
@param search_kind Statistics type
@param key_word search key
"""
key_word = urllib.quote(key_word.encode('utf-8'))
url = ('http://api.e-stat.go.jp/rest/1.0/app/getStatsList?appId=%s&lang=J&searchKind=%s&searchWord=%s' % (api_key, search_kind, key_word))
req = urllib2.Request(url)
opener = urllib2.build_opener()
conn = opener.open(req)
cont = conn.read()
parser = etree.XMLParser(recover=True)
root = etree.fromstring(cont, parser)
ret = []
data_list = root.find('DATALIST_INF')
list_infs = data_list.xpath('.//LIST_INF')
for list_inf in list_infs:
item = {
'id': trim_data(list_inf.get('id'))
}
stat_name = list_inf.find('STAT_NAME')
if stat_name is not None:
item['stat_name'] = trim_data(stat_name.text)
item['stat_name_code'] = trim_data(stat_name.get('code'))
gov_org = list_inf.find('GOV_ORG')
if gov_org is not None:
item['gov_org'] = trim_data(gov_org.text)
item['gov_org_code'] = trim_data(gov_org.get('code'))
statistics_name = list_inf.find('STATISTICS_NAME')
if statistics_name is not None:
item['statistics_name'] = trim_data(statistics_name.text)
title = list_inf.find('TITLE')
if title is not None:
item['title'] = trim_data(title.text)
cycle = list_inf.find('CYCLE')
if cycle is not None:
item['cycle'] = cycle.text
survey_date = list_inf.find('SURVEY_DATE')
if survey_date is not None:
item['survey_date'] = trim_data(survey_date.text)
open_date = list_inf.find('OPEN_DATE')
if open_date is not None:
item['open_date'] = trim_data(open_date.text)
small_area = list_inf.find('SMALL_AREA')
if small_area is not None:
item['small_area'] = trim_data(small_area.text)
ret.append(item)
return ret
Basically, the contents acquired by urllib2 are parsed by lxml and stored in the directory.
To get the value for each stats ID, execute the following get_stats_id_value ().
Reference source: https://github.com/mima3/estat/blob/master/estat_go_jp.py
estat_go_jp.py
def get_meta_data(api_key, stats_data_id):
"""
Get meta information
"""
url = ('http://api.e-stat.go.jp/rest/1.0/app/getMetaInfo?appId=%s&lang=J&statsDataId=%s' % (api_key, stats_data_id))
req = urllib2.Request(url)
opener = urllib2.build_opener()
conn = opener.open(req)
cont = conn.read()
parser = etree.XMLParser(recover=True)
root = etree.fromstring(cont, parser)
class_object_tags = root.xpath('//METADATA_INF/CLASS_INF/CLASS_OBJ')
class_object = {}
for class_object_tag in class_object_tags:
class_object_id = class_object_tag.get('id')
class_object_name = class_object_tag.get('name')
class_object_item = {
'id': trim_data(class_object_id),
'name': trim_data(class_object_name),
'objects': {}
}
class_tags = class_object_tag.xpath('.//CLASS')
for class_tag in class_tags:
class_item = {
'code': trim_data(class_tag.get('code')),
'name': trim_data(class_tag.get('name')),
'level': trim_data(class_tag.get('level')),
'unit': trim_data(class_tag.get('unit'))
}
class_object_item['objects'][class_item['code']] = class_item
class_object[class_object_id] = class_object_item
return class_object
def _get_stats_id_value(api_key, stats_data_id, class_object, start_position, filter_str):
"""
Get statistics
"""
url = ('http://api.e-stat.go.jp/rest/1.0/app/getStatsData?limit=10000&appId=%s&lang=J&statsDataId=%s&metaGetFlg=N&cntGetFlg=N%s' % (api_key, stats_data_id, filter_str))
if start_position > 0:
url = url + ('&startPosition=%d' % start_position)
req = urllib2.Request(url)
opener = urllib2.build_opener()
conn = opener.open(req)
cont = conn.read()
parser = etree.XMLParser(recover=True)
root = etree.fromstring(cont, parser)
ret = []
row = {}
datas = {}
value_tags = root.xpath('//STATISTICAL_DATA/DATA_INF/VALUE')
for value_tag in value_tags:
row = {}
for key in class_object:
val = value_tag.get(key)
if val in class_object[key]['objects']:
text = class_object[key]['objects'][val]['name']
row[key] = trim_data(text.encode('utf-8'))
else:
row[key] = val.encode('utf-8')
row['value'] = trim_data(value_tag.text)
ret.append(row)
return ret
def get_stats_id_value(api_key, stats_data_id, filter):
"""
Get statistics
@param api_key API key
@param stats_data_id statistical table ID
@param filter_str filter character
"""
filter_str = ''
for key in filter:
filter_str += ('&%s=%s' % (key, urllib.quote(filter[key].encode('utf-8'))))
class_object = get_meta_data(api_key, stats_data_id)
return _get_stats_id_value(api_key, stats_data_id, class_object, 1, filter_str), class_object
You can search for get_stats_id_value with a filter, but this time we will get everything without a filter, so please do not consider that.
The procedure is to get the meta information of the statistics by get_meta_data. This will find out what attributes the statistic has.
Then use _get_stats_id_value () to get the stats and their attributes.
Unfortunately, there is no longitude / latitude information in the data obtained from eStat. Therefore, it is necessary to obtain the longitude and latitude from the mesh code. For Python, it's easier to use the following libraries.
python-geohash https://code.google.com/p/python-geohash/
This library can be installed with easy_install etc. as shown below.
easy_install python-geohash
To use it, just specify the mesh code, and the range of the corresponding mesh code will be displayed in longitude and latitude.
import jpgrid
jpgrid.bbox('305463') #Specify the mesh code →{'s': 154.375, 'e': 20.583333333333332, 'w': 20.5, 'n': 154.5}
Using these, the code to store regional mesh statistics in the database looks like this:
Reference source: https://github.com/mima3/estat/blob/master/estat_db.py
estat_db.py
def import_stat(api_key, stat_id):
"""
Import statistics
@param api_key e-stat API KEY
@param stat_id statistic ID
"""
with database_proxy.transaction():
MapArea.delete().filter(MapArea.stat_id == stat_id).execute()
StatValueAttr.delete().filter(StatValueAttr.stat_id == stat_id).execute()
StatValue.delete().filter(StatValue.stat_id == stat_id).execute()
Stat.delete().filter(Stat.stat_id == stat_id).execute()
tableinf = get_table_inf(api_key, stat_id)
stat_row = Stat.create(
stat_id=stat_id,
stat_name=tableinf['stat_name'],
stat_name_code=tableinf['stat_name_code'],
title=tableinf['title'],
gov_org=tableinf['gov_org'],
gov_org_code=tableinf['gov_org_code'],
survey_date=tableinf['survey_date']
)
values, class_object = get_stats_id_value(api_key, stat_id, {})
for vdata in values:
if not 'value' in vdata:
continue
value_row = StatValue.create(
stat_id=stat_id,
value=vdata['value']
)
for k, v in vdata.items():
stat_val_attr = StatValueAttr.create(
stat_id=stat_id,
stat_value=value_row,
attr_name=k,
attr_value=v
)
if k == 'area':
#Mesh code
meshbox = jpgrid.bbox(v)
database_proxy.get_conn().execute(
'INSERT INTO MapArea(stat_id, stat_val_attr_id, geometry) VALUES(?,?,BuildMBR(?,?,?,?,?))',
(stat_id, stat_val_attr.id, meshbox['s'], meshbox['e'], meshbox['n'], meshbox['w'], SRID)
)
database_proxy.commit()
If the attribute name is area, the geometry information is stored in MapArea as a region mesh. When using R-Index, it is difficult to operate with Peewee's ORM, so the SQL statement is written directly here.
The script for import using the above is as follows.
https://github.com/mima3/estat/blob/master/import_estat.py
To use it, specify API_KEY, statistic title, path to mod_spatialite.dll, and path to DB.
python import_estat.py API_KEY 2 2010 Census-World Geodetic System(1KM mesh)20101001 C:\tool\spatialite\mod_spatialite-4.2.0-win-x86\mod_spatialite.dll estat.sqlite
This script deals with the character code in cp932 for Windows, so please modify it according to your terminal.
Next, I will explain how to return the region mesh of the specified range as GeoJSON on the Web server. I'm using bottle here, but I think it's okay to use your favorite web framework around here, and it may not be necessary to use a web framework.
Bottle: Python Web Framework http://bottlepy.org/docs/dev/index.html
First, specify the range and write the code to get the corresponding area mesh from the DB.
estat_db.py
def get_mesh_stat(stat_id_start_str, attr_value, xmin, ymin, xmax, ymax):
"""
Get regional mesh stats
@param stat_id_start_str Statistics ID start character Get all IDs starting with this character.
@param attr_Value to narrow down in value cat01
@param xmin acquisition range
@param ymin acquisition range
@param xmax acquisition range
@param ymax Acquisition range
"""
rows = database_proxy.get_conn().execute("""
SELECT
statValue.value,
AsGeoJson(MapArea.Geometry)
FROM
MapArea
inner join idx_MapArea_Geometry ON pkid = MapArea.id AND xmin > ? AND ymin > ? AND xmax < ? AND ymax < ?
inner join statValueAttr ON MapArea.stat_val_attr_id = statValueAttr.id
inner join statValueAttr AS b ON b.stat_value_id = statValueAttr.stat_value_id AND b.attr_value = ?
inner join statValue ON statValue.id = b.stat_value_id
WHERE
MapArea.stat_id like ?;
""", (xmin, ymin, xmax, ymax, attr_value, stat_id_start_str + '%'))
ret = []
for r in rows:
ret.append({
'value': r[0],
'geometory': r[1]
})
return ret
Geometry is converted to GeoJSON with AsGeoJson (MapArea.Geometry) to treat it as GeoJSON. Combine this and return it to the client as a single GeoJSON as shown below.
Reference source: https://github.com/mima3/estat/blob/master/application.py
application.py
@app.get('/json/get_population')
def getPopulation():
stat_id = request.query.stat_id
swlat = request.query.swlat
swlng = request.query.swlng
nelat = request.query.nelat
nelng = request.query.nelng
attrval = request.query.attr_value
ret = estat_db.get_mesh_stat(stat_id, attrval, swlng, swlat, nelng, nelat)
res = {'type': 'FeatureCollection', 'features': []}
for r in ret:
item = {
'type': 'Feature',
'geometry': json.loads(r['geometory']),
'properties': {'value': r['value']}
}
res['features'].append(item)
response.content_type = 'application/json;charset=utf-8'
return json.dumps(res)
This will allow you to handle requests such as:
http://needtec.sakura.ne.jp/estat/json/get_population?stat_id=T000608&attr_value=%E4%BA%BA%E5%8F%A3%E7%B7%8F%E6%95%B0&swlat=35.503426100823496&swlng=139.53192492382811&nelat=35.83811583873688&nelng=140.08124133007811
response:
{"type": "FeatureCollection", "features": [{"geometry": {"type": "Polygon", "coordinates": [[[139.5374999999999, 35.6], [139.5499999999999, 35.6], [139.5499999999999, 35.60833333333333], [139.5374999999999, 35.60833333333333], [139.5374999999999, 35.6]]]},Abbreviation
Here, we will describe an example of drawing a regional mesh using Google Map.
demo: http://needtec.sakura.ne.jp/estat/population
By using addGeoJSON in GoogleMap, you can draw any GeoJSON on GoogleMAP.
population.js
features = map.data.addGeoJson(result);
var max = 0;
for (var i = 0; i < features.length; i++) {
if (max < features[i].getProperty('value')) {
max = features[i].getProperty('value');
}
}
map.data.setStyle(styleFeature(max));
At this time, you can specify the GeoJSON style with setStyle. In this example, the color density is changed according to the property value.
population.js
var styleFeature = function(max) {
var colorScale = d3.scale.linear().domain([0, max]).range(["#CCFFCC", "red"]);
return function(feature) {
return {
strokeWeight : 1,
fillColor: colorScale(+feature.getProperty('value')),
fillOpacity: 0.5
};
};
}
reference: ** Display GeoJSON data on Google Map ** http://shimz.me/blog/google-map-api/3445
Drawing a large area at once is heavy, so the enlargement function is disabled.
Here, D3.js is used to describe an example of drawing a regional mesh using SVG.
demo: http://needtec.sakura.ne.jp/tokuraku/passenger.html
passenger.js
$('#selMesh').change(function() {
svgMeshGrp
.attr('class', 'tracts')
.selectAll('rect')
.data([])
.exit()
.remove();
var sel = $('#selMesh').val();
if (!sel) {
return;
}
function drawMesh(json) {
console.log(json);
var max = 0;
for (var i = 0; i < json.features.length; ++i) {
var v = parseInt(json.features[i].properties.value);
if (max < v) {
max = v;
}
}
console.log(max);
var colorScale = d3.scale.linear().domain([0, max]).range([0.0, 0.8]);
svgMeshGrp
.attr('class', 'tracts')
.selectAll('rect')
.data(json.features)
.enter()
.append('rect')
.attr('x', function(d, i) {
var extX = d3.extent(d.geometry.coordinates[0], function(d) { return d[0];});
var extY = d3.extent(d.geometry.coordinates[0], function(d) { return d[1];});
var pt = projection([extX[0], extY[0]]);
return pt[0];
})
.attr('y', function(d) {
var extX = d3.extent(d.geometry.coordinates[0], function(d) { return d[0];});
var extY = d3.extent(d.geometry.coordinates[0], function(d) { return d[1];});
var pt = projection([extX[0], extY[0]]);
return pt[1];
})
.attr('width', function(d) {
var extX = d3.extent(d.geometry.coordinates[0], function(d) { return d[0];});
var extY = d3.extent(d.geometry.coordinates[0], function(d) { return d[1];});
var ptMin = projection([extX[0], extY[0]]);
var ptMax = projection([extX[1], extY[1]]);
return Math.abs(ptMax[0] - ptMin[0]);
})
.attr('height', function(d) {
var extX = d3.extent(d.geometry.coordinates[0], function(d) { return d[0];});
var extY = d3.extent(d.geometry.coordinates[0], function(d) { return d[1];});
var ptMin = projection([extX[0], extY[0]]);
var ptMax = projection([extX[1], extY[1]]);
return Math.abs(ptMax[1] - ptMin[1]);
})
.attr('fill-opacity', function(d) {
console.log('color' , d.properties.value, colorScale(d.properties.value));
return colorScale(d.properties.value);
})
.attr('fill' , '#00f');
}
if (stat_dict[sel].json) {
drawMesh(stat_dict[sel].json);
} else {
$.blockUI({ message: '<img src="/railway_location/img/loading.gif" />' });
var api = encodeURI(util.format('/estat/json/get_population?swlat=%d&swlng=%d&nelat=%d&nelng=%d&stat_id=%s&attr_value=%s',
swlat, swlng, nelat, nelng, stat_dict[sel].id, stat_dict[sel].attrval
));
d3.json(api, function(json) {
drawMesh(json);
stat_dict[sel].json = json;
$.unblockUI();
});
}
}).keyup(function() {
$(this).blur().focus();
});
The point to note here is that GeoJSON is used to draw with rect instead of drawing as path. Adding 1000 paths to svg makes it heavier, but rect is lighter. Since the area mesh is always a quadrangle, it is better to draw it with rect.
I explained how to describe a regional mesh at the government general statistics window eStat. I think that it was possible to demonstrate that the processing speed can be increased by temporarily storing Geometry information such as Spatialite in a DB that can store it.