Scraping the incumbent governors from the Governors' File of the National Governors' Association
import datetime
import re
import pandas as pd
import matplotlib.pyplot as plt
# Offset added to an era year to obtain the Gregorian year
# (Showa 1 = 1926, Heisei 1 = 1989, Reiwa 1 = 2019).
# Keyed by both the kanji and the romanized era name.
_ERA_OFFSETS = {
    "昭和": 1925,
    "Showa": 1925,
    "平成": 1988,
    "Heisei": 1988,
    "令和": 2018,
    "Reiwa": 2018,
}

# Tokens that denote the first year of an era instead of the digit 1.
_FIRST_YEAR_MARKERS = frozenset({"元", "Former"})

# Era name, era year (or first-year marker), month and day, each followed by
# its unit.  Optional whitespace is tolerated around every number because
# table cells are often space-padded.
_WAREKI_RE = re.compile(
    r"(昭和|平成|令和|Showa|Heisei|Reiwa)"
    r"\s*([0-9]{1,2}|元|Former)\s*(?:年|Year)"
    r"\s*([0-9]{1,2})\s*(?:月|Moon)"
    r"\s*([0-9]{1,2})\s*(?:日|Day)"
)


def wareki2date(s):
    """Convert a Japanese-era (wareki) date string to a ``pd.Timestamp``.

    Both the native notation (e.g. ``"平成元年4月1日"``) and the romanized
    tokens used elsewhere in this file (e.g. ``"Showa55Year 1Moon 2Day"``)
    are recognised.  The first date found anywhere in the string is used.

    Args:
        s: Text that may contain a Showa, Heisei or Reiwa date.  Non-string
            values (such as the NaN of an empty table cell) are accepted.

    Returns:
        The corresponding ``pd.Timestamp``, or ``pd.NaT`` when ``s`` is not a
        string or contains no recognisable era date.

    Raises:
        ValueError: If the matched numbers do not form a valid calendar date
            (for example month 13); raised by ``pd.Timestamp``.
    """
    # Empty cells arrive as float NaN; re.search would raise TypeError on them.
    if not isinstance(s, str):
        return pd.NaT

    m = _WAREKI_RE.search(s)
    if m is None:
        return pd.NaT

    era, year_text, month_text, day_text = m.groups()
    era_year = 1 if year_text in _FIRST_YEAR_MARKERS else int(year_text)
    return pd.Timestamp(
        _ERA_OFFSETS[era] + era_year, int(month_text), int(day_text)
    )
# Download the page and keep the first table whose HTML "summary" attribute
# matches the given text.
tables = pd.read_html(
    "http://www.nga.gr.jp/app/chijifile/",
    attrs={"summary": "Search result list"},
)
df = tables[0]

# Convert every Japanese-calendar date column to a Western-calendar timestamp.
for column in (
    "Birthday",
    "Election date",
    "Term expiration date",
    "Inauguration date",
):
    df[column] = df[column].apply(wareki2date)

# Strip the trailing unit characters from the age text and store it as int.
age_text = df["age"].str.rstrip("age")
df["age"] = age_text.astype(int)

# Bar chart of how many rows fall into each age bracket, youngest first.
age_bins = [20, 40, 45, 50, 55, 60, 65, 70, 75, 80]
age_counts = df["age"].value_counts(bins=age_bins).sort_index()
age_counts.plot.bar()

# Summary statistics of the age column (displayed when run in a notebook).
df["age"].describe()
count    47.000000
mean     61.680851
std       9.273868
min      39.000000
25%      56.000000
50%      60.000000
75%      69.500000
max      78.000000
Name: age, dtype: float64
# Summary statistics of the "Number of wins" column (displayed when run in a
# notebook).
win_counts = df["Number of wins"]
win_counts.describe()
count    47.000000
mean      2.765957
std       1.447828
min       1.000000
25%       1.000000
50%       3.000000
75%       4.000000
max       7.000000
Name: Number of wins, dtype: float64
Note: the list published by the Japan Association of City Mayors contains only names.
Recommended Posts