Information on store openings and closings nationwide is collected on a site called 開店閉店.com (kaiten-heiten.com). As a scraping exercise, let's draw a color map, by prefecture, of the balance of store openings and closings in March-April of 2019 and 2020.
The approach is to count, on the above site, the number of stores opened or closed in each region during each period. Conveniently, the site is organized by region and the listing pages are sorted in descending date order, so we make use of that. Scraping is done with Python's beautifulsoup4.
Let's look at the results first (the implementation follows below). The colorbar values are, for each prefecture, the normalized ratio $ratio = (N_{opened} - N_{closed}) / (N_{opened} + N_{closed})$.
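To make the normalization concrete, here is a minimal sketch with made-up counts (the real counts come from the scraper below); open_close_ratio is just an illustrative helper and is not part of the scripts that follow.
# Illustrative only: the ratio is bounded in [-1, 1];
# -1 means only closings, +1 means only openings.
def open_close_ratio(n_opened, n_closed):
    return (n_opened - n_closed) / (n_opened + n_closed)

print(open_close_ratio(30, 50))  # -0.25: more closings than openings
print(open_close_ratio(50, 50))  #  0.0: openings and closings balanced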
Looking at the resulting maps, there is no significant difference attributable to the coronavirus. Whether the impact is simply small, or will only appear with a time lag, is something to watch in future trends. I hope the damage turns out to be limited.
First, collect the data from the URL above.
shop_openup_closedown_ratio_1.py
from bs4 import BeautifulSoup
from urllib import request
import numpy as np


def period(year, month, year_s=2019, year_e=2019, month_s=3, month_e=4):
    """Return 2 if (year, month) falls inside the target period,
    1 if it is newer than the period (listing pages are newest-first),
    and False if it is older."""
    res = False
    if (year <= year_e) & (year >= year_s) & (month >= month_s) & (month <= month_e):
        res = 2
    if year >= year_e:
        if year > year_e:
            res = 1
        else:
            if month > month_e:
                res = 1
    return res


def main(year_s=2020, year_e=2020, month_s=4, month_e=4):
    dic = {}
    states = ['close', 'open']
    for state in range(len(states)):
        # Top page of closings/openings, organised by region
        url = 'https://kaiten-heiten.com/heiten/area-' + states[state] + '/'
        response = request.urlopen(url)
        soup = BeautifulSoup(response, 'html.parser')
        for a in soup.find_all('a', class_="links"):
            link = a.get('href')
            region = a.text
            print(region)
            if dic.get(region) is None:
                dic[region] = [0, 0]  # [closed, opened]
            # First listing page of this region
            url = link
            response = request.urlopen(url)
            soup = BeautifulSoup(response, 'html.parser')
            shop_list = soup.find_all('span', class_='post_time')
            # post_time text looks like " 2020年04月10日"; slice out year and month
            year_last = int(shop_list[-1].text[:5])
            month_last = int(shop_list[-1].text[6:8])
            for span in shop_list:
                year = int(span.text[:5])
                month = int(span.text[6:8])
                if period(year, month, year_s, year_e, month_s, month_e) == 2:
                    dic[region][state] += 1
            # Follow "next page" links while the oldest entry on the page
            # is still newer than or inside the target period
            while period(year_last, month_last, year_s, year_e, month_s, month_e) >= 1:
                next_p = soup.find('a', class_='next page-numbers')
                if next_p is not None:
                    link = next_p.get('href')
                else:
                    break
                url = link
                response = request.urlopen(url)
                soup = BeautifulSoup(response, 'html.parser')
                shop_list = soup.find_all('span', class_='post_time')
                year_last = int(shop_list[-1].text[:5])
                month_last = int(shop_list[-1].text[6:8])
                print(year_last, month_last)
                for span in shop_list:
                    year = int(span.text[:5])
                    month = int(span.text[6:8])
                    if period(year, month, year_s, year_e, month_s, month_e) == 2:
                        dic[region][state] += 1
    # dic maps region name -> [closed count, opened count]
    regions = list(dic.keys())
    vals_ = np.array(list(dic.values()))
    # The first 22 entries are Hokkaido sub-areas; merge them into a single prefecture
    hk = 22
    hk_name = ['北海道']  # japanmap expects Japanese prefecture names
    hk_vals = np.array([vals_[:hk, 0].sum(), vals_[:hk, 1].sum()])
    regions = hk_name + regions[hk:]
    vals = [list(hk_vals)] + list(vals_[hk:])
    # ratio = (N_opened - N_closed) / (N_opened + N_closed)
    ratio = np.zeros(len(regions))
    for i in range(len(regions)):
        ratio[i] = (vals[i][1] - vals[i][0]) / (vals[i][1] + vals[i][0])
    return regions, vals, ratio, vals_


regions_2020, vals_2020, ratio_2020, vals_raw_2020 = main(2020, 2020, 3, 4)
regions_2019, vals_2019, ratio_2019, vals_raw_2019 = main(2019, 2019, 3, 4)
The script above collects the data.
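As a quick check, the returned values can be printed region by region; this is just a sketch assuming the regions_2020, vals_2020, and ratio_2020 variables produced above (each vals entry holds [closed, opened] counts).
# Sketch: print opened/closed counts and the normalized ratio per region
for name, (n_closed, n_opened), r in zip(regions_2020, vals_2020, ratio_2020):
    print(f'{name}: opened={n_opened}, closed={n_closed}, ratio={r:+.2f}')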
shop_openup_closedown_ratio_2.py
import matplotlib.colors
import matplotlib.pyplot as plt
from japanmap import picture


def mapping(regions, ratio, name, a=0.1, b=1):
    """Color each prefecture by its ratio and save the map as <name>.png."""
    n_min = a
    n_max = b
    cmap = plt.cm.rainbow
    norm = matplotlib.colors.Normalize(vmin=n_min, vmax=n_max)

    def color_scale(r):
        # Map a ratio onto the colormap and return an RGB tuple in the
        # 0-255 range that japanmap's picture() expects
        tmp = cmap(norm(r))
        return (tmp[0] * 255, tmp[1] * 255, tmp[2] * 255)

    dic = {}
    for k in range(len(regions)):
        map_val = color_scale(ratio[k])
        dic[regions[k]] = map_val

    lab = name + ' 3~4'  # e.g. "2020 3~4" (March-April)
    fig = plt.figure(figsize=(15, 9))
    plt.title(lab, fontsize=15)
    plt.imshow(picture(dic))
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    plt.colorbar(sm, ax=plt.gca())
    plt.show()
    fig.savefig(name)
The function above arranges the data and draws the map. Now plot it with the following.
shop_openup_closedown_ratio_3.py
# Use a common color scale across both years so the maps are comparable
a = list(ratio_2020) + list(ratio_2019)
max_n = max(a)
min_n = min(a)
mapping(regions_2019, ratio_2019, '2019', min_n, max_n)
mapping(regions_2020, ratio_2020, '2020', min_n, max_n)