--Try out Streamlit, a Python 3 framework that makes it easy to visualize data --The target data is the number of newly infected coronavirus cases by prefecture, published by NHK. - https://www3.nhk.or.jp/n-data/opendata/coronavirus/nhk_news_covid19_prefectures_daily_data.csv --This article describes the procedure for running Streamlit on an EC2 server and publishing it as a web application. --For how to build the VPC and the EC2 instance, please refer to these past articles: -Build a simple network with Amazon VPC -Start Amazon EC2 and try an ssh connection from a Mac terminal --The source code is published on GitHub, so feel free to clone it and try it out. - https://github.com/TaishiOIKAWA/Streamlit-corona/tree/main
--EC2 service -[Left Tab] Instance --Change only the following settings and create an instance by default for the others
--Network: Develop (10.1.0.0/16) --Subnet: public-subnet-1a-dev --Auto-assigned public IP: Enabled
--Add tag --Key: Name --Value: Streamlit-server
--New security group --Group name: streamlit-server-sg --Description: streamlit-server-sg --Add rules --Add HTTP, HTTPS, and a custom TCP rule for port 8501 (the port the Streamlit app is served on)
--Assuming to use an existing key --Access by session manager is also acceptable
--SSH access to streamlit-server instance (may be session manager)
--ssh -i ~/.ssh/develop.pem ec2-user@[Public IPv4 Address]
--Install Streamlit (assuming Amazon Linux 2)
- sudo su
- yum update -y
- yum install python3 -y
- pip3 install streamlit
--Download the data published by NHK - https://www3.nhk.or.jp/n-data/opendata/coronavirus/nhk_news_covid19_prefectures_daily_data.csv --On Linux, you can download it into the current directory with the following command.
```sh
curl -O https://www3.nhk.or.jp/n-data/opendata/coronavirus/nhk_news_covid19_prefectures_daily_data.csv
```
--Move the downloaded file into the same directory as corona.py
--Used when calculating the ratio of infected people to the population by prefecture
- https://www.e-stat.go.jp/stat-search/files?page=1&layout=datalist&toukei=00200521&tstat=000001011777&cycle=0&tclass1=000001094741&stat_infid=000031524010&tclass2val=0
--This file could not be downloaded with curl ... if you know the reason, please let me know
--Transfer the data downloaded from e-Stat to the server with scp or a similar tool
--Reference: https://qiita.com/ayies128/items/c4548a10adeb4775bc52
--Rename the file to population.csv
- mv c01.csv population.csv
--Move population.csv into the same directory as corona.py
--This time, in addition to streamlit, the following libraries are also imported --pandas: Data manipulation --altair: Graph drawing --datetime: Handle dates --calendar: Use when you want the last day
```py
import pandas as pd
import streamlit as st
import altair as alt #Package for drawing graphs
import datetime as dt #Handle dates
import calendar #Use when you want the last day
```
--Used when specifying the date in the select box
```py
# Read the clock once so the year and the month always describe the same
# instant; two separate calls could straddle a New Year rollover.
_TODAY = dt.datetime.today()
THIS_YEAR = _TODAY.year    # upper bound of the year select boxes
THIS_MONTH = _TODAY.month  # upper bound of the month select boxes in the current year
```
--Year: 2020 through this year --Month: 1 through 12 (January to December)
```py
# Selectable years: 2020 up to and including the current year.
RANGE_YEAR = [*range(2020, THIS_YEAR + 1)]
# Selectable months: 1 through 12.
RANGE_MONTH = [*range(1, 13)]
```
--Number of newly infected coronavirus cases by prefecture
--The list of prefecture names can be obtained easily, with duplicates removed, via DF['Name of prefectures'].unique()
```py
# Daily coronavirus infection counts per prefecture, loaded from the CSV
# that sits next to this script.
DF_CORONA_JAPAN = pd.read_csv("nhk_news_covid19_prefectures_daily_data.csv")

# Distinct prefecture names, kept for the prefecture select box.
# NOTE(review): the column label looks machine-translated; confirm it
# matches the header actually present in the CSV.
PREFACTURES = DF_CORONA_JAPAN["Name of prefectures"].unique()
```
--Population by prefecture
--Acquired by focusing on 2015 data
# Population by prefecture, read from the census CSV (cp932-encoded).
DF_POPURATION = pd.read_csv('population.csv', encoding='cp932')
# Keep only the rows of the 2015 census.
DF_POPURATION_HEISEI_LATEST = DF_POPURATION[DF_POPURATION['Year (year)'] == 2015]
--Constructor --Receive and retain data on infected persons by prefecture
--alt_graph method --Method for drawing a graph of newly infected persons during the target period
--get_ndays_cum method --A method that returns the total number of newly infected people in the last ndays
```py
class PrefactureGraphMaker():
    '''Draw and summarise the daily new-infection counts of one prefecture.

    The wrapped DataFrame is read through two columns: ``date`` and the
    daily-count column named by ``_DAILY_COLUMN``.
    '''

    # Column holding the number of new cases announced per day.
    _DAILY_COLUMN = 'Number of infected people in each area_Number of announcements per day'

    def __init__(self, df_prefacture):
        '''Keep the per-prefecture DataFrame for later drawing and summing.'''
        self.df_prefacture = df_prefacture

    def _rows_in_term(self, term):
        '''Return the rows whose ``date`` lies within the term, bounds included.

        ``term`` only needs ``start_datetime`` and ``end_datetime`` attributes.
        '''
        dates = self.df_prefacture['date']
        # Both bounds must hold, so the two masks are AND-ed. Comparing the
        # masks with ``==`` would also keep rows that fail both tests
        # (missing dates, or in-between rows when the bounds are reversed).
        in_term = (dates >= term.start_datetime) & (dates <= term.end_datetime)
        return self.df_prefacture[in_term]

    def alt_graph(self, term):
        '''Draw a bar chart of the daily new infections inside ``term``.

        The chart is rendered through ``st.altair_chart``; nothing is returned.
        '''
        df_term = self._rows_in_term(term)
        graph_slider = (
            alt.Chart(df_term)
            .mark_bar()
            .encode(x='date', y=self._DAILY_COLUMN, color=self._DAILY_COLUMN)
            .properties(width=800, height=640)
            .configure_axis(labelFontSize=20, titleFontSize=20)
        )
        st.altair_chart(graph_slider)

    def get_ndays_cum(self, ndays):
        '''Return the total of the daily counts over the last ``ndays`` rows.

        Missing values in the count column are skipped by the sum.
        '''
        return self.df_prefacture.tail(ndays)[self._DAILY_COLUMN].sum()
```
--Constructor --Place the select boxes side by side --Set the beginning and end of the drawing period --start_month_range method --If the start year is this year, only months up to the current month are offered --end_year_range method, end_month_range method --Do not offer any period before START YEAR/MONTH
```py
class TermSelectBox():
    '''Four side-by-side select boxes that choose the period to display.

    After construction the instance exposes ``start_year``, ``start_month``,
    ``end_year`` and ``end_month`` (the selected values), plus
    ``start_datetime`` (first day of the start month) and ``end_datetime``
    (last day of the end month, as given by ``get_last_date``).
    '''

    def __init__(self):
        '''Lay out the four select boxes in one row and read their values.'''
        # Newer Streamlit releases expose the column layout as ``st.columns``;
        # fall back to the older ``st.beta_columns`` name when it is absent.
        make_columns = getattr(st, 'columns', None) or st.beta_columns
        self.col1, self.col2, self.col3, self.col4 = make_columns(4)

        self.start_year = self.col1.selectbox('START YEAR', RANGE_YEAR)
        # Each later box narrows its options from the earlier selections,
        # so the order of these three calls matters.
        self.start_month = self.start_month_range()
        self.end_year = self.end_year_range()
        self.end_month = self.end_month_range()

        # The period starts on the first day of the start month ...
        self.start_datetime = dt.datetime(self.start_year, self.start_month, 1)
        # ... and ends on the last day of the end month, which varies by month.
        self.end_datetime = dt.datetime(
            self.end_year, self.end_month,
            get_last_date(self.end_year, self.end_month))

    def start_month_range(self):
        '''Offer start months; in the current year stop at the current month.'''
        if self.start_year == THIS_YEAR:
            months = RANGE_MONTH[:THIS_MONTH]
        else:
            months = RANGE_MONTH
        return self.col2.selectbox('START MONTH', months)

    def end_year_range(self):
        '''Offer end years from the selected start year onward.'''
        first = RANGE_YEAR.index(self.start_year)
        return self.col3.selectbox('END YEAR', RANGE_YEAR[first:])

    def end_month_range(self):
        '''Offer end months not before the start and not after the current month.

        The lower bound applies only when the period starts and ends in the
        same year. The upper bound applies only when the period ends in the
        current year; an end year in the past offers months through December.
        '''
        first = self.start_month - 1 if self.end_year == self.start_year else 0
        last = THIS_MONTH if self.end_year == THIS_YEAR else len(RANGE_MONTH)
        return self.col4.selectbox('END MONTH', RANGE_MONTH[first:last])
```
--Create an instance, call the method as you want, and you're done
if __name__ == "__main__":
    # Prefecture picker; the choice drives everything drawn below.
    prefacture_name = st.selectbox('Prefectures', PREFACTURES)
    # Rows of the selected prefecture only.
    df_prefacture = extract_prefacture_data(DF_CORONA_JAPAN, prefacture_name)

    # Period picker, then the graph title showing prefecture and period.
    _term_sb = TermSelectBox()
    write_prefacture_graph_title(prefacture_name, _term_sb)

    # Bar chart of new infections inside the chosen period.
    gm_n = PrefactureGraphMaker(df_prefacture)
    gm_n.alt_graph(_term_sb)

    # Number of most recent days to total; index=2 preselects 3 days.
    ndays = st.selectbox("Days", range(1, 31, 1), index=2)

    # Population of the selected prefecture from the 2015 census rows.
    df_population_prefacture = DF_POPURATION_HEISEI_LATEST[
        DF_POPURATION_HEISEI_LATEST['Name of prefectures'] == prefacture_name
    ]['Population (total)']
    # Take the single matching value explicitly instead of calling int() on
    # the whole Series.
    population = int(df_population_prefacture.iloc[0])

    # Compute the total once and reuse it for both messages.
    ndays_total = gm_n.get_ndays_cum(ndays)
    st.write(f'###here{ndays}Of the day{prefacture_name}'
             f'Total number of newly infected people:{ndays_total}Man')

    # Share of the population in percent; multiplied by 100 it becomes the
    # count per 10,000 people.
    ndays_per_population = ndays_total / population * 100
    # Fixed-point formatting keeps tiny ratios readable; slicing str(float)
    # would cut scientific notation such as '7.4e-06' mid-token.
    st.write(f'### {prefacture_name}Of the population'
             f'{ndays_per_population:.3f}%(About 10,000'
             f'{round(ndays_per_population * 100)}Man)')
--Run with streamlit run corona.py
```sh
You can now view your Streamlit app in your browser.
Network URL: http://[Private IP]:8501
External URL: http://[Public IP]:8501
```
--Try accessing http://[Public IP]:8501 with your local browser
--Success if the application can be displayed
--With Streamlit, we were able to visualize simple data in no time. --If you notice any mistakes or possible improvements, please leave a comment.
Recommended Posts