This is a memo, written so I don't forget, summarizing a program that scrapes the meteorological data published by the Japan Meteorological Agency and outputs it as a CSV file.
References: "How to get weather information using the weather API in Python"; "List of precautions for web scraping"; [Python web scraping technique collection "There is no value that cannot be obtained" (JavaScript support added 6/12)](https://qiita.com/Azunyan1111/items/b161b998790b1db2ff7a)
Environment: OS: Windows 10 (64-bit); Python: 3.7.4
Main package name | Use |
---|---|
BeautifulSoup | Web page analysis & tag search&Data shaping |
logging | Log output and settings |
namedtuple | Tuple operation |
pandas | For Dataframe operations |
requests | Get web page |
The procedure is simple. ① Use the requests package to fetch the web page that contains the data you want.
python
r = requests.get('URL')
② Parse the page contents with the Beautiful Soup package.
python
soup = BeautifulSoup(r.text,'lxml')
rows = soup.findAll("tr",class_="mtx") #Get tags by narrowing down the conditions
③ Collect the rows into a pandas DataFrame and write it out as a CSV file.
python
weatherData = pd.DataFrame(dataList[1:])
#Column name settings
weatherData.columns = nameList
#Output as csv file
weatherData.to_csv(f"{place}{startYear}_{endYear}.csv",encoding="SHIFT-JIS")
GetWeather.py
# coding: UTF-8
#*************************************************************************
#Weather information acquisition process
#
#Target data:Weather information in Fukuoka prefecture
#Processing content: Acquire weather information from the Japan Meteorological Agency and output a csv file
#* Confirmed that it can be used for commercial purposes according to the Japan Meteorological Agency Terms of Use.
# https://www.jma.go.jp/jma/kishou/info/coment.html
# python ver = 3.7.4
#*************************************************************************
import os
import sys
import requests
import logging.config
from time import time
import datetime as dt
import sqlite3
import pprint
import pandas as pd
from bs4 import BeautifulSoup
from collections import namedtuple
import csv
#*************************************************************************
# Module-level settings
#*************************************************************************
# Today's date as a YYYYMMDD string (main() embeds it in the log file name).
now = dt.datetime.now().strftime("%Y%m%d")

# Make the directory two levels above this script the working directory and
# remember it as exePath. Adjust the relative part if your work folder differs.
_script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(os.path.join(_script_dir, "../../"))
exePath = os.getcwd()

# Parallel lists: entry i of each list describes the same observation point.
place_codeA = [82]       # substituted for prec_no in base_url
place_codeB = [47807]    # substituted for block_no in base_url
place_name = ["Fukuoka"]

# Column names of the output CSV, in output order.
nameList = [
    "point",
    "date",
    "Average air pressure on land(hPa)",
    "Average atmospheric pressure in the sea(hPa)",
    "'Precipitation(mm)",
    "Average temperature(℃)",
    "Average humidity(%)",
    "Average wind speed(m/s)",
    "Daylight hours(h)",
    "Noon(06:00-18:00)",
    "Night(18:00-Next day 06:00)",
]

# Accumulated rows; the first entry is the header row (the nameList object).
dataList = [nameList]
collist = ["Point"]

# URL template for one month of daily data; the placeholders are, in order:
# prec_no, block_no, year, month.
base_url = (
    "http://www.data.jma.go.jp/obd/stats/etrn/view/daily_s1.php"
    "?prec_no=%s&block_no=%s&year=%s&month=%s&day=1&view=p1"
)
#*************************************************************************
# Named tuple definitions
#*************************************************************************
# [ONL : OutputNameList] Output column names, addressable by short field name.
partsO = [
    "PT", "TIME", "EHPA", "SHPA", "RAIN", "TEMPER",
    "HUMID", "WIND", "SUM", "AM", "PM",
]
OLNM = namedtuple("ONL", partsO)

# One instance holding the column name that belongs to each short field name.
OCNL = OLNM(
    PT="point",
    TIME="date",
    EHPA="Average air pressure on land(hPa)",
    SHPA="Average atmospheric pressure in the sea(hPa)",
    RAIN="'Precipitation(mm)",
    TEMPER="Average temperature(℃)",
    HUMID="Average humidity(%)",
    WIND="Average wind speed(m/s)",
    SUM="Daylight hours(h)",
    AM="Noon(06:00-18:00)",
    PM="Night(18:00-Next day 06:00)",
)
#*************************************************************************
#Function list
#*************************************************************************
#*************************************************************************
# ParseFloat: Convert a string to a float type
# argument: str - the value to convert (text, a number, or None)
#*************************************************************************
def ParseFloat(str):
    """Convert *str* to a float, falling back to 0.0 when that is impossible.

    Args:
        str: The value to convert. The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        ``float(str)`` when the conversion succeeds, otherwise ``0.0``.
        Note that a missing or unparsable value is therefore
        indistinguishable from a real zero in the output.
    """
    try:
        return float(str)
    # Only conversion failures are treated as "no value"; KeyboardInterrupt
    # and SystemExit must not be swallowed here.
    except (TypeError, ValueError, OverflowError):
        return 0.0
#*************************************************************************
# Main processing
#*************************************************************************
def _parse_daily_row(place, year, month, cells):
    """Build one output record (ordered like nameList) from a row's <td> cells."""
    return [
        place,                                                       # point
        str(year) + "/" + str(month) + "/" + str(cells[0].string),  # date
        ParseFloat(cells[1].string),    # Average air pressure on land(hPa)
        ParseFloat(cells[2].string),    # Average atmospheric pressure in the sea(hPa)
        ParseFloat(cells[3].string),    # Precipitation(mm)
        ParseFloat(cells[6].string),    # Average temperature(℃)
        ParseFloat(cells[9].string),    # Average humidity(%)
        ParseFloat(cells[11].string),   # Average wind speed(m/s)
        ParseFloat(cells[16].string),   # Daylight hours(h)
        cells[19].string,               # Noon(06:00-18:00)
        cells[20].string,               # Night(18:00-Next day 06:00)
    ]


def main():
    """Scrape daily weather data for every configured place and write CSV files.

    For each entry of place_name, the twelve monthly pages of every year from
    startYear to endYear are fetched via base_url, the ``tr.mtx`` table rows
    are converted into records, and one file named
    ``{place}{startYear}_{endYear}.csv`` (SHIFT-JIS encoded) is written to the
    current working directory. Every record is also appended to the
    module-level dataList.

    Any exception raised during scraping or output is logged and not
    re-raised. A status code is always printed to stdout without a trailing
    newline: 0 on success, 1 on failure.
    """
    returnValue = 0
    # Log settings
    logging.config.fileConfig(f"{exePath}/python/logging.conf", defaults={'logfilename': f"log/UI_{now}.log" })
    logger = logging.getLogger()
    logger.info("Meteorological information acquisition processing Start recording")
    startYear = 2018
    endYear = 2018
    try:
        # The three place lists are parallel, so walk them in lockstep instead
        # of looking the index up by name (which breaks on duplicate names).
        for place, codeA, codeB in zip(place_name, place_codeA, place_codeB):
            # Rows of this place only, so each CSV holds a single place even
            # though the shared dataList keeps growing across places.
            placeRows = []
            for year in range(startYear, endYear + 1):
                # January through December of the year
                for month in range(1, 13):
                    # A timeout keeps a stalled connection from hanging the run.
                    r = requests.get(base_url % (codeA, codeB, year, month), timeout=30)
                    # Fail loudly on an HTTP error instead of parsing an error page.
                    r.raise_for_status()
                    r.encoding = r.apparent_encoding
                    print(dt.datetime.now())
                    # Scrape the target table
                    soup = BeautifulSoup(r.text, 'lxml')
                    rows = soup.find_all("tr", class_="mtx")
                    # The first four matches are skipped - presumably the
                    # table's header rows; verify against the page layout.
                    for row in rows[4:]:
                        rowData = _parse_daily_row(place, year, month, row.find_all("td"))
                        placeRows.append(rowData)
                        dataList.append(rowData)
            # Passing the column names to the constructor also works when no
            # rows were scraped, where assigning .columns afterwards raises.
            weatherData = pd.DataFrame(placeRows, columns=nameList)
            print(weatherData)
            # Output as csv file
            weatherData.to_csv(f"{place}{startYear}_{endYear}.csv", encoding="SHIFT-JIS")
    # When an exception occurs (KeyboardInterrupt/SystemExit still propagate)
    except Exception:
        # Report the failure; previously 0 was printed even after an error.
        returnValue = 1
        logger.info("Weather information acquisition process Abnormal end")
        logger.exception("[Ended abnormally]")
    # When it ends normally
    else:
        logger.info("Weather information acquisition process completed normally")
    finally:
        print("{0}".format(returnValue), end="")
# Run the scraper only when this file is executed directly as a script;
# importing it as a module must not start any processing.
if __name__ == "__main__":
    main()
Recommended Posts