Scraping the member stores of the Go To Eat Osaka campaign
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Crawl the Go To Eat Osaka member-store search results page by page,
# collect one row per store, and export the rows to osaka.csv.

# Seconds to wait on the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Column order of the exported CSV. reindex() drops scraped keys that are not
# listed here and leaves an empty cell where a store lacks a listed key.
CSV_COLUMNS = [
    "Store name",
    "Genre",
    "area",
    "Postal code",
    "Street address",
    "TEL",
    "business hours",
    "Regular holiday",
]


def _parse_store(li):
    """Build one row dict from a single search-result ``<li>`` element.

    Raises:
        ValueError: if the store's tag list does not yield exactly two text
            fragments (genre, then area).
        AttributeError: if an expected child element is missing from ``li``.
    """
    data = {}
    data["Store name"] = li.select_one("p.name").get_text(strip=True)
    data["Genre"], data["area"] = li.select_one("ul.tag_list").stripped_strings

    # Each table row is a header/value pair; the header text becomes the key.
    # NOTE(review): the literal compared below must match the header text the
    # site actually serves -- confirm against the live page.
    for tr in li.table.select("tr"):
        k = tr.th.get_text(strip=True)
        if k == "Street address":
            # The address cell holds several text fragments: the first is
            # stored as the postal code, the last as the address itself with
            # runs of whitespace collapsed to single spaces.
            v = list(tr.td.stripped_strings)
            data["Postal code"] = v[0]
            data[k] = " ".join(v[-1].split())
        else:
            data[k] = tr.td.get_text(strip=True)
    return data


result = []
url = "https://goto-eat.weare.osaka-info.jp/?search_element_0_0=2&search_element_0_1=3&search_element_0_2=4&search_element_0_3=5&search_element_0_4=6&search_element_0_5=7&search_element_0_6=8&search_element_0_7=9&search_element_0_8=10&search_element_0_9=11&search_element_0_cnt=10&search_element_1_0=12&search_element_1_1=13&search_element_1_2=14&search_element_1_3=15&search_element_1_4=16&search_element_1_5=17&search_element_1_6=18&search_element_1_7=19&search_element_1_8=20&search_element_1_9=21&search_element_1_10=22&search_element_1_11=23&search_element_1_12=24&search_element_1_13=25&search_element_1_14=26&search_element_1_15=27&search_element_1_16=28&search_element_1_17=29&search_element_1_cnt=18&searchbutton=%E5%8A%A0%E7%9B%9F%E5%BA%97%E8%88%97%E3%82%92%E6%A4%9C%E7%B4%A2%E3%81%99%E3%82%8B&csp=search_add&feadvns_max_line_0=2&fe_form_no=0"

while True:
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    result.extend(
        _parse_store(li)
        for li in soup.select("div.search_result_box > ul > li")
    )

    # Follow the "next page" link; stop when the pager has none (last page)
    # or the link carries no usable href.
    tag = soup.select_one("div.wp-pagenavi > a.nextpostslink")
    href = tag.get("href") if tag else None
    if not href:
        break
    # Resolve against the URL actually fetched so a relative link still works;
    # an absolute href passes through urljoin unchanged.
    url = urljoin(r.url, href)

    # Wait one second between consecutive page requests.
    time.sleep(1)

df = pd.DataFrame(result).reindex(columns=CSV_COLUMNS)
# utf_8_sig prefixes the file with a UTF-8 byte-order mark.
df.to_csv("osaka.csv", encoding="utf_8_sig")
Recommended Posts