Convert the PDF of member stores of the Ishikawa Go To Eat Campaign to CSV
pip install tabula-py
pip install pandas
pip install beautifulsoup4
pip install requests
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tabula import read_pdf
# Scrape the member-store listing page, read every table from the PDFs it
# links to, and write the combined rows to "ishikawa.csv".

# Listing page; PDF hrefs found on it are resolved relative to this URL.
url = "https://ishikawa-gotoeat-cpn.com/member_store/"

# Without a timeout, requests waits indefinitely on a stalled server.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Skip anchors with a missing or empty href: urljoin() would otherwise fall
# back to the listing page URL itself, which is HTML rather than a PDF.
pdf_urls = [
    urljoin(url, tag.get("href"))
    for tag in soup.select("p.btn.btn_red.btn_pdf > a")
    if tag.get("href")
]

# read_pdf returns a list of DataFrames (one per detected table); flatten
# the tables of all PDFs into a single list.
dfs = [
    tbl
    for pdf_url in pdf_urls
    for tbl in read_pdf(pdf_url, pages="all", lattice=True)
]

# pd.concat([]) raises a ValueError with an unhelpful message; fail with the
# same exception type but say what actually went wrong.
if not dfs:
    raise ValueError(
        f"No tables found: {len(pdf_urls)} PDF link(s) matched on {url}"
    )

# Relabel the columns and renumber the rows from 0 across all tables.
# NOTE(review): set_axis assumes every extracted table has exactly two
# columns -- confirm against the current PDFs.
df = (
    pd.concat(dfs)
    .set_axis(["Municipal name", "Store name"], axis=1)
    .reset_index(drop=True)
)

# "utf_8_sig" prefixes the file with a UTF-8 BOM (presumably so spreadsheet
# applications detect the encoding). The row index is written as the first
# column because to_csv defaults to index=True.
df.to_csv("ishikawa.csv", encoding="utf_8_sig")
Recommended Posts