Scraping match results from the Schedule / Results page of the Japan Football League official website
import requests
from bs4 import BeautifulSoup
# Schedule / Results page of the Japan Football League official website.
url = "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1542&f=2020A001_spc.html"

# Without a timeout the script would wait forever on a server that never answers.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# One entry per table row: [section, row number within the table, *cell texts].
data = []

# Each "table-data" table is read as one section: the <th> of its first row
# supplies the section number, every following row supplies one entry.
for table in soup.find_all("table", class_="table-data"):
    trs = table.find_all("tr")

    # Keep only the digits of the heading. str.strip("Section") removes a set
    # of characters from both ends (not a prefix), so int() fails as soon as
    # the heading is surrounded by any other characters.
    heading = trs[0].th.get_text(strip=True)
    th = int("".join(ch for ch in heading if ch.isdecimal()))

    for i, tr in enumerate(trs[1:], 1):
        tds = [td.get_text(strip=True) for td in tr.find_all("td")]
        data.append([th, i] + tds)
import pandas as pd
# Tabulate the scraped rows and key them by (section, number).
df = pd.DataFrame(
    data=data,
    columns=[
        "section",
        "number",
        "date",
        "time",
        "home",
        "Score",
        "Away",
        "Stadium",
        "Remarks",
    ],
).set_index(["section", "number"])
df
# Split the "Score" text on "-" into one column per side.
df_score = df["Score"].str.split("-", expand=True)
df_score = df_score.rename(columns={0: "Home score", 1: "Away score"})

# Pieces that are not numbers become <NA>; the nullable Int64 dtype keeps the
# remaining values as integers.
for side in ("Home score", "Away score"):
    df_score[side] = pd.to_numeric(df_score[side], errors="coerce").astype("Int64")

# Attach the parsed scores and keep only the rows where both sides have one.
df1 = pd.concat([df, df_score], axis=1)
df1 = df1.dropna(subset=["Home score", "Away score"])
# Home results only
# View every match from the home side: the home team's goals become "score"
# and the visitors' goals become "Conceded". The score columns present in df1
# are "Home score" and "Away score"; selecting any other label raises KeyError
# and renaming a label that does not exist silently does nothing.
df_home = df1.loc[:, ["home", "Home score", "Away score"]].copy()
df_home.rename(
    columns={"home": "Team name", "Home score": "score", "Away score": "Conceded"},
    inplace=True,
)
# Venue marker; the win / loss cross-tabulation further down groups on it.
df_home["War"] = "home"
df_home.head()
# Away results only
# View every match from the visiting side: the away team's goals become
# "score" and the home team's goals become "Conceded". The score columns
# present in df1 are "Away score" and "Home score"; selecting any other label
# raises KeyError and renaming a label that does not exist silently does nothing.
df_away = df1.loc[:, ["Away", "Away score", "Home score"]].copy()
df_away.rename(
    columns={"Away": "Team name", "Away score": "score", "Home score": "Conceded"},
    inplace=True,
)
# Venue marker; the win / loss cross-tabulation further down groups on it.
df_away["War"] = "Away"
df_away.head()
# Combine home and away
df_total = pd.concat([df_home, df_away])

# Goal difference (goals scored minus goals conceded)
df_total["Goal goal"] = df_total["score"] - df_total["Conceded"]

# Add points: 1 for a goal difference of zero, 0 for a negative one, otherwise 3
df_total["Points"] = df_total["Goal goal"].apply(
    lambda diff: 1 if diff == 0 else (0 if diff < 0 else 3)
)

# Text label of the result, derived from the points
df_total["Win or lose"] = df_total["Points"].replace(
    {3: "victory", 1: "Draw", 0: "defeat"}
)
df_total.head()
# Scored / conceded / goal difference / points total
# Per-team totals. The goal-difference column created on df_total is named
# "Goal goal"; asking pivot_table for a label that does not exist raises
# KeyError. The aggregation is given as the string "sum" because recent pandas
# releases emit a FutureWarning when the builtin sum is passed instead.
pv_score = df_total.pivot_table(
    index="Team name",
    values=["score", "Conceded", "Goal goal", "Points"],
    aggfunc="sum",
)
pv_score
# Win / loss total
# Count rows per team for every (venue, result) combination.
pv_wl = pd.crosstab(df_total["Team name"], [df_total["War"], df_total["Win or lose"]])

# Rename column
# Flatten the two-level (venue, result) header into single labels made of the
# result followed by the venue initial, e.g. "victoryH" or "DrawA".
pv_wl.rename(columns={"Away": "A", "home": "H"}, inplace=True)
pv_wl.columns = pv_wl.columns.swaplevel(0, 1)
pv_wl.columns = ["".join(col).strip() for col in pv_wl.columns.values]

# crosstab only creates columns for combinations that actually occur (e.g. it
# omits "DrawA" while no away match has ended in a draw), so guarantee that
# all six exist before adding them up. The order matches what crosstab yields
# when every combination is present.
pv_wl = pv_wl.reindex(
    columns=["DrawA", "defeatA", "victoryA", "DrawH", "defeatH", "victoryH"],
    fill_value=0,
)

# Add total
pv_wl["victory"] = pv_wl["victoryH"] + pv_wl["victoryA"]
pv_wl["Draw"] = pv_wl["DrawH"] + pv_wl["DrawA"]
pv_wl["defeat"] = pv_wl["defeatH"] + pv_wl["defeatA"]

# Addition of games
pv_wl["Number of games"] = pv_wl["victory"] + pv_wl["Draw"] + pv_wl["defeat"]

# Verification
pv_wl
# Create evaluation value
# Work on a copy of df_total and add one number per row in which points weigh
# most (x10000), then goal difference (x100), then goals scored.
df2 = df_total.assign(
    **{
        "Evaluation value": df_total["Points"] * 10000
        + df_total["Goal goal"] * 100
        + df_total["score"]
    }
)
df2
# Evaluation value aggregation
# One row per team, one column per section; a team without a value in a
# section gets 0. The aggregation is given as the string "sum" because recent
# pandas releases emit a FutureWarning when the builtin sum is passed instead.
df3 = df2.pivot_table(
    values="Evaluation value",
    index="Team name",
    columns="section",
    aggfunc="sum",
    fill_value=0,
)
df3

# Cumulative sum of evaluation values
# Running total across the sections, computed row by row (one row per team).
df_eval = df3.apply(lambda d: d.cumsum(), axis=1)
df_eval

# Create ranking from evaluation value
# Rank within every section column; the highest running total is rank 1 and
# ties share the smallest rank (method="min").
df_chart = df_eval.rank(ascending=False, method="min").astype(int)
# Order the teams by their rank in the last section column.
df_chart.sort_values(by=df_chart.columns[-1], inplace=True)
df_chart
# Get final ranking
# The last column of df_chart holds the ranking after the latest section.
s1 = df_chart.iloc[:, -1].rename("Ranking")

# Change in rank from one section column to the next; the first column has
# nothing to be compared with and is filled with 0.
df_diff = df_chart.diff(axis=1).fillna(0).astype(int)
df_diff

# Previous section difference
# NOTE(review): a negative difference means the rank number got smaller, i.e.
# the team moved up the table, yet it is displayed as "▼" - confirm that this
# is the intended convention.
s2 = (
    df_diff.iloc[:, -1]
    .map(lambda change: "-" if change == 0 else ("▼" if change < 0 else "▲"))
    .rename("Previous section")
)
s2
# Combine everything
# Totals and win / loss counts side by side, then the ranking and the
# previous-section marker joined on the team index.
df4 = pd.concat([pv_score, pv_wl], axis=1).join([s1, s2])

# Ascending in order
df4.sort_values(["Ranking"], inplace=True)

# The per-venue win / draw / loss columns of pv_wl are named without a space
# ("victoryH", "DrawA", ...). Rename them to the headings used in the final
# table before selecting; selecting the spaced labels directly raises KeyError.
df_rank = (
    df4.reset_index()
    .rename(
        columns={
            "victoryH": "Victory H",
            "victoryA": "Victory A",
            "DrawH": "Draw H",
            "DrawA": "Draw A",
            "defeatH": "Defeat H",
            "defeatA": "Defeat A",
        }
    )
    .loc[
        :,
        [
            "Previous section",
            "Ranking",
            "Team name",
            "Points",
            "Number of games",
            "victory",
            "Victory H",
            "Victory A",
            "Draw",
            "Draw H",
            "Draw A",
            "defeat",
            "Defeat H",
            "Defeat A",
            "Goal goal",
            "score",
            "Conceded",
        ],
    ]
)
df_rank
print(df_rank.to_markdown(index=False))
As of 10/07/2020
Previous section | Ranking | Team name | Points | Number of games | victory | Victory H | Victory A | Draw | Draw H | Draw A | defeat | Defeat H | Defeat A | Goal goal | score | Conceded |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
- | 1 | Honda FC | 18 | 8 | 5 | 2 | 3 | 3 | 2 | 1 | 0 | 0 | 0 | 11 | 15 | 4 |
- | 2 | Verspah Oita | 15 | 7 | 5 | 2 | 3 | 0 | 0 | 0 | 2 | 2 | 0 | 5 | 14 | 9 |
- | 3 | Tegevajaro Miyazaki | 14 | 7 | 4 | 1 | 3 | 2 | 1 | 1 | 1 | 1 | 0 | 6 | 11 | 5 |
- | 4 | MIO Biwako Shiga | 13 | 8 | 4 | 2 | 2 | 1 | 1 | 0 | 3 | 1 | 2 | 4 | 17 | 13 |
- | 5 | Iwaki FC | 13 | 8 | 4 | 3 | 1 | 1 | 0 | 1 | 3 | 1 | 2 | 0 | 14 | 14 |
- | 6 | Sony Sendai FC | 13 | 8 | 4 | 1 | 3 | 1 | 1 | 0 | 3 | 1 | 2 | 0 | 13 | 13 |
- | 7 | Matsue City FC | 12 | 9 | 4 | 3 | 1 | 0 | 0 | 0 | 5 | 1 | 4 | -5 | 10 | 15 |
▼ | 8 | FC Osaka | 11 | 8 | 3 | 2 | 1 | 2 | 2 | 0 | 3 | 1 | 2 | 3 | 12 | 9 |
▲ | 9 | Nara club | 11 | 8 | 3 | 1 | 2 | 2 | 1 | 1 | 3 | 2 | 1 | 2 | 10 | 8 |
▲ | 10 | Suzuka Point Getters | 11 | 8 | 3 | 2 | 1 | 2 | 0 | 2 | 3 | 3 | 0 | 0 | 9 | 9 |
▲ | 11 | Honda Lock SC | 10 | 8 | 3 | 0 | 3 | 1 | 0 | 1 | 4 | 4 | 0 | -7 | 8 | 15 |
- | 12 | Tokyo Musashino City FC | 8 | 6 | 2 | 2 | 0 | 2 | 0 | 2 | 2 | 1 | 1 | 1 | 7 | 6 |
- | 13 | Veertien Mie | 8 | 8 | 2 | 1 | 1 | 2 | 1 | 1 | 4 | 2 | 2 | -3 | 8 | 11 |
- | 14 | ReinMeer Aomori | 8 | 7 | 2 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | -5 | 7 | 12 |
- | 15 | FC Maruyasu Okazaki | 5 | 8 | 1 | 0 | 1 | 2 | 1 | 1 | 5 | 3 | 2 | -5 | 7 | 12 |
- | 16 | Kochi United SC | 3 | 8 | 0 | 0 | 0 | 3 | 2 | 1 | 5 | 2 | 3 | -7 | 9 | 16 |
Recommended Posts