PRODUCE 101 JAPAN (official site) is the Japanese version of an audition program imported from South Korea. The results of the vote for trainees who hope to debut as singers are announced weekly. As the weeks progress, trainees are eliminated: the field is cut first to the top 60 and later to the top 35.
In this article, I scrape the ranking results from the official website and visualize how the rankings changed over time for the trainees who were still in the competition as of the latest ranking (week 9, as of November 29, 2019).
Some of the trainees' names are hidden.
The scraping is wrapped in a function so that each week's ranking can be collected individually. The HTML elements are retrieved with BeautifulSoup and converted to text, and the data is saved in the form "rank, name, week" (only part of it is shown).
def getWeeklyRank(week):
    """Scrape one week's ranking page and store it in ./weeklyRank.txt.

    The page ``https://produce101.jp/rank/?week=<week>`` is downloaded, the
    text of every ``<span class="icon-rank">`` is parsed as an integer rank and
    the text of every ``<div class="name">`` is taken as a name. One line
    ``rank,name,week`` is written per rank.

    Args:
        week: Week number. ``1`` creates/overwrites the file, any value greater
            than 1 appends to it, anything else writes nothing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a rank element does not contain an integer.
        IndexError: If the page yields fewer names than ranks.
    """
    import requests
    from bs4 import BeautifulSoup

    # Get the URL of the ranking page by formatting with the week number
    url = 'https://produce101.jp/rank/?week={}'
    html = requests.get(url.format(week))
    # Fail loudly on an error page instead of silently writing an empty ranking
    html.raise_for_status()

    # Parse the page and collect the rank / name elements by class
    soup = BeautifulSoup(html.text, 'lxml')
    span_rank = soup.find_all("span", class_="icon-rank")
    div_name = soup.find_all("div", class_="name")

    # Extract the text of each tag
    rank = [int(tag.text) for tag in span_rank]
    name = [tag.text for tag in div_name]

    # Detect a rank/name mismatch before touching the file, so a bad page
    # cannot leave a half-written ranking behind
    if len(name) < len(rank):
        raise IndexError(
            'found {} ranks but only {} names for week {}'.format(
                len(rank), len(name), week))

    # Create the file only in the first week; append from the next week on
    if week == 1:
        mode = 'w'
    elif week > 1:
        mode = 'a'
    else:
        return

    # zip stops at the shorter list, i.e. one line per rank (names beyond the
    # last rank are ignored)
    with open('./weeklyRank.txt', mode) as f:
        f.writelines(
            '{},{},{}\n'.format(r, n, week) for r, n in zip(rank, name))
Run the function once for each week to get its ranking. (It would be neater to fetch everything automatically, but this time I call it explicitly, week by week.)
#No ranking was announced in the 4th and 7th weeks, so those weeks are skipped.
#Week 1 comes first because it is the call that creates the file.
for week in (1, 2, 3, 5, 6, 8, 9):
    getWeeklyRank(week)
Remove anything other than the name, such as the "* Decline" marker. Then reshape the data so that each row is a week and each column holds one trainee's rank, with the trainee names as column headings. The rank of trainees who dropped out along the way is replaced with "x".
#Erase the notation of decline
#Read the whole file, strip the marker text, then rewrite the file in place.
#The with-blocks close the handles even if the read or the write fails.
with open('weeklyRank.txt', 'r') as f:
    data_lines = f.read()
data_lines = data_lines.replace('* Decline', '')
with open('weeklyRank.txt', 'w') as f:
    f.write(data_lines)
Format the ranking data obtained from the HTML. The field is cut to the top 60 in week 5 and to the top 35 in week 8, so the number of trainees changes; these weeks are therefore handled separately.
def getWeeklyRank_format(data_path):
    """Pivot the scraped ``rank,name,week`` rows into one row per week.

    Reads ``data_path`` (a headerless CSV with the columns rank, name, week)
    and writes ``./weeklyRank_format.txt``:

    * a header line ``week,<name>,<name>,...`` listing the week-1 trainees in
      week-1 rank order (tied ranks keep their order in the input file);
    * one line per announced week (1, 2, 3, 5, 6, 8, 9 -- weeks 4 and 7 are
      skipped) holding the week number followed by each trainee's rank.

    A trainee gets ``x`` instead of a rank when they are not part of the
    reference cohort for that week (week 5 for weeks 5-6, week 8 for weeks
    8-9), or when no rank is recorded for them in that week.

    The header is followed by exactly one newline (no blank line);
    ``pandas.read_csv`` skips blank lines by default, so the parsed table is
    the same either way.

    Args:
        data_path: Path of the ``rank,name,week`` CSV file.
    """
    import pandas as pd

    df_rank = pd.read_csv(data_path, header=None, names=('rank', 'name', 'week'))

    # (week, name) -> rank. One pass over the data replaces a DataFrame filter
    # per cell; setdefault keeps the first row if a pair occurs twice.
    rank_of = {}
    for rank, name, week in zip(df_rank['rank'], df_rank['name'], df_rank['week']):
        rank_of.setdefault((week, str(name)), rank)

    def names_in(week):
        """Return the set of names that have a row in the given week."""
        return {str(n) for n in df_rank.loc[df_rank['week'] == week, 'name']}

    # Column order: week-1 trainees sorted by week-1 rank. A stable sort copes
    # with tied or non-contiguous ranks, which a lookup of rank 1, 2, 3, ...
    # cannot.
    week1 = df_rank[df_rank['week'] == 1].sort_values('rank', kind='mergesort')
    name_week1 = [str(n) for n in week1['name']]

    # Cohort that is still in the competition, keyed by the week of each cut
    survivors = {1: set(name_week1), 5: names_in(5), 8: names_in(8)}
    # Announced week -> week whose cohort decides who is still ranked
    reference_week = {1: 1, 2: 1, 3: 1, 5: 5, 6: 5, 8: 8, 9: 8}

    lines = [','.join(['week'] + name_week1)]
    for week in range(1, 10):
        if week not in reference_week:
            # No ranking announced in the 4th and 7th week
            continue
        alive = survivors[reference_week[week]]
        cells = [str(week)]
        for name in name_week1:
            rank = rank_of.get((week, name)) if name in alive else None
            cells.append('x' if rank is None else str(rank))
        lines.append(','.join(cells))

    # Write everything at the end so a failure above cannot leave a partial
    # file; UTF-8 matches the default encoding pandas.read_csv reads with.
    with open('./weeklyRank_format.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')
Execute the function.
#Build ./weeklyRank_format.txt from the scraped weekly rankings
getWeeklyRank_format('./weeklyRank.txt')
Let's see if it worked.
import pandas as pd
#Load the formatted table; header=0 takes the column names from the first line
df_rank = pd.read_csv('./weeklyRank_format.txt',header=0)
#Bare expression: presumably here so the notebook displays the table
df_rank
To render the Japanese text in the plot, and with appearance in mind, I use a downloaded font, JK Gothic L.
#Trainee ranking from 1st week to 9th week
#Customize fonts
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
#Apply fonts by directly specifying ttf files
import matplotlib.font_manager
fp = matplotlib.font_manager.FontProperties(fname='/Users/[USER NAME]/.matplotlib/fonts/ttf/JKG-L_3.ttf')
#Set field
fig, axs = plt.subplots(figsize=(10,25))
x = df_rank['week']
axs.set_xlim(0.94,9.1)
axs.set_xticks([1, 2, 3, 4, 5, 6, 7, 8, 9])
axs.set_ylim(99, 0.6)
axs2 = axs.twinx()
labels = list(df_rank.columns[1:])[0:]
axs.set_yticks(list(np.arange(1,99)))
axs.set_yticklabels(labels, fontproperties=fp, color='darkslateblue')
axs.set_xticklabels(['1st week', '2nd week', '3rd week','4th week', '5th week', '6th week', '7th week', '8th week', '9th week'], rotation=0, fontsize=14, fontproperties=fp, color='darkslateblue')
axs.spines['top'].set_visible(False)
axs.spines['bottom'].set_visible(False)
axs.spines['right'].set_visible(False)
axs.spines['left'].set_visible(False)
axs.tick_params(left=False)
labels2 = list((np.arange(0,99)))
axs2.set_yticks(list(np.arange(1,99)))
axs2.set_yticklabels(labels2[99:0:-1], fontproperties=fp, color='darkslateblue')
axs2.set_ylim(0,98)
axs2.spines['top'].set_visible(False)
axs2.spines['bottom'].set_visible(False)
axs2.spines['right'].set_visible(False)
axs2.spines['left'].set_visible(False)
axs2.tick_params(right=False)
#Change the color of the polygonal line to rainbow
cmap = plt.get_cmap('rainbow')
for i in range(1, 99,1):
y = df_rank[df_rank.columns[i]]
if 'x' in list(y):
continue
else:
axs.plot(x,y,color=cmap(1-i/100),marker='o',markersize=8,linewidth = 3, alpha=0.3)
This completes the graph. By adding filter conditions, you could also visualize only the trainees whose ranking improved greatly.
Recommended Posts