Target

Get movie titles from voting site rankings

Source code

import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
import time

print("Input file_name:")
file_name = input()
Source_file = "/Users/micksmith/home/work/eBay/Python/" + file_name +".csv"


def scroll_down():
    # height = driver.find_element_by_id('pagination')
    # driver.execute_script('arguments[0].scrollIntoView(true);', height)
    # driver.execute_script('scrollBy(0, -150)')
    try:
        while True:
            height = driver.find_element_by_id('pagination')
            driver.execute_script('arguments[0].scrollIntoView(true);', height)
            driver.execute_script('scrollBy(0, -150)')
            time.sleep(1)
            # try:
            #     height.click()
            # except:
            #     pass
            if(len(driver.find_elements_by_id('pagination')) == 0):
                return
            height.click()
            time.sleep(1)
    except:
        return

def get_title(title_Eng):
    scroll_down()
    # for button in driver.find_elements_by_id('pagination__section'):
    #     button.find_element_by_id('pagination').click()
    #     print(button.text)
    # driver.find_element_by_id('pagination').click()
    items = driver.find_elements_by_class_name('listItem__title')
    for item in items:
        title_Eng.append(item.text)
    print("title_Eng:", title_Eng)
    return title_Eng
       
        
if __name__ == "__main__":
    # Open Browser
    options = Options()
    #options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage') 
    driver = webdriver.Chrome(executable_path="/Users/micksmith/home/work/eBay/Python/chromedriver", chrome_options=options)
    #url = "https://www.ranker.com/list/most-popular-anime-today/ranker-anime"
    url = "https://www.ranker.com/crowdranked-list/top-50-greatest-animated-films-of-all-time?ref=browse_list&l=1"
    
    title_Eng = []
    print("Page_Num:")
    Page_Num = int(input())
    print("MIN_Price:")
    MIN_Price = int(input())
    print("MAX_Price:")
    MAX_Price = int(input())
    
    
    driver.get(url)
    df = pd.DataFrame()
    df["Title_Eng"] = get_title(title_Eng)
    df["Page_Num"] = [Page_Num for i in range(len(df))]
    df["MIN_Price"] = [MIN_Price for i in range(len(df))]
    df["MAX_Price"] = [MAX_Price for i in range(len(df))]
    
    df.columns = ["Title_Eng","Page_Num","MIN_Price","MAX_Price"]
    df.to_csv(Source_file, index=False)
    # df.to_csv(Source_file)
    driver.quit()            
# res = requests.get(url)
# soup = BeautifulSoup(res.text)

# for title in soup.find_all(class_="listItem__data"):
#     title_Eng.append(title.text)

result

analysis

After scrolling to the bottom of the page, the next ranking will not be displayed unless you click the "LOAD MORE" button that appears.
scroll_down function
After getting the element position, fine-tune the scroll position (JavaScript)
Display the ranking as long as the "LOAD MORE" button appears
Get all the movie titles displayed

Document

Voting site used
- https://www.ranker.com/

Python Scraping get_title

Target

Source code

result

analysis

Document