Environment: macOS Catalina 10.15.3, Python 3.6.5
Search Google for any keywords, collect the search results across any number of pages, and write each result's title and URL to a CSV file.
#! python3
# Get the title and URL of each Google search result and write them to a CSV file.
import time, chromedriver_binary, os, csv
from selenium import webdriver
# Directory the resulting CSV is written to. This is a placeholder value;
# replace it with a real, existing directory before running.
output_path = "/Final csv output directory"
# Make the output directory the working directory so that relative file
# names used later resolve inside it.
os.chdir(output_path)

driver = webdriver.Chrome()  # Start a Chrome session

# Open the Google top page and run the search.
driver.get("https://www.google.com/")
search = driver.find_element_by_name("q")  # The search box is the element named "q"
search.send_keys("xxx yyy zzz")  # Type the search words (placeholder query)
search.submit()  # Submit the search form
time.sleep(3)  # Give the results page 3 seconds to load
def ranking(driver, max_pages: int = 10, wait_seconds: float = 3):
    """Collect result titles and links from consecutive search-result pages.

    Starting from the results page currently loaded in ``driver``, every
    element with class ``r`` is read for its title (class ``LC20lb``) and its
    link (``href`` of the first ``a`` tag). The "Next" link (id ``pnnext``)
    is then followed, until ``max_pages`` pages have been read or the page
    has no "Next" link.

    Args:
        driver: A WebDriver-like object that already shows a results page.
            Only the Selenium 3 style locator methods
            (``find_elements_by_class_name``, ``find_elements_by_id``) and
            ``get`` are used.
        max_pages: Maximum number of result pages to read. Values of 0 or
            less read nothing.
        wait_seconds: Pause, in seconds, after each navigation to the next
            page.

    Returns:
        A ``(title_list, link_list)`` tuple of two equal-length lists, where
        ``title_list[k]`` and ``link_list[k]`` belong to the same result.

    Note:
        A result element without a title or anchor is not handled here; the
        error raised by the driver's lookup propagates to the caller.
    """
    title_list = []
    link_list = []

    for _ in range(max_pages):
        # Each search hit on the current page is wrapped in class="r".
        for elem in driver.find_elements_by_class_name("r"):
            # Resolve both values before appending so the two lists can
            # never get out of step with each other.
            title = elem.find_element_by_class_name("LC20lb").text
            link = elem.find_element_by_tag_name("a").get_attribute("href")
            title_list.append(title)
            link_list.append(link)

        # The plural lookup returns an empty list instead of raising when
        # there is no "Next" link, which marks the last page.
        next_buttons = driver.find_elements_by_id("pnnext")
        if not next_buttons:
            break

        # The next page's URL is the href of the "Next" link.
        driver.get(next_buttons[0].get_attribute("href"))
        time.sleep(wait_seconds)  # Let the next page load before reading it

    return title_list, link_list
# Run the scraper and write the CSV. The browser is closed in `finally` so
# that no Chrome process is left behind if scraping or writing fails.
try:
    # Get the title list and the URL list from the ranking function.
    title, link = ranking(driver)
    # Pair each title with its link to get CSV rows like [[a, 1], [b, 2]].
    result = [list(row) for row in zip(title, link)]
    # newline="" hands line-ending control to the csv writer, as the csv
    # module documentation recommends, so "\n" is written unchanged on
    # every platform.
    with open("result.csv", mode="w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(result)
finally:
    # Close the browser
    driver.quit()
Recommended Posts