Scrape lens information from Kakaku.com in Python: collect all price information and lens specs into a CSV file. The items to acquire are as follows.
Camera name, ranking, lowest price, lowest credit price, price URL, compatible mount, lens type, focus, detailed lens type, full-size compatible, APS-C only, lens configuration, number of aperture blades, focal length, shortest shooting distance, maximum shooting magnification, open F value, angle of view, image stabilization mechanism, drip-proof, dust-proof, wide-angle, telephoto, macro, high magnification, fisheye, tilt shooting, mirror, large diameter, pancake, filter diameter, maximum diameter x length, weight
renzu.py
from bs4 import BeautifulSoup
import urllib.request
import re
import requests
import time
import datetime
#Target site URL
#kakaku.com lens ranking
url = "https://kakaku.com/camera/camera-lens/ranking_1050/"
page_count = 1
linklist = []
#Collect the item page of every lens from all ranking pages
while True:
    category_res = requests.get(url + "?page=" + str(page_count)).text
    soup = BeautifulSoup(category_res, 'html.parser')  #Initialize BeautifulSoup
    print("Page {}".format(page_count))
    for elm in soup.find_all("a"):
        if 'href' in elm.attrs:
            link_url = elm.attrs['href']
            #Only individual item pages are of interest
            if "https://kakaku.com/item/" in link_url:
                linklist.append(link_url)
    #Keep paging until there is no "next" link
    a_next_tag = soup.find_all("li", {"class": "next"})
    if a_next_tag:
        page_count += 1
        time.sleep(1)  #Pause between requests out of courtesy to the server
        continue
    break
#Remove duplicates while keeping the original (ranking) order
linklist = sorted(set(linklist), key=linklist.index)
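#For illustration (hypothetical values): set() drops duplicates and
#key=linklist.index re-sorts by first appearance, e.g.
#  x = ["b", "a", "b", "c"]
#  sorted(set(x), key=x.index)  ->  ["b", "a", "c"]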
################################################################
#Output file name (acquisition date and time)
now = datetime.datetime.now()
filename = "renzu" + now.strftime('%Y%m%d_%H%M%S') + '.csv'
#cp932 (Shift-JIS) so the CSV opens cleanly in Japanese-locale Excel
f = open(filename, 'a', encoding='cp932', errors='ignore')
f.write("Camera name,Ranking,Lowest price,Lowest credit price,Price URL,")
#Open the spec sheet of the first lens to build the CSV header row
page_html = linklist[0] + "spec/#tab"
res = urllib.request.urlopen(page_html)
page_soup = BeautifulSoup(res, 'html.parser')
#Get the lens spec table
table = page_soup.findAll("table", {"class": "tblBorderGray mTop15"})[0]
rows = table.findAll("tr")
index = -1
#Write each heading of the spec table, skipping section-header cells
for row in rows:
    for cell in row.findAll('th'):
        index += 1
        if index in (0, 17, 26, 29):  #Section headers, not spec columns
            continue
        cell = cell.get_text()
        cell = re.sub(r"[\n\t\s]*", "", str(cell))
        f.write(cell)
        f.write(",")
f.write("\n")
#Write the price information of every lens
for page_url in linklist:
    page_html = page_url + "spec/#tab"
    res = urllib.request.urlopen(page_html)
    page_soup = BeautifulSoup(res, 'html.parser')
    #Required elements and class names; missing elements become empty fields
    name = page_soup.find("h2", itemprop="name").text
    try:
        rank = page_soup.find("span", class_="rankNum").text
    except AttributeError:
        rank = ''
    try:
        low_price = page_soup.find("div", class_="priceWrap").find("span", class_="priceTxt").text
        low_price = low_price.replace(',', '')
    except AttributeError:
        low_price = ''
    try:
        cre_price = page_soup.find("div", class_="creditCard").find("span", class_="priceTxt").text
        cre_price = cre_price.replace(',', '')
    except AttributeError:
        cre_price = ''
    print(rank)
    print(low_price)
    #One CSV row per lens: price fields first, then the spec columns
    f.write(name + "," + rank + "," + low_price + "," + cre_price + "," + page_url + ",")
    #Write the lens spec information
    #Same table layout as the header pass above
    table = page_soup.findAll("table", {"class": "tblBorderGray mTop15"})[0]
    rows = table.findAll("tr")
    #Append every data cell to the current row
    for row in rows:
        for cell in row.findAll('td'):
            cell = cell.get_text()
            cell = re.sub(r"[\n\t\s]*", "", str(cell))
            f.write(cell)
            f.write(",")
    f.write("\n")
    time.sleep(1)  #Pause between item pages
f.close()
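To sanity-check the output, the finished CSV can be read back with the standard csv module. This is a minimal sketch, assuming the renzuYYYYMMDD_HHMMSS.csv naming scheme and the cp932 encoding used when writing above.
check_csv.py
import csv
import glob

#Pick the most recently written renzu*.csv (assumes the naming scheme above)
latest = sorted(glob.glob("renzu*.csv"))[-1]
with open(latest, encoding='cp932', errors='ignore') as f:
    for i, row in enumerate(csv.reader(f)):
        print(len(row), row[:5])  #Column count and the first five fields
        if i >= 3:  #Only peek at the first few rows
            break
If the column count of the data rows does not match the header row, a lens page probably had a spec table with extra or missing cells.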