Lisez les éléments suivants à partir du fichier CSV
Recherche par mot-clé
Nombre de pages à récupérer
Prix minimum du produit
Prix maximum du produit
Obtenez des informations sur les produits sur eBay
Sortie du fichier suivant en fonction des informations produit acquises
Fichier CSV
Nom du produit
État du produit
Prix de vente
Frais de livraison
Montant total (prix de vente + frais de livraison)
Fichier dictionnaire
Fréquence des mots apparaissant dans les noms de produits
Classification par partie du discours (catégorie grammaticale) des mots apparaissant dans les noms de produits
Graphique
Distribution de fréquence du montant total (histogramme)
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
import csv
import pandas as pd
import datetime
from collections import Counter
import collections
import numpy as np
import matplotlib as mpl
mpl.use('TkAgg') # or whatever other backend that you want
import matplotlib.pyplot as plt
import nltk
import time
import random
import re
# # Input Keyword
# print("Input Keyword:")
# keyword = input().strip().replace(" ","+")
# # Input PageNumber
# print("Input PageNumber")
# page_num = int(input())
# # Input MaxPrice
# print("Input MaxPrice")
# x_max = int(input())
# Scrape sold/completed eBay listings for every keyword row of Source_file.
# For each keyword the script writes four artefacts:
#   * a CSV of the scraped listings (name, status, price, shipping, amount),
#   * a text file with the 200 most frequent words in the listing titles,
#   * a text dump of the NLTK named-entity chunk tree built from the titles,
#   * a PNG histogram of the listing prices.

displayed_results = 200  # listings per result page (eBay "_ipg" parameter)
Source_file = "/Users/micksmith/home/work/eBay/Python/movie anime.csv"
CSV_file_path = "/Users/micksmith/home/work/eBay/Python/CSV/"
keyword_file_path = "/Users/micksmith/home/work/eBay/Python/Keyword/"
analysis_file_path = "/Users/micksmith/home/work/eBay/Python/Analysis/"
hist_file_path = "/Users/micksmith/home/work/eBay/Python/Hist/"

# CSS path of the element whose text holds the total number of search results.
RESULT_COUNT_SELECTOR = '#mainContent > div.s-answer-region.s-answer-region-center-top > div > div.clearfix.srp-controls__row-2 > div > div.srp-controls__control.srp-controls__count > h1 > span:nth-child(1)'

# Placeholder stored when a field could not be read from a listing.
MISSING = " "


def digits_to_int(text):
    """Return the integer formed by the decimal digits found in *text*.

    Every non-digit character is dropped, so "$12.99" yields 1299 and
    "1,234 results" yields 1234.

    Raises:
        ValueError: if *text* contains no digit at all.
    """
    return int(re.sub(r"\D", "", text))


def clamp_page_count(requested_pages, total_results, per_page):
    """Return how many result pages to fetch.

    The answer is *requested_pages*, capped at the number of pages needed to
    show *total_results* items at *per_page* items per page.  Ceiling
    division is used so that an exact multiple (e.g. 400 results at 200 per
    page) does not request a trailing empty page.
    """
    pages_available = -(-total_results // per_page)
    return min(requested_pages, pages_available)


def histogram_bins(x_min, x_max, target_bins=20):
    """Return ``(interval, bin_count)`` for a histogram over [x_min, x_max].

    The interval is the range split into *target_bins* parts, but never less
    than 1: a range narrower than *target_bins* would otherwise produce an
    interval of 0 and a division by zero when deriving the bin count.

    Raises:
        ValueError: if *x_max* is smaller than *x_min*.
    """
    if x_max < x_min:
        raise ValueError(
            "x_max ({0}) must not be smaller than x_min ({1})".format(x_max, x_min)
        )
    span = x_max - x_min
    interval = max(1, span // target_bins)
    bin_count = max(1, span // interval)
    return interval, bin_count


def scrape_item(item):
    """Read one listing element and return its fields.

    Returns:
        A tuple ``(name, status, price, shipping, amount)``.  ``name`` and
        ``status`` fall back to ``MISSING`` when their element cannot be read.
        ``price`` is an int (0 for an "X to Y" price range) or ``MISSING``.
        ``shipping`` is an int and is 0 when no numeric cost is shown.
        ``amount`` is ``price + shipping``, or ``MISSING`` when the price is
        missing, so that a previous item's price is never reused.
    """
    # Imported here so the module-level import block stays untouched.
    from selenium.common.exceptions import WebDriverException

    try:
        name = item.find_element_by_class_name('s-item__title').text
        name = name.replace("NEW LISTING", "")
    except WebDriverException:
        name = MISSING
    print(name)

    try:
        status = item.find_element_by_class_name('SECONDARY_INFO').text
    except WebDriverException:
        status = MISSING

    try:
        price_text = item.find_element_by_class_name('s-item__price').text
        # A price range has no single sale price; it is recorded as 0.
        price = 0 if 'to' in price_text else digits_to_int(price_text)
    except (WebDriverException, ValueError):
        price = MISSING
    print(price)

    try:
        shipping_text = item.find_element_by_class_name('s-item__logisticsCost').text
        shipping = digits_to_int(shipping_text)
    except (WebDriverException, ValueError):
        # No element, or text without digits: treated as no shipping cost.
        shipping = 0
    print(shipping)

    amount = MISSING if price == MISSING else price + shipping
    return name, status, price, shipping, amount


def count_results(driver, keyword):
    """Open the search page for *keyword* and return the reported result count.

    NOTE(review): this URL has no LH_Sold/LH_Complete filter, whereas the
    pages scraped by ``scrape_pages`` do - confirm that using this count to
    limit the sold-listing pages is intended.
    """
    url = 'https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw={}&_sacat=0&_ipg={}'.format(keyword, displayed_results)
    driver.get(url)
    count_text = driver.find_element_by_css_selector(RESULT_COUNT_SELECTOR).text
    return digits_to_int(count_text)


def scrape_pages(driver, keyword, page_num):
    """Fetch pages 1..page_num of sold listings and return one tuple per item."""
    rows = []
    for page in range(1, page_num + 1):
        url = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw={}&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_ipg=200&_pgn={}'.format(keyword, page)
        driver.get(url)
        print("Page_num:", page)
        # Random pause between page loads.
        time.sleep(random.randint(1, 3))
        for item in driver.find_elements_by_class_name('s-item__info.clearfix'):
            rows.append(scrape_item(item))
    return rows


def write_outputs(keyword, page_num, number_of_search, x_min, x_max, rows):
    """Write the word-frequency, NLTK, CSV and histogram files for one keyword.

    Args:
        keyword: search keyword with spaces (used in the output file names).
        page_num: number of pages that were scraped.
        number_of_search: result count reported by eBay.
        x_min, x_max: price range of the histogram.
        rows: tuples as returned by ``scrape_item``.
    """
    date = datetime.datetime.today().strftime("%Y%m%d")
    stem = keyword + "_" + str(number_of_search) + "_" + date
    titles = ' '.join(row[0] for row in rows)

    # Word frequencies of the listing titles.
    keyword_file_name = keyword_file_path + keyword + "_" + str(page_num) + "_" + date + ".txt"
    with open(keyword_file_name, 'w', encoding="utf-8") as f:
        for word, count in collections.Counter(titles.split()).most_common(200):
            f.write('{0} {1}\n'.format(word, count))

    # Tokenise, POS-tag and chunk the titles; the tree is stored as text.
    tagged = nltk.pos_tag(nltk.word_tokenize(titles))
    entities = nltk.chunk.ne_chunk(tagged)
    with open(analysis_file_path + stem + ".txt", mode="w", encoding="utf-8") as f:
        f.write(str(entities))

    df = pd.DataFrame(
        rows, columns=['name', 'status', 'price', 'shippingcost', 'amount']
    )
    df.to_csv(CSV_file_path + stem + ".csv")

    # Only numeric prices can be binned; MISSING placeholders are skipped.
    numeric_prices = [row[2] for row in rows if row[2] != MISSING]
    interval, bin_num = histogram_bins(x_min, x_max)
    # A fresh figure per keyword so earlier histograms are not drawn again.
    plt.figure()
    plt.title(stem)
    plt.xlabel("Price")
    plt.ylabel("Frequency")
    plt.xticks(np.arange(x_min, x_max, interval), rotation=90)
    plt.hist(numeric_prices, range=(x_min, x_max), bins=bin_num)
    # The file name must end in ".png": the image format is taken from it.
    plt.savefig(hist_file_path + stem + ".png")
    plt.close()


def main():
    """Run the scrape for every data row of Source_file.

    Each row is expected to hold: keyword, page count, minimum price and
    maximum price, in that order; the first line of the file is a header.
    """
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(executable_path="/Users/micksmith/home/work/eBay/Python/chromedriver", chrome_options=options)
    try:
        with open(Source_file, "r", newline="") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            for row in reader:
                if not row:
                    continue  # blank line in the CSV
                keyword = row[0].strip().replace(" ", "+")
                page_num = int(row[1])
                x_min = int(row[2])
                x_max = int(row[3])

                number_of_search = count_results(driver, keyword)
                page_num = clamp_page_count(page_num, number_of_search, displayed_results)
                print("number_of_search:", number_of_search)
                print("page_num:", page_num)
                time.sleep(3)
                print(keyword, page_num, x_max)

                # Results are collected per keyword so one keyword's output
                # never contains another keyword's listings.
                rows = scrape_pages(driver, keyword, page_num)
                write_outputs(
                    keyword.replace("+", " "), page_num, number_of_search,
                    x_min, x_max, rows,
                )
    finally:
        # Quit exactly once, after the last keyword. Quitting inside the loop
        # makes the next driver.get() fail with InvalidSessionIdException.
        driver.quit()


if __name__ == "__main__":
    main()
Traceback (most recent call last):
File "/Users/UserName/home/work/eBay/Python/eBay_Scraping.py", line 89, in <module>
driver.get(url)
File "/Users/UserName/.pyenv/versions/anaconda3-4.2.0/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
self.execute(Command.GET, {'url': url})
File "/Users/UserName/.pyenv/versions/anaconda3-4.2.0/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/Users/UserName/.pyenv/versions/anaconda3-4.2.0/lib/python3.5/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.InvalidSessionIdException: Message: invalid session id
Recommended Posts