PyCharm 5.0 est sorti le 23 novembre 2015. Les notes de version annoncent la prise en charge de Python 3.5 et la visualisation de l'exécution asynchrone. Cette fonction de visualisation semblait utile, je l'ai donc essayée immédiatement.
J'ai repris le programme de l'article sur async / await que j'avais écrit plus tôt et je l'ai exécuté dans PyCharm pour le visualiser.
Il s'agit d'un programme qui télécharge simultanément 13 articles Wikipédia et enregistre chacun d'eux dans un fichier.
Je l'ai écrit rapidement, sans soin particulier.
async_web.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import aiohttp
import asyncio
async def download_file(title, url):
    """
    Download a URL and save the response text to a file.

    The text is handed to ``save_file`` under the name ``<title>.txt``.

    :param title: str, base name of the output file (without extension)
    :param url: str, address to download
    :return: str, name of the file the text was saved under
    :raises AssertionError: if the server does not answer with HTTP 200
    """
    local_filename = title + ".txt"
    # ClientSession has to be entered with ``async with``; the plain ``with``
    # form was deprecated and is rejected by current aiohttp releases.
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as resp:
            assert resp.status == 200
            data = await resp.text()
    # The connection is released before the (blocking) save starts.
    await save_file(local_filename, data)
    return local_filename
async def save_file(filename, text):
    """
    Save text to a file under ``./data``.

    The write itself is an ordinary synchronous ``open``/``write``; nothing
    is awaited inside this coroutine.

    :param filename: str, name of the file to create inside ``./data``
    :param text: str, content to write
    """
    path = "./data/{}".format(filename)
    # The context manager closes the handle even when the write fails.
    # UTF-8 is requested explicitly so non-ASCII text is written the same way
    # whatever the platform's default encoding is.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)
async def task_download(title, url):
    """
    Download one article and save it; thin wrapper around ``download_file``.

    The parameters are named in the order callers pass them (title first,
    then URL), which is also the order ``download_file`` expects, so both
    positional and keyword calls forward the values to the right slots.

    :param title: str, base name of the output file
    :param url: str, address to download
    """
    await download_file(title, url)
# Prefix shared by every article address listed below.
_WIKI_BASE = 'https://ja.wikipedia.org/wiki/'

# [title, url] pairs; the title is used as the base name of the output file.
urls = [
    ['Yakult', _WIKI_BASE + '%E6%9D%B1%E4%BA%AC%E3%83%A4%E3%82%AF%E3%83%AB%E3%83%88%E3%82%B9%E3%83%AF%E3%83%AD%E3%83%BC%E3%82%BA'],
    ['Géant', _WIKI_BASE + '%E8%AA%AD%E5%A3%B2%E3%82%B8%E3%83%A3%E3%82%A4%E3%82%A2%E3%83%B3%E3%83%84'],
    ['Hanshin', _WIKI_BASE + '%E9%98%AA%E7%A5%9E%E3%82%BF%E3%82%A4%E3%82%AC%E3%83%BC%E3%82%B9'],
    ['Hiroshima', _WIKI_BASE + '%E5%BA%83%E5%B3%B6%E6%9D%B1%E6%B4%8B%E3%82%AB%E3%83%BC%E3%83%97'],
    ['Chunichi', _WIKI_BASE + '%E4%B8%AD%E6%97%A5%E3%83%89%E3%83%A9%E3%82%B4%E3%83%B3%E3%82%BA'],
    ['Yokohama', _WIKI_BASE + '%E6%A8%AA%E6%B5%9CDeNA%E3%83%99%E3%82%A4%E3%82%B9%E3%82%BF%E3%83%BC%E3%82%BA'],
    ['Sofban', _WIKI_BASE + '%E7%A6%8F%E5%B2%A1%E3%82%BD%E3%83%95%E3%83%88%E3%83%90%E3%83%B3%E3%82%AF%E3%83%9B%E3%83%BC%E3%82%AF%E3%82%B9'],
    ['Jambon soleil', _WIKI_BASE + '%E5%8C%97%E6%B5%B7%E9%81%93%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0%E3%83%95%E3%82%A1%E3%82%A4%E3%82%BF%E3%83%BC%E3%82%BA'],
    ['Lotte', _WIKI_BASE + '%E5%8D%83%E8%91%89%E3%83%AD%E3%83%83%E3%83%86%E3%83%9E%E3%83%AA%E3%83%BC%E3%83%B3%E3%82%BA'],
    ['Seibu', _WIKI_BASE + '%E5%9F%BC%E7%8E%89%E8%A5%BF%E6%AD%A6%E3%83%A9%E3%82%A4%E3%82%AA%E3%83%B3%E3%82%BA'],
    ['ORIX', _WIKI_BASE + '%E3%82%AA%E3%83%AA%E3%83%83%E3%82%AF%E3%82%B9%E3%83%BB%E3%83%90%E3%83%95%E3%82%A1%E3%83%AD%E3%83%BC%E3%82%BA'],
    ['Rakuten', _WIKI_BASE + '%E6%9D%B1%E5%8C%97%E6%A5%BD%E5%A4%A9%E3%82%B4%E3%83%BC%E3%83%AB%E3%83%87%E3%83%B3%E3%82%A4%E3%83%BC%E3%82%B0%E3%83%AB%E3%82%B9'],
    ['Équipe nationale de football du Japon', _WIKI_BASE + '%E3%82%B5%E3%83%83%E3%82%AB%E3%83%BC%E6%97%A5%E6%9C%AC%E4%BB%A3%E8%A1%A8'],
]
# Create the loop explicitly: asyncio.get_event_loop() is deprecated when no
# loop is running, and installing it as the current loop keeps older
# asyncio.wait() implementations (which look the loop up) working.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# asyncio.wait() no longer accepts bare coroutine objects, so each download
# is wrapped in a Task bound to this loop first.
tasks = asyncio.wait(
    [loop.create_task(task_download(title, url)) for title, url in urls]
)
loop.run_until_complete(tasks)
loop.close()
■ Threading graph
■ Asyncio graph
En regardant le graphique Asyncio, les tâches 15 à 27 correspondent à la partie où la communication HTTP est effectuée avec `aiohttp`, et l'alternance entre les tâches y fonctionne bien. Cependant, on constate que le traitement qui suit prend du temps. Puisque l'écriture sur le disque effectuée par la fonction
`save_file` est une E/S bloquante, on peut supposer que ce temps a été passé à attendre la fin de l'écriture. En d'autres termes, réécrire la
fonction `save_file` avec des E/S non bloquantes devrait améliorer les choses.
Je voulais réécrire la fonction `save_file`,
qui écrit sur le disque, avec des E/S non bloquantes, mais je ne savais pas comment faire ; je l'ai donc réécrite à l'ancienne, en déléguant l'écriture à un processus séparé (multiprocessing).
async_web_mp.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import random
import aiohttp
import asyncio
import multiprocessing as mp
async def task_download(i, title, url):
    """
    Download one page and hand its text to a separate process for saving.

    :param i: int, index of the task; only used in the progress output
    :param title: str, base name of the output file
    :param url: str, address to download
    :return: str, name of the file the text is written to
    :raises AssertionError: if the server does not answer with HTTP 200
    """
    local_filename = title + "_mp.txt"
    # ClientSession has to be entered with ``async with``; the plain ``with``
    # form was deprecated and is rejected by current aiohttp releases.
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as resp:
            assert resp.status == 200
            data = await resp.text()
    print(i, title, url)
    # The file is written by a child process so that the blocking disk I/O
    # does not stall the event loop; the process is started but not joined
    # here.
    process = mp.Process(target=save_file, args=(local_filename, data))
    process.start()
    return local_filename
def save_file(filename, text):
    """
    Save text to a file under ``./data``.

    :param filename: str, name of the file to create inside ``./data``
    :param text: str, content to write
    """
    path = "./data/{}".format(filename)
    # The context manager closes the handle even when the write fails.
    # UTF-8 is requested explicitly so non-ASCII text is written the same way
    # whatever the platform's default encoding is.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)
# Random query string appended to every URL to prevent caching.
n = "?" + str(random.randint(1, 100000))
# Prefix shared by every article address listed below.
_WIKI_BASE = 'https://ja.wikipedia.org/wiki/'

# [title, url] pairs; the title is used as the base name of the output file.
urls = [
    ['Yakult', _WIKI_BASE + '%E6%9D%B1%E4%BA%AC%E3%83%A4%E3%82%AF%E3%83%AB%E3%83%88%E3%82%B9%E3%83%AF%E3%83%AD%E3%83%BC%E3%82%BA'],
    ['Géant', _WIKI_BASE + '%E8%AA%AD%E5%A3%B2%E3%82%B8%E3%83%A3%E3%82%A4%E3%82%A2%E3%83%B3%E3%83%84'],
    ['Hanshin', _WIKI_BASE + '%E9%98%AA%E7%A5%9E%E3%82%BF%E3%82%A4%E3%82%AC%E3%83%BC%E3%82%B9'],
    ['Hiroshima', _WIKI_BASE + '%E5%BA%83%E5%B3%B6%E6%9D%B1%E6%B4%8B%E3%82%AB%E3%83%BC%E3%83%97'],
    ['Chunichi', _WIKI_BASE + '%E4%B8%AD%E6%97%A5%E3%83%89%E3%83%A9%E3%82%B4%E3%83%B3%E3%82%BA'],
    ['Yokohama', _WIKI_BASE + '%E6%A8%AA%E6%B5%9CDeNA%E3%83%99%E3%82%A4%E3%82%B9%E3%82%BF%E3%83%BC%E3%82%BA'],
    ['Sofban', _WIKI_BASE + '%E7%A6%8F%E5%B2%A1%E3%82%BD%E3%83%95%E3%83%88%E3%83%90%E3%83%B3%E3%82%AF%E3%83%9B%E3%83%BC%E3%82%AF%E3%82%B9'],
    ['Jambon soleil', _WIKI_BASE + '%E5%8C%97%E6%B5%B7%E9%81%93%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0%E3%83%95%E3%82%A1%E3%82%A4%E3%82%BF%E3%83%BC%E3%82%BA'],
    ['Lotte', _WIKI_BASE + '%E5%8D%83%E8%91%89%E3%83%AD%E3%83%83%E3%83%86%E3%83%9E%E3%83%AA%E3%83%BC%E3%83%B3%E3%82%BA'],
    ['Seibu', _WIKI_BASE + '%E5%9F%BC%E7%8E%89%E8%A5%BF%E6%AD%A6%E3%83%A9%E3%82%A4%E3%82%AA%E3%83%B3%E3%82%BA'],
    ['ORIX', _WIKI_BASE + '%E3%82%AA%E3%83%AA%E3%83%83%E3%82%AF%E3%82%B9%E3%83%BB%E3%83%90%E3%83%95%E3%82%A1%E3%83%AD%E3%83%BC%E3%82%BA'],
    ['Rakuten', _WIKI_BASE + '%E6%9D%B1%E5%8C%97%E6%A5%BD%E5%A4%A9%E3%82%B4%E3%83%BC%E3%83%AB%E3%83%87%E3%83%B3%E3%82%A4%E3%83%BC%E3%82%B0%E3%83%AB%E3%82%B9'],
    ['Équipe nationale de football du Japon', _WIKI_BASE + '%E3%82%B5%E3%83%83%E3%82%AB%E3%83%BC%E6%97%A5%E6%9C%AC%E4%BB%A3%E8%A1%A8'],
]
if __name__ == "__main__":
    # The guard keeps the downloads from being started again when
    # multiprocessing re-imports this module in a child process (spawn start
    # method).
    #
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running, and installing it as the current loop keeps older
    # asyncio.wait() implementations (which look the loop up) working.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # asyncio.wait() no longer accepts bare coroutine objects, so each
    # download is wrapped in a Task bound to this loop first.
    tasks = asyncio.wait(
        [
            loop.create_task(task_download(i, title, url + n))
            for i, (title, url) in enumerate(urls)
        ]
    )
    loop.run_until_complete(tasks)
    loop.close()
Le temps d'exécution est passé de 11,5 secondes à 5,5 secondes.
jawiki-latest-all-titles-in-ns0
async_web_counter.py
# -*- coding: utf-8 -*-
import threading
import time
import urllib
class WikipediaCrawler(object):
    """
    Produce Wikipedia URLs from the titles listed in a local file.

    Every generator created by ``get_url`` on the same instance shares one
    lock, so URL production is serialised across the threads using it.
    """
    # Title list read by get_url(), one title per line.
    PATH = './data/jawiki-latest-all-titles-in-ns0'
    # Pause in seconds, applied before each lock attempt and again while the
    # lock is held.
    SLEEP_SECONDS = 0.5
    # Longest time in seconds to wait for the lock before skipping a title.
    LOCK_TIMEOUT = 3

    def __init__(self):
        self.lock = threading.Lock()

    def get_url(self):
        """
        Generate one Wikipedia URL per line of ``PATH``.

        The lock is taken before a URL is built and stays held while the
        consumer processes the yielded value; it is released when the
        generator is resumed or closed. A title is skipped when the lock
        cannot be obtained within ``LOCK_TIMEOUT`` seconds.

        :return: generator of str URLs
        """
        # NOTE(review): assumes the title list is UTF-8 encoded - confirm.
        # The context manager closes the file when the generator finishes or
        # is closed.
        with open(self.PATH, 'r', encoding='utf-8') as f:
            for title in f:
                time.sleep(self.SLEEP_SECONDS)
                if not self.lock.acquire(timeout=self.LOCK_TIMEOUT):
                    # Could not get the lock: report which thread gave up.
                    print('%s: Cannot acquire lock (timed out)'
                          % threading.current_thread().name)
                    continue
                # Lock acquired.
                try:
                    time.sleep(self.SLEEP_SECONDS)
                    url = self.get_wikipedia_url(title)
                    print(url)
                    yield url
                finally:
                    # Always release, even if the consumer abandons the
                    # generator.
                    self.lock.release()

    @classmethod
    def get_wikipedia_url(cls, title):
        """
        Build a Wikipedia URL from a title.

        :param title: str, article title; a trailing line break is ignored
        :return: str, URL with the title percent-encoded
        """
        # Imported here because only the top-level ``urllib`` package is
        # imported by the module and quote_plus lives in urllib.parse.
        from urllib.parse import quote_plus

        _base_url = "https://ja.wikipedia.org/wiki/{}"
        # Strip the line break before quoting instead of slicing the encoded
        # "%0A" off afterwards, which removed three real characters whenever
        # the title had no trailing line break.
        return _base_url.format(quote_plus(title.rstrip('\r\n')))
def worker(crawler):
    """
    Thread entry point: print every URL the crawler produces.

    :param crawler: object whose ``get_url()`` returns an iterable of URLs
    """
    for url in crawler.get_url():
        print(url)
# Run several workers that all share a single crawler instance.
threads_count = 3
crawler = WikipediaCrawler()
threads = [
    threading.Thread(target=worker, args=(crawler,))
    for _ in range(threads_count)
]
for t in threads:
    t.start()
# Block until every worker has finished.
for t in threads:
    t.join()
What’s New in PyCharm 5
[Exploration de la bibliothèque standard Python (18) ~ édition threading.Lock ~](http://mocobeta-backup.tumblr.com/post/86764185357/python-%E6%A8%99%E6%BA%96%E3%83%A9%E3%82%A4%E3%83%96%E3%83%A9%E3%83%AA%E6%8E%A2%E8%A8%AA-18-threadinglock%E7%B7%A8)