List of national flags on Wikipedia: I downloaded (scraped) the national flag images with Python.
Environment: macOS Catalina, Python 3.8.0
pip install beautifulsoup4
pip install requests
(`pip install urllib` is not needed: urllib is part of the Python standard library.)
pip install lxml
import os
import time
import urllib
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup
# URL of the Japanese Wikipedia article "List of national flags".
wiki_url = "https://ja.wikipedia.org/wiki/%E5%9B%BD%E6%97%97%E3%81%AE%E4%B8%80%E8%A6%A7"

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() aborts on an HTTP error instead
# of silently parsing an error page.
response = requests.get(wiki_url, timeout=30)
response.raise_for_status()
html_text = response.text

# Parse the HTML source with the lxml parser.
soup = BeautifulSoup(html_text, "lxml")
# Collect every <img> element on the page.
imgs = soup.find_all("img")

# Build the list of absolute URLs of the national flag images.
flag_urls = []
for tag in imgs:
    # Flag images have an alt attribute of the form "<country> flag" (page
    # layout as of March 22, 2020), so only tags whose alt text contains
    # "flag" are kept. Defaulting to "" avoids a TypeError on <img> tags
    # that have no alt attribute at all.
    # NOTE(review): the page is the Japanese Wikipedia -- confirm that the
    # alt text really contains the literal "flag".
    if "flag" not in tag.get("alt", ""):
        continue

    src = tag.get("src")
    if not src:
        # Nothing to download without a src attribute.
        continue

    # src is expected to be scheme-less ("//host/path"); resolving it against
    # the page URL yields an absolute https URL and also copes with src values
    # that are already absolute or page-relative.
    flag_urls.append(urllib.parse.urljoin(wiki_url, src))
# All images are saved under ./figs; create the directory if it is missing so
# the first download does not fail.
os.makedirs("./figs", exist_ok=True)

for url in flag_urls:
    # Each URL ends in something like "125px-Flag_of_<country>.svg.png".
    # Keep the part after "px-" up to the first dot and save it as
    # "Flag_of_<country>.png" (no trailing space in the file name).
    png_name = url.split("px-")[-1].split(".")[0] + ".png"
    png_name = os.path.join("./figs", png_name)

    # Download only if the file does not exist yet.
    if os.path.exists(png_name):
        print("File", png_name, "already exists")
        continue

    urllib.request.urlretrieve(url, png_name)
    print("File", png_name, "downloaded")

    # Wait between downloads so as not to load the server.
    time.sleep(1)
The file name was partially garbled, but the download was successful.
Recommended Posts