This script does not check whether an acquired URL is a relative path or an absolute path, and it does not convert relative paths into absolute paths. Please note that the program was written on the assumption that the site you want to acquire images from uses only absolute paths for its img sources, so an error will occur if you try to acquire images from a site that uses relative paths. ~~I will write a detailed explanation (?) of the code on the blog linked below.~~ (The blog has been released) (Scheduled as of August 11, 2014)
downloadImg.py
# -*- coding: utf-8 -*-
import urllib
import urllib2
import os.path
import sys
from HTMLParser import HTMLParser
def download(url):
    """Download the resource at *url* into the current working directory.

    The local file name is ``os.path.basename(url)``, i.e. everything after
    the last ``/`` of the URL (including any query string). An existing file
    with the same name is overwritten.

    The whole response body is read into memory before the local file is
    created, so a failure while reading does not leave an empty file behind.
    """
    filename = os.path.basename(url)

    img = urllib.urlopen(url)
    try:
        data = img.read()
    finally:
        # Always release the connection, even when the read fails.
        img.close()

    with open(filename, 'wb') as localfile:
        localfile.write(data)
class imgParser(HTMLParser):
    """HTML parser that records the ``src`` of every ``<img>`` tag it is fed.

    Each URL is appended to the file named by ``URL_LIST_FILE`` (relative to
    the current working directory), followed by a tab character. The file is
    opened in append mode, so it is created on the first match and is never
    truncated by this class.
    """

    # File that collects the URLs of the acquired photos.
    URL_LIST_FILE = "collection_url.txt"

    def handle_starttag(self, tagname, attribute):
        """Append the ``src`` value(s) of an ``<img>`` start tag to the file.

        tagname   -- name of the tag being opened.
        attribute -- list of ``(name, value)`` pairs for the tag.
        """
        if tagname.lower() != "img":
            return

        img_urls = []
        for name, value in attribute:
            if name.lower() != "src":
                continue
            # A valueless attribute (``<img src>``) is reported as None;
            # writing it out would record the literal text "None" as a URL.
            # Surrounding whitespace is dropped so it cannot end up inside
            # the tab-separated list.
            img_url = (value or "").strip()
            if img_url:
                img_urls.append(img_url)

        if not img_urls:
            return

        with open(self.URL_LIST_FILE, "a") as f:
            for img_url in img_urls:
                f.write("%s\t" % img_url)
if __name__ == "__main__":
    # Script entry point: ask for a page URL, collect the src of every <img>
    # on that page via imgParser, then download each one with download().
    # Relative src values are NOT resolved against the page URL; they are
    # passed to download() exactly as they appear in the HTML.
    url_list_file = "collection_url.txt"

    print('Enter the URL of the site where you want to get the photo.')
    input_url = raw_input('>>> ')

    # The parser appends to the list file, so a file left behind by an
    # earlier interrupted run would mix stale URLs into this run.
    if os.path.exists(url_list_file):
        os.remove(url_list_file)

    htmldata = urllib2.urlopen(input_url)
    print('Currently getting image files...')
    try:
        parser = imgParser()
        parser.feed(htmldata.read())
        parser.close()
    finally:
        htmldata.close()

    try:
        # Read the generated file. It only exists when at least one
        # <img src=...> was found; entries are tab-terminated.
        img_urls = []
        if os.path.exists(url_list_file):
            with open(url_list_file, "r") as f:
                img_urls = [u.strip() for u in f.read().split('\t')
                            if u.strip()]

        if img_urls:
            for url in img_urls:
                download(url)
            print('The image download is complete.')
        else:
            print('No image URLs were found on the page.')
    finally:
        # Delete file, even when a download failed part-way through.
        if os.path.exists(url_list_file):
            os.remove(url_list_file)
Twitter: @fantmsite ~~Blog: Fantm Site-BLOG~~
Recommended Posts