introduction

Dans l'article précédent, « Batch download images from specific site URLs with python », une image référencée par un chemin relatif, par exemple

<img src="../sample.png">

ne pouvait pas être téléchargée : cela provoquait une ERREUR. Cette fois, j'ai résolu ce problème. Cependant, ce programme suppose fondamentalement que l'URL se décompose en une racine suivie d'une hiérarchie (http://nom-de-domaine/hiérarchie) et que « .. » désigne cette racine. (Je ne suis pas doué pour les explications et je ne suis pas sûr d'être clair moi-même (^^;) ↑)

Code ajouté

Ce que j'ai ajouté dans ce code est une fonction appelée get_url_root. Argument : l'URL saisie par l'utilisateur. Valeur de retour : l'URL jusqu'au nom de domaine inclus. Si la valeur de retour est 0, cela signifie qu'une ERREUR s'est produite.

`python`


def get_url_root(url):
    """Return the scheme-and-host part of *url*, or 0 on error.

    url -- absolute URL typed by the user, e.g. "http://example.com/a/b.html"

    Returns the URL up to and including the domain name, without a trailing
    slash ("http://example.com"). Returns 0 when *url* does not start with
    "http://" or "https://", or when it has no host part. The integer 0 is
    kept as the error value because callers compare the result against 0.
    """
    for scheme in ("http://", "https://"):
        if url.startswith(scheme):
            # Slice the prefix off explicitly: str.lstrip() takes a *set* of
            # characters and would also eat leading host letters such as the
            # "t" of "test.com".
            host = url[len(scheme):].split("/", 1)[0]
            if host:
                # Keep the scheme that was actually given (http vs https).
                return scheme + host
            break
    return 0

Code édité

Enfin, voici la partie qui télécharge réellement l'image. Si l'URL de l'image contient « ../ », la fonction ci-dessus est utilisée pour obtenir l'URL jusqu'au nom de domaine. Par exemple, la partie « .. » de « ../sample.png » est remplacée par cette racine, ce qui donne « http://sample.com/sample.png ».

`python`


# Download every collected image URL, resolving "../"-relative ones against
# the root of the page URL (serch_url).
# NOTE(review): iterates number_url directly; this assumes number_url holds
# exactly the URLs to download (len_url - 1 entries) -- confirm against caller.
for url in number_url:
    if url.startswith("../"):
        root_url = get_url_root(serch_url)
        if root_url == 0:
            # No usable root for the page URL: the relative path cannot be
            # resolved, so this image is skipped.
            continue
        # Drop every leading "../" instead of str.replace("..", root), which
        # would insert the root once per ".." and corrupt "../../x.png".
        while url.startswith("../"):
            url = url[3:]
        url = root_url + "/" + url
        print(url)
    download(url)

code

`getimage.py`


# -*- coding: utf-8 -*- 

import urllib
import urllib2
import os.path
import sys
from HTMLParser import HTMLParser

def download(url):
    """Fetch *url* and save its content in the current working directory.

    url -- absolute URL of the resource to download

    The local file name is the last path component of the URL
    (os.path.basename); an existing file with that name is overwritten.
    Errors raised while opening, reading or writing propagate to the caller.
    """
    img = urllib.urlopen(url)
    try:
        # 'wb': image data is binary and must be written unmodified.
        with open(os.path.basename(url), 'wb') as localfile:
            localfile.write(img.read())
    finally:
        # Close the connection even when reading or writing fails.
        img.close()

def get_url_root(url):
    """Return the scheme-and-host part of *url*, or 0 on error.

    url -- absolute URL typed by the user, e.g. "http://example.com/a/b.html"

    Returns the URL up to and including the domain name, without a trailing
    slash ("http://example.com"). Returns 0 when *url* does not start with
    "http://" or "https://", or when it has no host part. The integer 0 is
    kept as the error value because callers compare the result against 0.
    """
    for scheme in ("http://", "https://"):
        if url.startswith(scheme):
            # Slice the prefix off explicitly: str.lstrip() takes a *set* of
            # characters and would also eat leading host letters such as the
            # "t" of "test.com".
            host = url[len(scheme):].split("/", 1)[0]
            if host:
                # Keep the scheme that was actually given (http vs https).
                return scheme + host
            break
    return 0

class imgParser(HTMLParser):
    """HTML parser that records the ``src`` of every ``<img>`` start tag.

    Each URL is appended, followed by a tab character, to the file
    ``collection_url.txt`` in the current working directory.
    """

    def handle_starttag(self, tagname, attribute):
        """Append the src value(s) of an <img> start tag to the collection file.

        tagname   -- name of the tag being opened
        attribute -- list of (name, value) pairs for that tag
        """
        if tagname.lower() != "img":
            return
        for name, value in attribute:
            # value is None for a bare attribute (<img src>) and "" for
            # src=""; neither is a downloadable URL, so both are skipped
            # instead of being written as "None" or as an empty entry.
            if name.lower() == "src" and value:
                # Append mode: the file accumulates the URLs found so far.
                with open("collection_url.txt", "a") as f:
                    f.write("%s\t" % value)

if __name__ == "__main__":
    # Script entry point: ask for a page URL, collect the src of every <img>
    # tag on that page, then download each image into the current directory.

    # Double quotes are required: the message itself contains an apostrophe.
    print("Saisissez l'URL du site sur lequel vous souhaitez obtenir la photo.")
    input_url = raw_input('>>>  ')
    serch_url = input_url
    htmldata = urllib2.urlopen(serch_url)

    print('Obtention actuelle de fichiers image...')

    # imgParser appends to the collection file, so remove any file left
    # behind by an earlier run that failed before its final cleanup.
    if os.path.exists("collection_url.txt"):
        os.remove("collection_url.txt")

    parser = imgParser()
    parser.feed(htmldata.read())

    parser.close()
    htmldata.close()

    # Read back the collected URLs. The file is absent when the page has no
    # <img> tag; in that case there is simply nothing to download.
    number_url = []
    if os.path.exists("collection_url.txt"):
        with open("collection_url.txt", "r") as f:
            # Every entry is followed by a tab, so split() yields an empty
            # trailing field; empty fields are dropped.
            number_url = [entry for entry in f.read().split('\t') if entry]

    for url in number_url:
        if url.startswith("../"):
            root_url = get_url_root(serch_url)
            if root_url == 0:
                # No usable root for the page URL: the relative path cannot
                # be resolved, so this image is skipped.
                continue
            # Drop every leading "../" instead of str.replace("..", root),
            # which would insert the root once per ".." and corrupt
            # "../../x.png".
            while url.startswith("../"):
                url = url[3:]
            url = root_url + "/" + url
            print(url)
        download(url)

    print("Le téléchargement de l'image est terminé.")

    # Delete the temporary collection file.
    if os.path.exists("collection_url.txt"):
        os.remove("collection_url.txt")

Télécharger en masse des images à partir d'une URL spécifique avec python

introduction

Code ajouté

python

Code édité

python

code

getimage.py

`python`

`python`

`getimage.py`