BeautifulSoup4, super.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# python 2.x
#
# usage: python this_script.py "http://qiita.com/"
#
# Install beautifulsoup4 beforehand:
# pip install beautifulsoup4
import codecs
import urllib2
import sys
from bs4 import BeautifulSoup
# Fetch the page named on the command line and print the href of every
# <a> tag, one per line.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit("usage: python %s URL" % sys.argv[0])
url = sys.argv[1]

# urllib2 responses are not context managers on Python 2, so close the
# connection explicitly even if reading fails.
response = urllib2.urlopen(url)
try:
    # The page is treated as UTF-8; bytes that do not decode are dropped.
    html = response.read().decode('utf-8', 'ignore')
finally:
    response.close()

soup = BeautifulSoup(html, "html.parser")
# href=True skips anchors that have no href attribute, which would
# otherwise be collected as None and printed as the text "None".
links = [a["href"] for a in soup.find_all("a", href=True)]

# Encode explicitly: when stdout is piped, Python 2 reports no stdout
# encoding and printing a non-ASCII unicode href raises UnicodeEncodeError.
out_encoding = sys.stdout.encoding or "utf-8"
for link in links:
    print(link.encode(out_encoding, "replace"))
Recommended Posts