BeautifulSoup4 is great.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# python 2.x
#
# usage: python this_script.py "http://qiita.com/"
#
# Install beautifulsoup4 beforehand:
# pip install beautifulsoup4
import codecs
import urllib2
import sys
from bs4 import BeautifulSoup
# Print the href of every <a> element found on the page whose URL is
# given as the first command-line argument (one href per line).

# Fail with a usage message instead of a bare IndexError traceback when
# the URL argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python %s URL" % sys.argv[0])
url = sys.argv[1]

# urllib2 responses are not context managers in Python 2, so close the
# handle explicitly even if reading fails.
response = urllib2.urlopen(url)
try:
    # NOTE(review): the page is assumed to be UTF-8; undecodable bytes are
    # silently dropped by the 'ignore' error handler.
    html = response.read().decode('utf-8', 'ignore')
finally:
    response.close()

soup = BeautifulSoup(html, "html.parser")

# href=True keeps only anchors that actually carry an href attribute, so
# anchors without one no longer show up as literal "None" lines.
links = [a["href"] for a in soup.find_all("a", href=True)]
for link in links:
    # Single-argument parenthesized form behaves like the Python 2 print
    # statement.
    print(link)
Recommended Posts