# list.html
<!-- Sample input document: three links, each with an href and a title
     attribute; only the second link also carries target="_blank". -->
<html>
<head><title></title></head>
<body>
<a href="http://www.example.com/index.html" title="link title a">Example A</a>
<a href="http://wwww.example.org/" title="link title b" target="_blank">Example B</a>
<a href="http://www.example.net/" title="link title c">Example C</a>
</body>
</html>
from bs4 import BeautifulSoup
# Parse the sample document.
# - The file is opened in a context manager so the handle is closed as soon as
#   parsing has finished instead of being leaked.
# - The parser is named explicitly so the resulting tree does not depend on
#   which optional parsers (lxml, html5lib) happen to be installed, and so
#   Beautiful Soup does not emit its "no parser was explicitly specified"
#   warning.
with open("list.html", encoding="utf-8") as html_file:
    soup = BeautifulSoup(html_file, "html.parser")

# find() returns the first matching tag; attributes are read like dict keys
# and .string gives the tag's single text child.
link = soup.find("a")
print(link["title"])
# link title a
print(link["href"])
# http://www.example.com/index.html
print(link.string)
# Example A

# Keyword arguments to find() filter on attribute values.
link = soup.find("a", target="_blank")
print(link.string)
# Example B
print(link["title"])
# link title b
print(link["href"])
# http://wwww.example.org/

# find_all() returns every matching tag; collect one dict per link.
i = [
    {"title": anchor["title"], "url": anchor["href"], "content": anchor.string}
    for anchor in soup.find_all("a")
]
print(i)
# Output on Python 3.7+, where dicts keep insertion order:
# [{'title': 'link title a', 'url': 'http://www.example.com/index.html', 'content': 'Example A'}, {'title': 'link title b', 'url': 'http://wwww.example.org/', 'content': 'Example B'}, {'title': 'link title c', 'url': 'http://www.example.net/', 'content': 'Example C'}]
Recommended Posts