Plot the TOPIX time series from Yahoo Finance and observe the changes in the economy
urllib2: makes an HTTP request and fetches the HTML
urllib2.urlopen(url).read()
lxml: an XML/HTML parser. Suppose the variable html contains the following HTML string:
<table>
<tr><td>aa</td><td>bb</td></tr>
<tr><td>aa</td><td>bb</td></tr>
<tr><td>aa</td><td>bb</td></tr>
</table>
You can display the contents of all td with the following code.
# Parse the HTML string into an element tree.
root = lxml.html.fromstring(html)
# Walk every table, then each row of that table, then only the cells that
# belong to that row (querying from `tr`, not `root`, so that cells are not
# repeated once per row).
for table in root.xpath("//table"):
    for tr in table.xpath("descendant::tr"):
        for td in tr.xpath("descendant::td"):
            # Print the cell's text content rather than the element object.
            print(td.text)
import pylab
import urllib2
import lxml
import lxml.html
import re
# Date range to request: dateFr is the start date, dateTo is the end date.
dateFr = {"year": 2000, "month": 1, "day": 1}
dateTo = {"year": 2013, "month": 11, "day": 1}

# URL template for the history page of code 998405.T.
# sy/sm/sd receive the start date, ey/em/ed the end date, and p the page number.
url_template = "http://info.finance.yahoo.co.jp/history/?code=998405.T&sy=%d&sm=%d&sd=%d&ey=%d&em=%d&ed=%d&tm=d&p=%d"

data = []
for page in range(1, 30):
    print(page)
    # The end date must come from dateTo (year, month AND day); using dateFr
    # for the month/day would silently cut the requested range short.
    url = url_template % (
        dateFr["year"], dateFr["month"], dateFr["day"],
        dateTo["year"], dateTo["month"], dateTo["day"],
        page,
    )
    html = urllib2.urlopen(url).read()
    root = lxml.html.fromstring(html)
    # Select elements whose class attribute contains the token "boardFin".
    tables = root.xpath('//*[contains(concat(" ",normalize-space(@class)," "), " boardFin ")]')
    if not tables:
        # No price table on this page: stop paging instead of raising
        # IndexError, and plot whatever has been collected so far.
        break
    table = tables[0]
    for tr in table.xpath("descendant::tr"):
        tmp = [td.text for td in tr.xpath("descendant::td")]
        # Only rows with exactly five cells are treated as price rows.
        if len(tmp) != 5:
            continue
        # Cells 1-4 are numbers written with thousands separators.
        begin, high, low, end = [float(cell.replace(",", "")) for cell in tmp[1:5]]
        # NOTE(review): begin and end are parsed but not plotted; each sample
        # repeats low/high, presumably so the box spans the row's low-high
        # range -- confirm this is intended.
        data.append([low, high, low, high])

# One box per collected row.
pylab.boxplot(data)
pylab.show()
Recommended Posts