This script uses Python and lxml to scrape the attribute information of investment trusts from the website of the Investment Trusts Association (Comprehensive Investment Trust Search Library).
Environment: Windows 10 x64, Python 2.7.11, lxml 3.5.0
getFundDetail.py
# -*- coding: utf-8 -*-
# python 2.7
import urllib2
import lxml.html
#Acquire investment trust attributes from the Investment Trusts Association website
def getFundDetail(isin_cd):
    """Scrape the attributes of one investment trust from the association site.

    Downloads the fund detail page for ``isin_cd``, collects the text of the
    ``<label>`` elements found in three tables of that page, and maps them by
    position onto fixed dictionary keys.

    Args:
        isin_cd: ISIN code of the fund, e.g. ``'JP90C000A931'``.

    Returns:
        A dict holding ``'isin_cd'`` plus one entry per scraped attribute.
        Scraped values are UTF-8 encoded strings with newlines removed; an
        empty ``<label>`` yields an empty string. ``'buying_fee'`` is the
        integer ``0`` when the fee table contains only five labels.
        An empty dict is returned when the page cannot be fetched or parsed,
        or when it has fewer labels than expected.
    """
    # Imported here so the function does not rely on a new file-level import.
    from contextlib import closing

    # Dictionary keys in the order their labels appear in each table.
    top_keys = (
        'closing_date',
        'index_type',
        'trustee_fee',
        'unit_type',
        'establishment_date',
        'fund_ctg1',
        'fund_ctg2',
        'fund_name',
        'fund_shortname',
        'asset_manager',
    )
    mid_keys = (
        'independent_division',
        'investment_asset',
        'investment_style',
        'establishment_date2',
        'close_date',
    )
    bottom_keys = (
        'trustee_fee_am',
        'buying_fee',
        'partical_redemption_charge',
        'trustee_fee2',
        'trustee_fee_seller',
        'trustee_fee_custody',
    )

    xpath_top = '//*[@id="contents"]/div[2]/div/table[1]//label'
    xpath_mid = '//*[@id="contents"]/div[2]/div/table[3]//label'
    xpath_bottom = '//*[@id="contents"]//table[@class="dividend"]//label'

    def label_text(elem):
        # An element without text has ``text`` set to None; treat it as an
        # empty value instead of letting it abort the whole scrape.
        return (elem.text or u'').encode('utf-8').replace('\n', '')

    try:
        # URL generation
        url = ('http://tskl.toushin.or.jp/FdsWeb/view/FDST030000.seam'
               '?isinCd={0}'.format(isin_cd))

        # The urllib2 response is not a context manager in Python 2, so wrap
        # it in closing() to release the connection even if read() fails.
        with closing(urllib2.urlopen(url)) as response:
            html = response.read()
        root = lxml.html.fromstring(html)

        labels_top = [label_text(e) for e in root.xpath(xpath_top)]
        labels_mid = [label_text(e) for e in root.xpath(xpath_mid)]
        labels_bottom = [label_text(e) for e in root.xpath(xpath_bottom)]

        # Per the original author's note, a fund without a buying fee has
        # only five labels in the fee table; put 0 in the 'buying_fee' slot so
        # the remaining labels line up with bottom_keys.
        if len(labels_bottom) == 5:
            labels_bottom.insert(1, 0)

        d = {'isin_cd': isin_cd}
        for keys, labels in ((top_keys, labels_top),
                             (mid_keys, labels_mid),
                             (bottom_keys, labels_bottom)):
            for i, key in enumerate(keys):
                # Explicit indexing: a page with too few labels raises
                # IndexError and is reported as a failure below.
                d[key] = labels[i]
        return d
    except Exception:
        # Best-effort scrape: any fetch, parse, or layout error yields an
        # empty dict. Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return {}
if __name__ == '__main__':
    # Sample run: fetch one fund and print every key followed by its value.
    # The result is deliberately not named ``dict``: rebinding that builtin at
    # module level breaks any later code in this module that calls dict(...).
    fund_detail = getFundDetail('JP90C000A931')  # Japanese stock Alpha Quartet (monthly distribution type)
    # fund_detail = getFundDetail('JP90C0003E16')  # Nomura Japan Equity Strategic Fund
    # fund_detail = getFundDetail('JP90C0002V73')  # MMF
    for k, v in fund_detail.items():
        print(k)
        print(v)
establishment_date2
2014/04/04
buying_fee
3.50
close_date
2019/04/04
independent_division
-
fund_shortname
index_type
-
trustee_fee_seller
0.60000
isin_cd
JP90C000A931
unit_type
Additional type
fund_name
Japanese stock Alpha Quartet (monthly distribution type)
establishment_date
2014/04/04
trustee_fee2
1.02500
investment_style
Fund of funds
fund_ctg1
Domestic stocks
trustee_fee
1.02500
fund_ctg2
Not applicable
asset_manager
Daiwa SB Investments Ltd.
investment_asset
stock
closing_date
12 times a year
trustee_fee_custody
0.02500
partical_redemption_charge
Outer frame, less than 0.5%
trustee_fee_am
0.40000
Recommended Posts