期交所保證金 https://www.taifex.com.tw/chinese/5/IndexMargining.asp
Python 2 程式碼如下(使用 urllib2,並需另外安裝 lxml 套件):
import urllib2
from lxml import etree
import csv
def url(index):
    """Return the page name for a margin category.

    The result is the last path segment of the page address; the caller
    joins it onto the site's base address.

    Args:
        index: Category number, 1 through 6.

    Returns:
        The page name as a string.

    Raises:
        KeyError: If ``index`` is not one of 1-6.
    """
    pages = {
        1: "indexMarging",
        # 2 and 3 intentionally map to the same page: taifex() reads the
        # first table on it for index 2 and the second table for index 3.
        2: "stockMargining",
        3: "stockMargining",
        4: "iRMargining",
        5: "goldMargining",
        6: "fXMargining",
    }
    return pages[index]
def filename(index):
    """Return the output CSV file name for a margin category.

    Args:
        index: Category number, 1 through 6.

    Returns:
        The CSV file name as a string, or an empty string when ``index`` is
        not a known category.
    """
    name = {
        1: "Index.csv",
        2: "Stock.csv",
        3: "ETF.csv",
        4: "IR.csv",
        5: "Gold.csv",
        6: "FX.csv",
    }
    return name.get(index, "")
def taifex(url, index):
    """Download one margin page and save one of its tables as a CSV file.

    Written for Python 2: the page is fetched with ``urllib2`` and every cell
    is encoded to a byte string before being handed to the ``csv`` writer.

    Args:
        url: Full address of the page to fetch.
        index: Category number. It selects which table on the page is read
            (first table for 2, second table for 3, a simpler path for
            everything else), the output encoding (``big5hkscs`` for 2,
            ``big5`` otherwise) and, through ``filename(index)``, the name of
            the output file.

    Returns:
        None. All table rows except the first are written to the file named
        by ``filename(index)``.

    Errors are not propagated: any exception raised while fetching, parsing
    or writing is caught and printed, so a failing page does not stop the
    caller from moving on to the next one.
    """
    try:
        res = urllib2.urlopen(url)
        try:
            content = res.read()
        finally:
            # Release the connection even if reading fails.
            res.close()

        html = etree.HTML(content)
        if index == 2:
            table_rows = html.xpath('//div[@id="printhere"]/div[@class="section"]/table[@class="table_c"][1]/tbody/tr')
        elif index == 3:
            table_rows = html.xpath('//div[@id="printhere"]/div[@class="section"]/table[@class="table_c"][2]/tbody/tr')
        else:
            table_rows = html.xpath('//div[@class="section"]/table[@class="table_c"][1]/tr')

        # The encoding depends only on the category, so pick it once.
        encoding = "big5hkscs" if index == 2 else "big5"

        rows = []
        for tr in table_rows:
            cells = []
            for cell in tr:
                # cell.text is None when the cell has no leading text (for
                # example an empty cell); treat that as an empty string
                # instead of letting one such cell abort the whole table.
                text = cell.text or ""
                # Commas are removed so thousands separators cannot collide
                # with the CSV delimiter (the writer below does no quoting).
                cells.append(text.strip().replace(',', '').encode(encoding))
            rows.append(cells)

        filepath = filename(index)
        with open(filepath, "w") as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE, escapechar='', lineterminator='\n')
            # The first row is skipped (presumably the table header — confirm).
            writer.writerows(rows[1:])
    except Exception as e:
        # Best-effort by design: report the problem and return normally.
        print(e)
# Fetch the six margin categories (1-6) in turn; taifex() saves each one to
# its own CSV file.
# NOTE(review): the original indentation was lost in this listing. The
# completion message is assumed to be printed once per page, inside the loop
# — confirm against the intended output.
base_url = "https://www.taifex.com.tw/cht/5/"
for i in range(1, 7):
    page_url = base_url + url(i)
    print("抓 " + page_url)
    taifex(page_url, i)
    print("資料完成抓取...\n")
執行結果:
6個檔案中的其中一個檔案的畫面。

