期交所保證金 https://www.taifex.com.tw/chinese/5/IndexMargining.asp
Python程式碼如下:
import urllib2 from lxml import etree import csv def url(index): return { 1 : "indexMarging", 2 : "stockMargining", 3 : "stockMargining", 4 : "iRMargining", 5 : "goldMargining", 6 : "fXMargining" }[index] def filename(index): name = { 1 : "Index.csv", 2 : "Stock.csv", 3 : "ETF.csv", 4 : "IR.csv", 5 : "Gold.csv", 6 : "FX.csv" } return name.get(index, "") def taifex(url, index): try: res = urllib2.urlopen(url) content = res.read() html = etree.HTML(content) if index == 2: tr = html.xpath('//div[@id="printhere"]/div[@class="section"]/table[@class="table_c"][1]/tbody/tr') elif index == 3: tr = html.xpath('//div[@id="printhere"]/div[@class="section"]/table[@class="table_c"][2]/tbody/tr') else: tr = html.xpath('//div[@class="section"]/table[@class="table_c"][1]/tr') row = [] for td in tr: col = [] for data in td: if index == 2: col.append(data.text.strip().replace(',', '').encode("big5hkscs")) else: col.append(data.text.strip().replace(',', '').encode("big5")) row.append(col) filepath = filename(index) with open(filepath, "w") as csvfile: writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE, escapechar='', lineterminator='\n') writer.writerows(row[1:]) except Exception as e: print e for i in range(1, 7): print("抓 https://www.taifex.com.tw/cht/5/" + url(i)) taifex("https://www.taifex.com.tw/cht/5/" + url(i), i) print("資料完成抓取...\n")
執行結果:
以下為 6 個輸出檔案中其中一個檔案的畫面。