#!/usr/bin/python
# -*- coding: utf-8 -*-
#---------------------------------------------
# 抓上櫃交易明細
# Version : 1.1
# Author : Amin white
# Release Date : 2012-06-27
# Python version : 2.7.2
#---------------------------------------------
import csv, os, datetime, urllib, codecs, time
from sgmllib import SGMLParser
#每日上櫃交易資訊
class brokerBS_otc(SGMLParser):
def __init__(self):
SGMLParser.__init__(self)
#初始化變數
def reset(self):
SGMLParser.reset(self)
self.webexist = False
self.broker = False
self.brokerid = []
self.pricevol = False
self.price_vol = 0
self.count = 0
self.price = []
self.sellvol = []
self.buyvol = []
#解析網頁元素
def parse(self,data):
self.feed(data)
self.close()
#對網頁表格控制
def start_td(self, attrs):
if len(attrs) == 2:
if attrs[0][1] == 'table-mainbody-left' and attrs[1][1] == '2':
self.webexist = True
elif len(attrs) == 1:
if attrs[0][1] == 'table-body-left' or attrs[0][1] == 'page_table-body-LEFT':
self.broker = True
elif attrs[0][1] == 'table-body-right' or attrs[0][1] == 'page_table-body-RIGHT':
self.pricevol = True
self.price_vol += 1
self.price_vol %= 3
#處理網頁資料
def handle_data(self, text):
#取得劵商代碼
if self.broker:
data = text.split(" ")
self.brokerid.append(data[0])
self.broker = False
self.count += 1
#取得買買交易明細
if self.pricevol:
#取得買賣價格
if self.price_vol == 1:
self.price.append(text)
self.pricevol = False
#取得買的數量
elif self.price_vol == 2:
self.buyvol.append(text.replace(',', ''))
self.pricevol = False
#取得賣的數量
elif self.price_vol == 0:
self.sellvol.append(text.replace(',', ''))
self.pricevol = False
self.brokerpric = 0
#上市櫃股票代碼與名稱
class Parser_strMode(SGMLParser):
def __init__(self):
SGMLParser.__init__(self)
def reset(self):
SGMLParser.reset(self)
self.stockinfo = False
self.no = 0
self.stock = []
self.col1 = ""
self.col2 = ""
self.col3 = ""
def parse(self,data):
self.feed(data)
self.close()
def start_td(self, attrs):
for name, value in attrs:
if len(attrs) == 1:
if name == 'bgcolor' and value == '#FAFAD2':
self.stockinfo = True
self.no+=1
self.no%=7
def handle_data(self, text):
if self.stockinfo:
if self.no == 1:
data = text.split(" ")
if data[0].isalnum():
self.col1 = data[0]
self.col2 = data[1]
elif self.no == 6:
self.col3 = text
elif self.no == 7:
self.no = 0
if self.no == 6 and self.col3 == 'ESVUFR' or self.col3 == 'EUOMSR' or self.col3 == 'EMXXXA' or self.col3 == 'ESVUFA':
self.stock.append(self.col1)
self.stockinfo = False
def Getstockid(webindex):
    """Download the ISIN listing page for *webindex* and parse its stock codes.

    Parameters:
        webindex: value substituted into the ``strMode`` query parameter.

    Returns:
        The ``Parser_strMode`` instance (its ``stock`` list sorted) when at
        least one code was found; ``0`` when the server did not answer with
        HTTP 200 or when no code was found.
    """
    url = "http://brk.twse.com.tw:8000/isin/C_public.jsp?strMode=%s" % webindex
    webcode = urllib.urlopen(url)
    try:
        # Previously a non-200 answer left ``stock`` unbound and crashed below.
        if webcode.code != 200:
            return 0
        stock = Parser_strMode()
        stock.parse(webcode.read())
    finally:
        # Always release the connection, even if reading/parsing fails.
        webcode.close()
    stock.stock.sort()
    if stock.stock:
        return stock
    return 0
# Write the stock ids to a CSV file
def GetStockidtoCSVfile(filepath, stock):
    """Write the stock codes in *stock* to *filepath* as a one-column CSV.

    The file starts with a UTF-8 byte-order mark, followed by a header row
    and one row per stock code.

    Parameters:
        filepath: destination path; an existing file is overwritten.
        stock: sequence of stock-code strings.
    """
    # ``with`` guarantees the file is closed even if a write fails.
    with open(filepath, 'wb') as writefile:
        # Mark the file as UTF-8.
        writefile.write(codecs.BOM_UTF8)
        writer = csv.writer(writefile)
        # Header row.
        writer.writerow([u'股票代號'.encode('utf8')])
        for stock_code in stock:
            writer.writerow(['%s' % stock_code.encode('utf8')])
def GetTodayDate(mode):
    """Return today's date as a compact string.

    Parameters:
        mode: ``1`` for the Gregorian form ``YYYYMMDD``; ``2`` for the
            Republic-of-China form (Gregorian year minus 1911, then ``MMDD``).

    Returns:
        The formatted string for mode 1 or 2. For any other mode the
        ``datetime`` object for today is returned unformatted.
    """
    now = datetime.datetime.today()
    year, month, day = now.strftime('%Y|%m|%d').split('|')
    if mode == 1:
        # Gregorian calendar.
        return year + month + day
    if mode == 2:
        # Republic of China calendar.
        return str(int(year) - 1911) + month + day
    return now
# Limits used by main() and its helper.
_MAX_RETRIES = 30       # retry budget per stock before giving up on a page
_MAX_PAGES = 5000       # upper bound on result pages fetched per stock
_ROWS_PER_PAGE = 100    # a page with fewer broker rows is the last one


def _fetch_broker_page(stock_id, page, retries):
    """Fetch and parse one broker-trading page, retrying while it is unusable.

    Returns a ``(parser, retries)`` tuple: the ``brokerBS_otc`` parser for the
    page (empty if nothing could be fetched) and the updated retry counter.
    """
    url = "http://www.gretai.org.tw/ch/stock/aftertrading/broker_trading/brokerBS.php?stk_code=%s&topage=%s" % (stock_id, str(page))
    while True:
        # Start every attempt from scratch so a failed request can never
        # reuse the parser of an earlier page.
        parsed = None
        try:
            webcode = urllib.urlopen(url)
        except IOError:
            # Connection could not be made; count it as a failed attempt.
            webcode = None
        if webcode is not None:
            try:
                # HTTP 200 means the page was served normally.
                if webcode.code == 200:
                    parsed = brokerBS_otc()
                    parsed.parse(webcode.read())
            finally:
                webcode.close()
        # Stop once the data table is present or the retry budget is spent.
        if (parsed is not None and parsed.webexist) or retries > _MAX_RETRIES:
            break
        time.sleep(1)
        print("Retry time : " + str(retries) + " " + stock_id)
        retries += 1
    if parsed is None:
        parsed = brokerBS_otc()
    return parsed, retries


def main(startpos):
    """Download the broker trading details of every OTC stock into CSV files.

    A folder named after today's date is created under the hard-coded base
    directory; it receives one CSV with all stock codes and one CSV per stock
    with its broker trading rows.

    Parameters:
        startpos: index into the sorted stock-code list at which to start
            (allows resuming an interrupted run).

    Returns:
        ``0`` when no stock codes could be obtained, otherwise ``None``.
    """
    # Trading date in the form YYYYMMDD. (A leftover debug override used to
    # pin this to '20121005'.)
    todaydate = GetTodayDate(1)
    # Fetch the stock codes; Getstockid returns 0 on failure.
    stockid = Getstockid(4)
    if not stockid or not stockid.stock:
        return 0
    # Folder that receives today's files; adjust as needed.
    workdir = 'D:\\stock_database\\test\\otc\\' + todaydate
    if not os.path.isdir(workdir):
        os.makedirs(workdir)
    # Save the list of stock codes.
    StockidFile = os.path.join(workdir, todaydate + '.csv')
    GetStockidtoCSVfile(StockidFile, stockid.stock)
    total = len(stockid.stock)
    header = [u'序號'.encode('utf8'), u'交易日期'.encode('utf8'),
              u'股票種類'.encode('utf8'), u'股票代號'.encode('utf8'),
              u'券商'.encode('utf8'), u'價格'.encode('utf8'),
              u'買進股數'.encode('utf8'), u'賣出股數'.encode('utf8')]
    # Fetch the trading details of each OTC stock.
    for i in range(startpos, total):
        stock_id = stockid.stock[i]
        CSVFile = os.path.join(workdir, stock_id + '_' + todaydate + '.csv')
        # ``with`` closes the file even if a download or write fails.
        with open(CSVFile, 'wb') as fileobj:
            # Mark the file as UTF-8.
            fileobj.write(codecs.BOM_UTF8)
            writer = csv.writer(fileobj, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
            writer.writerow(header)
            # Row counter and retry counter are kept per stock.
            serialnumber = 1
            Retrytime = 0
            j = 0
            for j in range(1, _MAX_PAGES + 1):
                stock, Retrytime = _fetch_broker_page(stock_id, j, Retrytime)
                print('No : ' + str(j) + ' stock kind : otc stockid : ' + stock_id + ' date : ' + todaydate + u' 券商筆數 : ' + str(len(stock.brokerid)))
                # Write this page's rows.
                for k, broker in enumerate(stock.brokerid):
                    writer.writerow(['%d' % serialnumber, '%s' % todaydate, 'otc', '%s' % stock_id,
                                     '%s' % broker, '%s' % stock.price[k], '%s' % stock.buyvol[k], '%s' % stock.sellvol[k]])
                    serialnumber += 1
                # A short page is the last page of the trading details.
                if len(stock.brokerid) < _ROWS_PER_PAGE:
                    stocklog = 'stockid : ' + stock_id + ' Date : ' + todaydate + ' total web page : %s' % (str(j)) + ' progress rate : ' + str(i) + '/' + str(total) + '\n'
                    print(stocklog)
                    break
            # NOTE(review): original indentation was lost; this message is
            # assumed to be printed once per stock after its last page.
            print('No : ' + str(j) + ' stockid : ' + stock_id + ' date : ' + todaydate + u" write to csv OK......")
# Script entry point: process the stock list from its first entry.
if __name__ == '__main__':
    main(startpos=0)
# Program execution screen
# Execution result