#!/usr/bin/python
# -*- coding: utf-8 -*-
#---------------------------------------------
# Fetch OTC (over-the-counter) trade details
# Version : 1.1
# Author : Amin white
# Release Date : 2012-06-27
# Python version : 2.7.2
#---------------------------------------------
"""Download per-broker trade details for every OTC stock into CSV files.

The script first reads the list of stock codes from the ISIN page, then
walks the paginated broker buy/sell pages of each code and writes one CSV
file per stock.

The code targets Python 2.7.  The import fallbacks below only exist so the
module can also be loaded where ``sgmllib`` / ``urllib.urlopen`` are gone.
"""
import codecs
import csv
import datetime
import os
import time
import urllib

try:
    from sgmllib import SGMLParser
except ImportError:
    # sgmllib is not available: emulate the small subset used in this file
    # (dispatching ``start_<tag>`` hooks) on top of html.parser.
    from html.parser import HTMLParser

    class SGMLParser(HTMLParser):
        """Minimal stand-in that calls ``start_<tag>(attrs)`` when defined."""

        def handle_starttag(self, tag, attrs):
            hook = getattr(self, 'start_' + tag, None)
            if hook is not None:
                hook(attrs)

try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen

# True when ``str`` is the byte-string type (Python 2).
_PY2 = str is bytes
_TEXT_TYPE = type(u'')

# CFI codes (6th table column) of the rows whose stock code is collected.
_WANTED_CFI_CODES = ('ESVUFR', 'EUOMSR', 'EMXXXA', 'ESVUFA')

# Default folder under which one sub-folder per trading date is created.
_DEFAULT_WORKROOT = 'D:\\stock_database\\test\\otc\\'
# Upper bound on result pages requested for a single stock.
_MAX_PAGES = 5000
# Retry budget shared by all pages of a single stock.
_MAX_RETRIES = 30
# A page holding fewer broker rows than this is treated as the last page.
_ROWS_PER_PAGE = 100

# Column titles of the per-stock detail CSV.
_DETAIL_HEADER = (u'序號', u'交易日期', u'股票種類', u'股票代號',
                  u'券商', u'價格', u'買進股數', u'賣出股數')


def _to_text(raw):
    """Return *raw* as the native ``str`` type before it is fed to a parser."""
    if isinstance(raw, str):
        return raw
    if isinstance(raw, bytes):
        # NOTE(review): the real charset of the pages is not visible in this
        # file; UTF-8 with replacement is an assumption -- confirm.
        return raw.decode('utf-8', 'replace')
    return raw


def _cell(value):
    """Return *value* in the form the csv writer of this interpreter needs."""
    if _PY2 and isinstance(value, _TEXT_TYPE):
        return value.encode('utf8')
    return value


def _open_csv(path):
    """Open *path* for csv writing, starting the file with a UTF-8 BOM."""
    if _PY2:
        handle = open(path, 'wb')
        handle.write(codecs.BOM_UTF8)
        return handle
    return open(path, 'w', newline='', encoding='utf-8-sig')


def _fetch_page(url):
    """Return the body of *url*, or None unless the request answers 200."""
    try:
        response = urlopen(url)
    except IOError:
        return None
    try:
        if response.code != 200:
            return None
        return response.read()
    finally:
        response.close()


# Daily OTC trade information
class brokerBS_otc(SGMLParser):
    """Parser for one broker buy/sell page.

    After ``parse`` the results are available as parallel lists:
    ``brokerid``, ``price``, ``buyvol`` and ``sellvol``.  ``webexist`` is
    True once the expected header cell has been seen.
    """

    def reset(self):
        """Initialise the parser state (also called by the base __init__)."""
        SGMLParser.reset(self)
        self.webexist = False
        self.broker = False
        self.brokerid = []
        self.pricevol = False
        # Position inside the price / buy / sell triple: 1, 2, then 0.
        self.price_vol = 0
        self.count = 0
        self.price = []
        self.sellvol = []
        self.buyvol = []
        # Never read in this file; kept so the attribute always exists.
        self.brokerpric = 0

    def parse(self, data):
        """Feed the whole page *data* to the parser and finish parsing."""
        self.feed(_to_text(data))
        self.close()

    def start_td(self, attrs):
        """Flag what the text of the table cell being opened represents."""
        if len(attrs) == 2:
            if attrs[0][1] == 'table-mainbody-left' and attrs[1][1] == '2':
                self.webexist = True
        elif len(attrs) == 1:
            css_class = attrs[0][1]
            if css_class in ('table-body-left', 'page_table-body-LEFT'):
                self.broker = True
            elif css_class in ('table-body-right', 'page_table-body-RIGHT'):
                self.pricevol = True
                self.price_vol = (self.price_vol + 1) % 3

    def handle_data(self, text):
        """Store *text* according to the flags set by ``start_td``."""
        # Broker cell: the broker code is the part before the first space.
        if self.broker:
            self.brokerid.append(text.split(" ")[0])
            self.broker = False
            self.count += 1
        # Numeric cell: price first, then bought volume, then sold volume.
        if self.pricevol:
            if self.price_vol == 1:
                self.price.append(text)
            elif self.price_vol == 2:
                self.buyvol.append(text.replace(',', ''))
            else:
                self.sellvol.append(text.replace(',', ''))
            self.pricevol = False


# Codes and names of listed / OTC stocks
class Parser_strMode(SGMLParser):
    """Parser for the ISIN listing page; collects stock codes in ``stock``."""

    def reset(self):
        """Initialise the parser state (also called by the base __init__)."""
        SGMLParser.reset(self)
        self.stockinfo = False
        # Column counter of the current row: 1..6, then 0 for the 7th cell.
        self.no = 0
        self.stock = []
        self.col1 = ""
        self.col2 = ""
        self.col3 = ""

    def parse(self, data):
        """Feed the whole page *data* to the parser and finish parsing."""
        self.feed(_to_text(data))
        self.close()

    def start_td(self, attrs):
        """Count cells whose only attribute is ``bgcolor="#FAFAD2"``."""
        if len(attrs) != 1:
            return
        name, value = attrs[0]
        if name == 'bgcolor' and value == '#FAFAD2':
            self.stockinfo = True
            self.no = (self.no + 1) % 7

    def handle_data(self, text):
        """Record the code (column 1) and keep it if column 6 is wanted."""
        if not self.stockinfo:
            return
        self.stockinfo = False
        if self.no == 1:
            # Start a new row: forget the previous row's values so a cell
            # that fails to parse cannot re-use them.
            self.col1 = ""
            self.col2 = ""
            data = text.split(" ")
            if data[0].isalnum():
                self.col1 = data[0]
                # The second part may be missing when the cell has no space.
                self.col2 = data[1] if len(data) > 1 else ""
        elif self.no == 6:
            self.col3 = text
            # Only decide on column 6 itself; the original and/or chain
            # also fired on every later cell once a code had matched.
            if self.col3 in _WANTED_CFI_CODES and self.col1:
                self.stock.append(self.col1)


def Getstockid(webindex):
    """Download the ISIN listing for ``strMode=webindex``.

    Returns the ``Parser_strMode`` instance (its ``stock`` list sorted) when
    at least one code was found, otherwise ``0`` -- including when the page
    could not be fetched.
    """
    url = "http://brk.twse.com.tw:8000/isin/C_public.jsp?strMode=%s" % webindex
    page = _fetch_page(url)
    if page is None:
        return 0
    stock = Parser_strMode()
    stock.parse(page)
    stock.stock.sort()
    if len(stock.stock) > 0:
        return stock
    return 0


# Write the stock ids into a CSV file
def GetStockidtoCSVfile(filepath, stock):
    """Write the codes in *stock* to *filepath*, one per row under a title.

    The file starts with a UTF-8 BOM and is closed even if writing fails.
    """
    with _open_csv(filepath) as writefile:
        writer = csv.writer(writefile)
        # Title row
        writer.writerow([_cell(u'股票代號')])
        for code in stock:
            writer.writerow([_cell(code)])


def GetTodayDate(mode, today=None):
    """Format a date as a compact string.

    mode 1 gives the Gregorian form ``YYYYMMDD``; mode 2 gives the ROC form
    (year minus 1911, followed by ``MMDD``).  For any other mode the
    datetime object itself is returned, as before.  *today* defaults to the
    current local date and time.
    """
    if today is None:
        today = datetime.datetime.today()
    if mode == 1:
        return today.strftime('%Y%m%d')
    if mode == 2:
        return str(today.year - 1911) + today.strftime('%m%d')
    return today


def _fetch_broker_page(stock_id, page, retries):
    """Fetch and parse one broker page of *stock_id*.

    Retries (sleeping one second in between) until the page contains the
    expected header cell or *retries* exceeds the budget.  A fresh parser is
    used for every attempt so a failed request can never leave data of an
    earlier page behind.  Returns ``(parser, retries)``.
    """
    url = ("http://www.gretai.org.tw/ch/stock/aftertrading/broker_trading/"
           "brokerBS.php?stk_code=%s&topage=%s" % (stock_id, str(page)))
    while True:
        parsed = brokerBS_otc()
        body = _fetch_page(url)
        if body is not None:
            parsed.parse(body)
        if parsed.webexist or retries > _MAX_RETRIES:
            return parsed, retries
        time.sleep(1)
        print("Retry time : " + str(retries) + " " + stock_id)
        retries += 1


def _write_stock_details(csv_path, stock_id, todaydate, progress):
    """Download every broker page of *stock_id* into the CSV at *csv_path*."""
    with _open_csv(csv_path) as fileobj:
        writer = csv.writer(fileobj, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_ALL)
        writer.writerow([_cell(title) for title in _DETAIL_HEADER])
        serialnumber = 1
        # The retry budget is shared by all pages of this stock.
        retries = 0
        for page in range(1, _MAX_PAGES + 1):
            stock, retries = _fetch_broker_page(stock_id, page, retries)
            print('No : ' + str(page) + ' stock kind : otc stockid : ' + stock_id
                  + ' date : ' + todaydate + u' 券商筆數 : '
                  + str(len(stock.brokerid)))
            # zip stops at the shortest list, so an incomplete trailing row
            # is dropped instead of raising IndexError.
            for broker, price, bought, sold in zip(
                    stock.brokerid, stock.price, stock.buyvol, stock.sellvol):
                writer.writerow([str(serialnumber), todaydate, 'otc', stock_id,
                                 broker, price, bought, sold])
                serialnumber += 1
            # A short page is the last page of this stock.
            if len(stock.brokerid) < _ROWS_PER_PAGE:
                stocklog = ('stockid : ' + stock_id + ' Date : ' + todaydate
                            + ' total web page : %s' % (str(page))
                            + ' progress rate : ' + progress + '\n')
                print(stocklog)
                break
            print('No : ' + str(page) + ' stockid : ' + stock_id + ' date : '
                  + todaydate + u" write to csv OK......")


def main(startpos, tradedate=None, workroot=_DEFAULT_WORKROOT):
    """Download the trade details of every OTC stock from index *startpos*.

    startpos  -- index into the sorted stock list at which to start.
    tradedate -- label (``YYYYMMDD``) used for the folder, the file names
                 and the date column; defaults to today's date.
    workroot  -- folder under which the per-date folder is created.

    Returns 0 when no stock code could be obtained, otherwise None.
    """
    todaydate = tradedate if tradedate else GetTodayDate(1)
    # Stock codes
    stockid = Getstockid(4)
    if stockid == 0 or len(stockid.stock) == 0:
        return 0
    # Folder holding the files of this trading date
    workdir = os.path.join(workroot, todaydate)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)
    # Save the list of stock codes
    GetStockidtoCSVfile(os.path.join(workdir, todaydate + '.csv'),
                        stockid.stock)
    # Fetch the details of each stock code
    total = len(stockid.stock)
    for i in range(startpos, total):
        stock_id = stockid.stock[i]
        csv_path = os.path.join(workdir, stock_id + '_' + todaydate + '.csv')
        _write_stock_details(csv_path, stock_id, todaydate,
                             str(i) + '/' + str(total))


if __name__ == "__main__":
    main(0)