#!/usr/bin/python
# -*- coding: utf-8 -*-
#---------------------------------------------
# Download the daily OTC broker trading details as CSV files.
# Version : 1.1
# Author : Amin white
# Release Date : 2012-06-27
# Python version : 2.7.2 (the guarded imports below also allow Python 3)
#---------------------------------------------
import codecs
import csv
import datetime
import io
import os
import shutil
import sys
import urllib

try:
    # Python 2 locations.
    from urllib import urlencode, urlopen, urlretrieve
    from sgmllib import SGMLParser
except ImportError:
    # Python 3: the urllib helpers moved and sgmllib no longer exists.
    from urllib.parse import urlencode
    from urllib.request import urlopen, urlretrieve
    from html.parser import HTMLParser

    class SGMLParser(HTMLParser):
        """Small stand-in for ``sgmllib.SGMLParser`` on Python 3.

        Only the feature this script relies on is provided: every start tag
        is routed to a ``start_<tag>(attrs)`` method when one is defined.
        """

        def handle_starttag(self, tag, attrs):
            handler = getattr(self, 'start_' + tag, None)
            if handler is not None:
                handler(attrs)


PY2 = sys.version_info[0] == 2

# File that accumulates the trading dates already processed.
SALEDATE_FILE = r'D:\stock_database\importMySQL_otc\OTC.csv'
# Root folder of the raw per-stock downloads (one sub-folder per ROC date).
DOWNLOAD_ROOT = 'D:\\stock_database\\test\\otc\\'
# Root folder of the converted files (one sub-folder per Gregorian date).
IMPORT_ROOT = 'D:\\stock_database\\importMySQL_otc\\test\\'

STOCK_LIST_URL = "http://isin.twse.com.tw/isin/C_public.jsp?strMode=%s"
DOWNLOAD_URL = ('http://www.gretai.org.tw/ch/stock/aftertrading/'
                'broker_trading/download_ALLCSV.php?')

# Values of the sixth table column (CFICode) that mark a row to keep.
WANTED_CFI_CODES = ('ESVUFR', 'EUOMSR', 'EMXXXA', 'ESVUFA')

# Column titles of the converted trading-detail CSV files.
DETAIL_HEADER = (u'交易日期', u'股票種類', u'股票代號', u'券商',
                 u'價格', u'買進股數', u'賣出股數')

# Only used on Python 3, where bytes must be decoded before parsing.
# NOTE(review): assumes both remote sources are Big5/cp950 - confirm.
PAGE_ENCODING = 'cp950'
SOURCE_ENCODING = 'cp950'


def _cell(text):
    """Return *text* in the string type csv.writer expects here."""
    if PY2 and not isinstance(text, str):
        # Python 2's csv module writes byte strings, so encode unicode.
        return text.encode('utf8')
    return text


def _open_csv(path, mode, encoding='utf-8'):
    """Open *path* for the csv module: binary on Python 2, text on Python 3."""
    if PY2:
        return open(path, mode + 'b')
    # newline='' leaves the line endings entirely to the csv module.
    return io.open(path, mode, newline='', encoding=encoding,
                   errors='replace')


def _create_utf8_csv(path):
    """Create *path* as a new UTF-8 file that starts with a BOM."""
    handle = _open_csv(path, 'w', 'utf-8-sig')
    if PY2:
        # On Python 3 the 'utf-8-sig' codec emits the BOM by itself.
        handle.write(codecs.BOM_UTF8)
    return handle


def WriteSaledayetoCSV(saledate, saledatefile=None):
    """Append today's trading date to the date file unless already listed.

    saledate     -- date in ROC form: a three digit year followed by the
                    month and day, e.g. '1011001'.
    saledatefile -- file to update; defaults to SALEDATE_FILE.

    The date is stored in Gregorian form (ROC year + 1911), e.g. '20121001'.
    """
    if saledatefile is None:
        saledatefile = SALEDATE_FILE

    recorded = set()
    if os.path.exists(saledatefile):
        with _open_csv(saledatefile, 'r', 'utf-8-sig') as handle:
            for index, fields in enumerate(csv.reader(handle)):
                # NOTE(review): the first two rows are never compared,
                # presumably because they are header rows - confirm.
                if index > 1 and fields:
                    recorded.add(fields[0].strip())

    # Convert only the leading year. str.replace() would also rewrite later
    # occurrences of the same digits (ROC '1011010' became '201220120').
    tempdate = str(int(saledate[0:3]) + 1911) + saledate[3:]

    if tempdate not in recorded:
        # No BOM is written here: the file is appended to, and a BOM in the
        # middle of the file would corrupt the following value.
        with _open_csv(saledatefile, 'a') as handle:
            csv.writer(handle).writerow([_cell(tempdate)])


class Parser_strMode(SGMLParser):
    """Collect stock ids from the table cells of the stock list page.

    Only <td> tags whose single attribute is bgcolor="#FAFAD2" are counted.
    They are numbered 1..6 and then 0, i.e. in cycles of seven. The first
    cell of a cycle supplies the id, the sixth one the CFICode. After
    parse(), ``stock`` holds the ids whose CFICode is in WANTED_CFI_CODES.
    """

    def __init__(self):
        SGMLParser.__init__(self)

    def reset(self):
        SGMLParser.reset(self)
        self.stockinfo = False  # True until the text of a counted cell is seen
        self.no = 0             # position of the current cell in its cycle
        self.stock = []         # collected stock ids
        self.col1 = ""          # id taken from the first cell
        self.col2 = ""          # remainder of the first cell
        self.col3 = ""          # CFICode taken from the sixth cell

    def parse(self, data):
        """Feed the complete document *data* and finish parsing."""
        self.feed(data)
        self.close()

    def start_td(self, attrs):
        if len(attrs) != 1:
            return
        name, value = attrs[0]
        if name == 'bgcolor' and value == '#FAFAD2':
            self.stockinfo = True
            self.no = (self.no + 1) % 7

    def handle_data(self, text):
        if not self.stockinfo:
            return
        if self.no == 1:
            # Forget the previous row so its id cannot be appended again
            # when this row does not supply a usable one.
            self.col1 = ""
            self.col2 = ""
            data = text.split(" ")
            if data[0].isalnum():
                self.col1 = data[0]
                self.col2 = data[1] if len(data) > 1 else ""
        elif self.no == 6:
            self.col3 = text
            # The old test "no == 6 and a or b or c or d" bound as
            # "(no == 6 and a) or b or c or d", so a left-over CFICode
            # matched on later cells and produced duplicate ids.
            if self.col3 in WANTED_CFI_CODES and self.col1:
                self.stock.append(self.col1)
            self.col3 = ""
        # Only the first text chunk of a cell is taken into account.
        self.stockinfo = False


def Getstockid(webindex):
    """Download the stock list page and return the parser holding the ids.

    webindex -- value placed in the strMode query parameter.

    Returns the Parser_strMode instance (ids sorted in ``.stock``), or 0
    when the response status is not 200 or no id was found.
    """
    webcode = urlopen(STOCK_LIST_URL % webindex)
    try:
        if webcode.getcode() != 200:
            return 0
        page = webcode.read()
    finally:
        webcode.close()

    if not isinstance(page, str):
        # Python 3 delivers bytes, but the parser needs text.
        page = page.decode(PAGE_ENCODING, 'replace')

    stock = Parser_strMode()
    stock.parse(page)
    stock.stock.sort()
    if stock.stock:
        return stock
    return 0


def GetStockidtoCSVfile(filepath, stock):
    """Write the stock ids to *filepath* as a UTF-8 CSV file with a BOM.

    filepath -- file to create (an existing file is overwritten).
    stock    -- sequence of stock id strings; one row is written per id
                below a single title row.
    """
    with _create_utf8_csv(filepath) as writefile:
        writer = csv.writer(writefile)
        writer.writerow([_cell(u'股票代號')])
        for stockid in stock:
            writer.writerow([_cell(stockid)])


def GetTodayDate(mode):
    """Return today's date as a compact string.

    mode -- 1 for the Gregorian form 'YYYYMMDD', 2 for the ROC form
            (year - 1911 followed by 'MMDD').

    Any other mode returns the datetime object itself, as before.
    """
    today = datetime.datetime.today()
    if mode == 1:
        return today.strftime('%Y%m%d')
    if mode == 2:
        return str(today.year - 1911) + today.strftime('%m%d')
    return today


def GetCSVfilefromweb(todaydate, stock, targetroot=None):
    """Download one trading-detail CSV file per stock id.

    todaydate  -- ROC date string sent as 'stk_date'; also names the folder.
    stock      -- sequence of stock id strings.
    targetroot -- folder receiving the '<todaydate>' sub-folder; defaults
                  to DOWNLOAD_ROOT.

    The id list is saved as '<todaydate>.csv' in the same sub-folder.
    """
    if targetroot is None:
        targetroot = DOWNLOAD_ROOT
    targetstockiddir = os.path.join(targetroot, todaydate)
    if not os.path.isdir(targetstockiddir):
        os.makedirs(targetstockiddir)

    GetStockidtoCSVfile(
        os.path.join(targetstockiddir, todaydate + '.csv'), stock)

    total = len(stock)
    for i, stockid in enumerate(stock):
        args = urlencode({'curstk': stockid, 'stk_date': todaydate})
        if not isinstance(args, bytes):
            # Python 3 requires the POST body as bytes.
            args = args.encode('ascii')
        CVSfile = os.path.join(
            targetstockiddir, str(stockid) + '_' + todaydate + '.csv')
        urlretrieve(DOWNLOAD_URL, CVSfile, data=args)
        print('%03d : %s ...... %03d/%d' % (i, CVSfile, i, total))


def _transform_one(sourcecsv, targetfile, saledate, stockid):
    """Convert one downloaded file and return the number of rows written."""
    salecount = 0
    with _create_utf8_csv(targetfile) as writefile:
        writer = csv.writer(writefile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_ALL)
        writer.writerow([_cell(title) for title in DETAIL_HEADER])

        with _open_csv(sourcecsv, 'r', SOURCE_ENCODING) as readfile:
            for row, col in enumerate(csv.reader(readfile)):
                if row < 3:
                    # The first three lines are not trading records.
                    continue
                if len(col) == 11:
                    # Two records side by side, starting at columns 0 and 6.
                    # A side is skipped when its first cell is empty.
                    records = [col[start:start + 5]
                               for start in (0, 6) if col[start]]
                elif len(col) == 5:
                    records = [col]
                else:
                    records = []

                for record in records:
                    salecount += 1
                    writer.writerow([
                        _cell(saledate),
                        'otc',
                        _cell(stockid),
                        # Keep only the part before the first space.
                        _cell(record[1].split(" ")[0]),
                        _cell(record[2]),
                        _cell(record[3].replace(',', '')),
                        _cell(record[4].replace(',', '')),
                    ])
    return salecount


def Transformcsv(stock, sourceroot=None, targetroot=None):
    """Convert today's downloaded files into the import layout.

    stock      -- sequence of stock id strings.
    sourceroot -- folder holding the '<ROC date>' download sub-folder;
                  defaults to DOWNLOAD_ROOT.
    targetroot -- folder receiving the '<Gregorian date>' sub-folder;
                  defaults to IMPORT_ROOT.

    Each '<id>_<ROC date>.csv' becomes '<id>_<Gregorian date>.csv' with the
    columns of DETAIL_HEADER. The id list is saved next to the results.
    """
    if sourceroot is None:
        sourceroot = DOWNLOAD_ROOT
    if targetroot is None:
        targetroot = IMPORT_ROOT

    saledate = GetTodayDate(1)   # Gregorian
    todaydate = GetTodayDate(2)  # ROC
    sourcecsvdir = os.path.join(sourceroot, todaydate)
    targetdir = os.path.join(targetroot, saledate)
    if not os.path.isdir(targetdir):
        os.makedirs(targetdir)

    GetStockidtoCSVfile(os.path.join(targetdir, saledate + '.csv'), stock)

    total = len(stock)
    for i, stockid in enumerate(stock):
        sourcecsv = os.path.join(
            sourcecsvdir, stockid + "_" + todaydate + '.csv')
        targetfile = os.path.join(
            targetdir, stockid + "_" + saledate + '.csv')
        salecount = _transform_one(sourcecsv, targetfile, saledate, stockid)
        print('%03d Stockid : %s transform %s count : %05d ...... %03d/%d'
              % (i, stockid, todaydate, salecount, i, total))


def main():
    """Fetch the id list, download today's details and convert them."""
    stock = Getstockid(4)
    if not stock:
        # Getstockid() reports failure with 0; there is nothing to process.
        print('Unable to fetch the stock id list; nothing was downloaded.')
        return

    # Today's date in ROC form, e.g. 1011001.
    todaydate = GetTodayDate(2)
    GetCSVfilefromweb(todaydate, stock.stock)
    Transformcsv(stock.stock)
    # WriteSaledayetoCSV() expects the ROC form and converts it itself;
    # passing the Gregorian form produced values such as '211240803'.
    WriteSaledayetoCSV(todaydate)


if __name__ == "__main__":
    main()
# [2014/8/3] 修正:抓取上櫃股票代碼時,變數中殘留上一筆資料的 CFICode,導致下一筆資料判別錯誤,因而重複出現相同的股票代碼。