#!/usr/bin/python # -*- coding: utf-8 -*- #--------------------------------------------- # 抓上市與上櫃各股每月營收 # Version : 1.1 # Author : Amin white # Release Date : 2012-06-27 # Python version : 2.7.2 #--------------------------------------------- #引用函式庫 import csv, codecs, urllib, datetime, os, time, pdb from sgmllib import SGMLParser def main(): #上市櫃公司自90年6月才有登入月營收資料 #包含上市與上櫃 stockkind = ['sii', 'otc'] #指定儲存的路徑,可自行變更儲存路徑 Savefiledir = 'D:\\Consolidated_Revenue\\' #建立儲存營收CSV資料夾 if not os.path.isdir(Savefiledir): os.makedirs(Savefiledir) #取得使用當天的日期 today = datetime.datetime.today() todaysec = time.mktime(datetime.datetime(int(today.strftime('%Y')), int(today.strftime('%m')), int(today.strftime('%d'))).timetuple()) for i in xrange(len(stockkind)): stocktype = stockkind[i] for j in range(2012, 2013): pyADYear = str(j) pyROCYear = str(j - 1911) for k in range(1, 13): #取得從1月至今日每月的營收 Revenuedaysec = time.mktime(datetime.datetime(j, k, 10).timetuple()) if Revenuedaysec <= todaysec: print '取得 ' + pyADYear + ' 年 ' + str('%02d' %k) + ' 月 ' + stocktype + ' 全部公司營收資料' #營收網址 url = 'http://mops.twse.com.tw/mops/web/ajax_t21sb06?TYPEK=' + stocktype + '&year= ' + pyROCYear + '&month=' + str('%02d' %k) + '&step=1&firstin=1&off=1' #解析網頁開始 webcode = urllib.urlopen(url) if webcode.code == 200: stock = Parser_htm() stock.parse(webcode.read()) webcode.close() #儲存CSV檔名 SaveCSVname = Savefiledir + stocktype + '_' + pyADYear + str('%02d' %k) + '.csv' print '設定寫入檔案名稱與格式內容......' #開始寫入檔案準備 fileoption = codecs.open(SaveCSVname, 'wb') #指定檔案以UTF8儲存 fileoption.write(codecs.BOM_UTF8) #指定CSV檔分隔的方式 writer = csv.writer(fileoption, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) print '寫入營收至 ' + SaveCSVname + ' 開始......' #寫入欄位說明 writer.writerow([u'公司代號'.encode('utf8'), u'公司名稱'.encode('utf8'), u'當月營收'.encode('utf8'), u'上月營收'.encode('utf8'),\ u'去年當月營收'.encode('utf8'), u'上月比較增減(%)'.encode('utf8'), u'去年同月增減(%)'.encode('utf8'), u'當月累計營收'.encode('utf8'), \ u'去年累計營收'.encode('utf8'), u'前期比較增減(%)'.encode('utf8')]) for i in xrange(len(stock.totaldata)): totaldata = stock.totaldata[i] #寫入每間公司各營收資料 writer.writerow([totaldata[0].encode('utf8'), totaldata[1].encode('utf8'), totaldata[2].encode('utf8'), \ totaldata[3].encode('utf8'), totaldata[4].encode('utf8'), totaldata[5].encode('utf8'), \ totaldata[6].encode('utf8'), totaldata[7].encode('utf8'), totaldata[8].encode('utf8'), \ totaldata[9].encode('utf8')]) #關閉檔案 fileoption.close() print '寫入營收至 ' + SaveCSVname + ' 完成......\n' #解析營收網頁class class Parser_htm(SGMLParser): #初始化class def __init__(self): SGMLParser.__init__(self) #初始化變數數值 def reset(self): SGMLParser.reset(self) self.bPrintDetail = False self.bEven_Odd_Row = False self.Id_Company_count = 0 self.bId_Company = False self.Rowcount = 0 self.bRow = False self.btblHead = False self.tblHeadcount =0 self.bHeadname = False self.Headnamecount = 0 self.Headname = [] self.stockdata = [] self.totaldata = [] #解析網頁 def parse(self,data): self.feed(data) self.close() #解析網頁標籤為tr的內容 def start_tr(self, attrs): if len(attrs) == 1: if attrs[0][0] == 'class': if attrs[0][1] == 'tblHead': self.tblHeadcount += 1 if attrs[0][1] == 'even' or attrs[0][1] == 'odd': self.bEven_Odd_Row = True def start_td(self, attrs): if len(attrs) == 0 and self.bEven_Odd_Row: self.Id_Company_count += 1 self.bId_Company = True if len(attrs) == 1: if attrs[0][0] == 'align' and attrs[0][1] == 'right': self.Rowcount += 1 self.bRow = True def start_th(self, attrs): if len(attrs) == 0 and self.tblHeadcount == 2: self.Headnamecount += 1 self.bHeadname = True def handle_data(self, text): if self.bHeadname and self.Headnamecount <= 9: #print text.strip().decode('utf8').encode('utf8') self.Headname.append(text.strip().decode('utf8').encode('utf8')) self.bHeadname = False else: self.Headnamecount = 0 if self.bId_Company: if self.Id_Company_count == 1: if self.bPrintDetail: print self.Headname[0] + ' : ' + text.strip() self.stockdata.append(text.strip().decode('utf8')) elif self.Id_Company_count == 2: #data = text.strip().decode('BIG5') if self.bPrintDetail: print self.Headname[1] + ' : ' + text.strip() self.stockdata.append(text.strip().decode('utf8')) self.Id_Company_count = 0 self.bId_Company = False if self.bRow: self.bRow = False if self.Rowcount < 8: if self.bPrintDetail: print self.Headname[self.Rowcount + 2] + ' : ' + text.strip().replace(",", "") self.stockdata.append(text.strip()) elif self.Rowcount == 8: if self.bPrintDetail: print self.Headname[self.Rowcount + 1] + ' : ' + text.strip().replace(",", "") + '\n' self.stockdata.append(text.strip()) self.Rowcount = 0 self.totaldata.append(self.stockdata) self.stockdata = [] """if self.Rowcount == 1: print '當月合併營收 : ' + text.strip() elif self.Rowcount == 2: print '上月合併營收 : ' + text.strip() elif self.Rowcount == 3: print '去年當月合併營收 : ' + text.strip() elif self.Rowcount == 4: print '上月比較增減(%) : ' + text.strip() elif self.Rowcount == 5: print '去年同月增減(%) : ' + text.strip() elif self.Rowcount == 6: print '當年累計營收 : ' + text.strip() elif self.Rowcount == 7: print '去年累計營收 : ' + text.strip() elif self.Rowcount == 8: print '前期比較增減(%) : ' + text.strip() + '\n' self.Rowcount = 0""" #函數進入點 if __name__ == "__main__": main()
Python執行畫面如下圖所示
CSV檔案儲存內容如下