#!/usr/bin/python # -*- coding: utf-8 -*- #--------------------------------------------- # 抓上市與上櫃各股每月營收 # Version : 1.1 # Author : Amin white # Release Date : 2012-06-27 # Python version : 2.7 #--------------------------------------------- #引用函式庫 import csv, codecs, urllib, datetime, os, time from sgmllib import SGMLParser def main(): #上市櫃公司自90年6月才有登入月營收資料 #包含上市與上櫃 stockkind = ['sii', 'otc'] #指定儲存的路徑,可自行修改儲存路徑 Savefiledir = 'D:\\Revenue\\' #建立儲存營收CSV資料夾 if not os.path.isdir(Savefiledir): os.makedirs(Savefiledir) #取得使用當天的日期 today = datetime.datetime.today() todaysec = time.mktime(datetime.datetime(int(today.strftime('%Y')), int(today.strftime('%m')), int(today.strftime('%d'))).timetuple()) for i in xrange(len(stockkind)): stocktype = stockkind[i] for j in range(2012, 2013): pyADYear = str(j) pyROCYear = str(j - 1911) for k in range(1, 13): #取得從1月至今日每月的營收 Revenuedaysec = time.mktime(datetime.datetime(j, k, 10).timetuple()) if Revenuedaysec <= todaysec: print '取得 ' + pyADYear + ' 年 ' + str('%02d' %k) + ' 月 ' + stocktype + ' 全部公司營收資料' #營收網址 url = "http://mops.twse.com.tw/t21/" + stocktype + "/t21sc03_" + pyROCYear + "_" + str(k) + ".html" #解析網頁開始 webcode = urllib.urlopen(url) if webcode.code == 200: stock = Parser_htm() stock.parse(webcode.read()) webcode.close() #儲存CSV檔名 SaveCSVname = Savefiledir + stocktype + '_' + pyADYear + str('%02d' %k) + '.csv' print '設定寫入檔案名稱與格式內容......' #開始寫入檔案準備 fileoption = codecs.open(SaveCSVname, 'wb') #指定檔案以UTF8儲存 fileoption.write(codecs.BOM_UTF8) #指定CSV檔分隔的方式 writer = csv.writer(fileoption, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) print '寫入營收至 ' + SaveCSVname + ' 開始......' 
#寫入欄位說明 writer.writerow([u'產業別'.encode('utf8'), u'公司代號'.encode('utf8'), u'公司名稱'.encode('utf8'), u'當月營收'.encode('utf8'), u'上月營收'.encode('utf8'),\ u'去年當月營收'.encode('utf8'), u'上月比較增減(%)'.encode('utf8'), u'去年同月增減(%)'.encode('utf8'), u'當月累計營收'.encode('utf8'), \ u'去年累計營收'.encode('utf8'), u'前期比較增減(%)'.encode('utf8')]) for i in xrange(len(stock.totaldata)): totaldata = stock.totaldata[i] #寫入每間公司各營收資料 writer.writerow([totaldata[0].encode('utf8'), totaldata[1].encode('utf8'), totaldata[2].encode('utf8'), \ totaldata[3].encode('utf8'), totaldata[4].encode('utf8'), totaldata[5].encode('utf8'), \ totaldata[6].encode('utf8'), totaldata[7].encode('utf8'), totaldata[8].encode('utf8'), \ totaldata[9].encode('utf8'), totaldata[10].encode('utf8')]) #關閉檔案 fileoption.close() print '寫入營收至 ' + SaveCSVname + ' 完成......\n' #解析營收網頁class class Parser_htm(SGMLParser): #初始化class等同constructor def __init__(self): SGMLParser.__init__(self) #初始化變數數值 def reset(self): SGMLParser.reset(self) self.bPrintDetail = False self.bStartParserHeml = False self.bStartParserdata = False self.bDataempty = False self.bItemclass = False self.szitemclass = "" self.bStockid = False self.szstockid = "" self.bCompanyname = False self.nowrapflag = False self.nowrapcount = 0 self.stockdata = [] self.totaldata = [] #解析網頁 def parse(self,data): self.feed(data) self.close() #解析網頁標籤為tr的內容 def start_tr(self, attrs): if len(attrs) == 1: if attrs[0][0] == 'align': if attrs[0][1] == 'right': self.bStartParserdata = True #解析網頁標籤為th的內容 def start_th(self, attrs): #抓取準備解析資料內容起點 if len(attrs) == 2: if attrs[0][0] == 'class' and attrs[0][1] == 'tt' and attrs[1][0] == 'align' and attrs[1][1] == 'left': self.bItemclass = True self.szitemclass = "" self.bStartParserHeml = True #抓取準備解析資料內容終點 if len(attrs) == 4: if attrs[0][0] == 'class' and attrs[0][1] == 'tt' and \ attrs[1][0] == 'nowrap' and attrs[1][1] == 'nowrap' and \ attrs[2][0] == 'colspan' and attrs[2][1] == '2' and \ attrs[3][0] == 'align' and attrs[3][1] == 'center': 
self.bStartParserHeml = False self.bStartParserdata = False #解析網頁標籤為td的內容 def start_td(self, attrs): #解析td標籤屬性名稱與屬性 if len(attrs) == 1: if attrs[0][0] == 'align': #抓股票代碼與資料解析起點 if attrs[0][1] == 'center': self.bStartParserdata = True self.bStockid = True #抓公司名稱起點 if attrs[0][1] == 'left': self.bCompanyname = True #收尋資料為空字串的終點與解析各營收內容起點 if attrs[0][0] == 'nowrap' and attrs[0][1] == 'nowrap': self.nowrapflag = True self.bDataempty = False self.nowrapcount += 1 #收尋資料為空字串的起點 if len(attrs) == 0: self.bDataempty = True #取得網頁表格內容是文字,數字以外的資料 def handle_entityref(self,ref): #解析資料內容為空字串 if ref == 'nbsp': if self.bDataempty: self.stockdata.append("") self.nowrapcount += 1 #print ref + " " + str(self.nowrapcount) #開始讀取各公司營收資料到暫存list中 def handle_data(self, text): #產業別 if self.bItemclass: data = unicode(text.strip(), "BIG5").encode('utf8').split(':') self.szitemclass = unicode(data[1], "utf8") if self.bPrintDetail: print "產業別 : " + self.szitemclass.encode('utf8') self.bItemclass = False if self.bStartParserdata: #公司代碼或稱股票代碼 if self.bStockid : if self.bPrintDetail: print "公司代號 : " + text.strip() self.szstockid = text.strip() self.stockdata.append(self.szitemclass) self.stockdata.append(self.szstockid) self.bStockid = False self.nowrapcount = 0 #公司名稱 if self.bCompanyname : #使用BIG5解碼,因為支援字型不夠,就需要加入以下的內容,若使用cp950解碼,只需加入網頁無法呈現的字型即可 """if self.szstockid == '1325': data = u'恒大' elif self.szstockid == '2353': data = u'宏碁' elif self.szstockid == '3046': data = u'建碁' elif self.szstockid == '6285': data = u'啟碁科技' elif self.szstockid == '4527': data = u'方土霖' elif self.szstockid == '8111': data = u'立碁電子' elif self.szstockid == '6174': data = u'安碁科技' else: data = text.strip().decode('BIG5')""" if self.szstockid == '4527': data = u'方土霖' else: data = text.strip().decode('cp950') if self.bPrintDetail: print "公司名稱 : " + data.encode('utf8') self.stockdata.append(data) self.bCompanyname = False if self.nowrapflag : self.nowrapflag = False #各營收資料 if self.nowrapcount == 1: if self.bPrintDetail: print 
"當月營收 : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 2: if self.bPrintDetail: print "上月營收 : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 3: if self.bPrintDetail: print "去年當月營收 : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 4: if self.bPrintDetail: print "上月比較增減(%) : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 5: if self.bPrintDetail: print "去年同月增減(%) : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 6: if self.bPrintDetail: print "當月累計營收 : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 7: if self.bPrintDetail: print "去年累計營收 : " + text.strip() data = text.strip().replace(",", "") self.stockdata.append(data) elif self.nowrapcount == 8: if self.bPrintDetail: print "前期比較增減(%) : " + text.strip() + "\n" data = text.strip().replace(",", "") self.stockdata.append(data) self.totaldata.append(self.stockdata) self.stockdata = [] #函數進入點 if __name__ == "__main__": main()
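# Caption from the original post: a screenshot of the script running was shown here (image not included).
# Caption from the original post: the contents of a saved CSV file were shown here (listing not included).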