#!/usr/bin/python
# -*- coding: utf-8 -*-
#---------------------------------------------
# 抓上市與上櫃各股每月營收
# Version : 1.1
# Author : Amin white
# Release Date : 2012-06-27
# Python version : 2.7.2
#---------------------------------------------
#引用函式庫
import csv, codecs, urllib, datetime, os, time, pdb
from sgmllib import SGMLParser
def main():
    """Download monthly revenue pages from MOPS and save them as CSV files.

    For each market type ('sii' and 'otc') and each month of 2012 whose 10th
    day is not later than today, the revenue page is fetched, parsed with
    Parser_htm and written to '<market>_<YYYYMM>.csv' inside the save
    directory (created when missing). Months whose page does not answer with
    HTTP status 200 are skipped.
    """
    # Original author's note: listed/OTC companies only have monthly revenue
    # data registered from June of year 90 onwards.
    stockkind = ['sii', 'otc']
    # Output directory; edit this path to store the CSV files elsewhere.
    Savefiledir = 'D:\\Consolidated_Revenue\\'
    if not os.path.isdir(Savefiledir):
        os.makedirs(Savefiledir)
    today = datetime.date.today()
    for stocktype in stockkind:
        for year in range(2012, 2013):
            pyADYear = str(year)
            # The `year=` query parameter is sent as the AD year minus 1911.
            pyROCYear = str(year - 1911)
            for month in range(1, 13):
                # A month is requested only once its 10th day has been reached.
                if datetime.date(year, month, 10) > today:
                    continue
                month_text = '%02d' % month
                print('取得 ' + pyADYear + ' 年 ' + month_text + ' 月 ' + stocktype + ' 全部公司營收資料')
                # No blank after 'year=': the original literal embedded a
                # space inside the query string.
                url = ('http://mops.twse.com.tw/mops/web/ajax_t21sb06?TYPEK=' + stocktype
                       + '&year=' + pyROCYear + '&month=' + month_text
                       + '&step=1&firstin=1&off=1')
                rows = _fetch_revenue_rows(url)
                if rows is None:
                    continue
                SaveCSVname = Savefiledir + stocktype + '_' + pyADYear + month_text + '.csv'
                print('設定寫入檔案名稱與格式內容......')
                print('寫入營收至 ' + SaveCSVname + ' 開始......')
                _write_revenue_csv(SaveCSVname, rows)
                print('寫入營收至 ' + SaveCSVname + ' 完成......\n')


def _fetch_revenue_rows(url):
    """Return the company rows parsed from url, or None unless the status is 200."""
    webcode = urllib.urlopen(url)
    try:
        if webcode.code != 200:
            return None
        stock = Parser_htm()
        stock.parse(webcode.read())
        return stock.totaldata
    finally:
        # Close the response on every path, including non-200 answers and
        # errors raised while reading or parsing.
        webcode.close()


def _write_revenue_csv(path, rows):
    """Write the column header and the first ten fields of each row to path.

    The file starts with a UTF-8 BOM and every value is quoted.
    """
    header = [u'公司代號', u'公司名稱', u'當月營收', u'上月營收',
              u'去年當月營收', u'上月比較增減(%)', u'去年同月增減(%)',
              u'當月累計營收', u'去年累計營收', u'前期比較增減(%)']
    # `with` guarantees the file is closed even when a write fails.
    with codecs.open(path, 'wb') as fileoption:
        fileoption.write(codecs.BOM_UTF8)
        writer = csv.writer(fileoption, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        writer.writerow([_to_utf8(title) for title in header])
        for row in rows:
            writer.writerow([_to_utf8(field) for field in row[:10]])


def _to_utf8(value):
    """Encode unicode text as UTF-8 bytes; pass byte strings through untouched.

    Calling .encode('utf8') on a byte string makes Python 2 decode it as ASCII
    first, which fails for any non-ASCII cell.
    """
    if isinstance(value, unicode):
        return value.encode('utf8')
    return value
#解析營收網頁class
class Parser_htm(SGMLParser):
#初始化class
def __init__(self):
SGMLParser.__init__(self)
#初始化變數數值
def reset(self):
SGMLParser.reset(self)
self.bPrintDetail = False
self.bEven_Odd_Row = False
self.Id_Company_count = 0
self.bId_Company = False
self.Rowcount = 0
self.bRow = False
self.btblHead = False
self.tblHeadcount =0
self.bHeadname = False
self.Headnamecount = 0
self.Headname = []
self.stockdata = []
self.totaldata = []
#解析網頁
def parse(self,data):
self.feed(data)
self.close()
#解析網頁標籤為tr的內容
def start_tr(self, attrs):
if len(attrs) == 1:
if attrs[0][0] == 'class':
if attrs[0][1] == 'tblHead':
self.tblHeadcount += 1
if attrs[0][1] == 'even' or attrs[0][1] == 'odd':
self.bEven_Odd_Row = True
def start_td(self, attrs):
if len(attrs) == 0 and self.bEven_Odd_Row:
self.Id_Company_count += 1
self.bId_Company = True
if len(attrs) == 1:
if attrs[0][0] == 'align' and attrs[0][1] == 'right':
self.Rowcount += 1
self.bRow = True
def start_th(self, attrs):
if len(attrs) == 0 and self.tblHeadcount == 2:
self.Headnamecount += 1
self.bHeadname = True
def handle_data(self, text):
if self.bHeadname and self.Headnamecount <= 9:
#print text.strip().decode('utf8').encode('utf8')
self.Headname.append(text.strip().decode('utf8').encode('utf8'))
self.bHeadname = False
else:
self.Headnamecount = 0
if self.bId_Company:
if self.Id_Company_count == 1:
if self.bPrintDetail:
print self.Headname[0] + ' : ' + text.strip()
self.stockdata.append(text.strip().decode('utf8'))
elif self.Id_Company_count == 2:
#data = text.strip().decode('BIG5')
if self.bPrintDetail:
print self.Headname[1] + ' : ' + text.strip()
self.stockdata.append(text.strip().decode('utf8'))
self.Id_Company_count = 0
self.bId_Company = False
if self.bRow:
self.bRow = False
if self.Rowcount < 8:
if self.bPrintDetail:
print self.Headname[self.Rowcount + 2] + ' : ' + text.strip().replace(",", "")
self.stockdata.append(text.strip())
elif self.Rowcount == 8:
if self.bPrintDetail:
print self.Headname[self.Rowcount + 1] + ' : ' + text.strip().replace(",", "") + '\n'
self.stockdata.append(text.strip())
self.Rowcount = 0
self.totaldata.append(self.stockdata)
self.stockdata = []
"""if self.Rowcount == 1:
print '當月合併營收 : ' + text.strip()
elif self.Rowcount == 2:
print '上月合併營收 : ' + text.strip()
elif self.Rowcount == 3:
print '去年當月合併營收 : ' + text.strip()
elif self.Rowcount == 4:
print '上月比較增減(%) : ' + text.strip()
elif self.Rowcount == 5:
print '去年同月增減(%) : ' + text.strip()
elif self.Rowcount == 6:
print '當年累計營收 : ' + text.strip()
elif self.Rowcount == 7:
print '去年累計營收 : ' + text.strip()
elif self.Rowcount == 8:
print '前期比較增減(%) : ' + text.strip() + '\n'
self.Rowcount = 0"""
#函數進入點
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Python執行畫面如下圖所示
# CSV檔案儲存內容如下