#!/usr/bin/python
# -*- coding: utf-8 -*-
#---------------------------------------------
# 用下載方式,抓上櫃交易明細
# Version : 1.1
# Author : Amin white
# Release Date : 2012-06-27
# Python version : 2.7.2
#---------------------------------------------
import csv, urllib, codecs, os, shutil, datetime
from urllib import urlretrieve, urlencode
from sgmllib import SGMLParser
def _roc_to_western(saledate):
    """Return *saledate* as a Western ``YYYYMMDD`` string.

    A value shorter than eight characters is treated as a Republic of China
    (Minguo) date whose last four characters are ``MMDD``; 1911 is added to
    the leading year part.  A value of eight or more characters is taken to
    be a Western date already and is returned unchanged.
    """
    if len(saledate) >= 8:
        return saledate
    # Convert only the year prefix.  str.replace() would also rewrite any
    # later occurrence of the same digits (e.g. '1010101').
    return str(int(saledate[:-4]) + 1911) + saledate[-4:]


def WriteSaledayetoCSV(saledate):
    """Record the trading date in OTC.csv unless it is already listed.

    saledate -- the trading date as a string, either in ROC form
                (e.g. '1010627') or in Western form (e.g. '20120627').

    The first two rows of the file are skipped when collecting the dates
    that are already stored.  The date is appended in Western form.
    """
    # Location of the trading-date file; adjust as needed.
    saledatefile = 'D:\\stock_database\\importMySQL_otc\\OTC.csv'
    tempdate = _roc_to_western(saledate)
    # Open for reading and appending; the with-block closes the file even
    # when reading or writing fails.
    with codecs.open(saledatefile, 'ab+') as fileoption:
        # Read from the start regardless of where append mode puts the
        # initial file position.
        fileoption.seek(0)
        saletemplist = [
            col[0].strip()
            for row, col in enumerate(csv.reader(fileoption))
            if row > 1 and col  # skip the first two rows and blank rows
        ]
        if tempdate not in saletemplist:
            # No BOM is written here: it would end up in the middle of the
            # file each time a date is appended.
            csv.writer(fileoption).writerow([tempdate.encode('utf8')])
#上市櫃股票代碼與名稱
class Parser_strMode(SGMLParser):
def __init__(self):
SGMLParser.__init__(self)
def reset(self):
SGMLParser.reset(self)
self.stockinfo = False
self.no = 0
self.stock = []
self.col1 = ""
self.col2 = ""
self.col3 = ""
def parse(self,data):
self.feed(data)
self.close()
def start_td(self, attrs):
for name, value in attrs:
if len(attrs) == 1:
if name == 'bgcolor' and value == '#FAFAD2':
self.stockinfo = True
self.no+=1
self.no%=7
def handle_data(self, text):
if self.stockinfo:
if self.no == 1:
data = text.split(" ")
if data[0].isalnum():
self.col1 = data[0]
self.col2 = data[1]
elif self.no == 6:
self.col3 = text
elif self.no == 7:
self.no = 0
if self.no == 6 and self.col3 == 'ESVUFR' or self.col3 == 'EUOMSR' or self.col3 == 'EMXXXA' or self.col3 == 'ESVUFA':
self.stock.append(self.col1)
self.col3 = "" #[2014/8/3]修正
self.stockinfo = False
def Getstockid(webindex):
    """Download and parse the security-code listing for *webindex*.

    webindex -- value of the ``strMode`` query parameter of the listing page.

    Returns the ``Parser_strMode`` instance (its ``stock`` list sorted) when
    at least one stock code was found.  Returns 0 when the page did not
    answer with HTTP status 200 or when no code was found.
    """
    # Address of the listed/OTC security-code page.
    url = "http://isin.twse.com.tw/isin/C_public.jsp?strMode=%s" % webindex
    webcode = urllib.urlopen(url)
    try:
        # Without a successful response there is nothing to parse.
        if webcode.code != 200:
            return 0
        stock = Parser_strMode()
        stock.parse(webcode.read())
    finally:
        # Release the connection even if reading or parsing fails.
        webcode.close()
    stock.stock.sort()
    if stock.stock:
        return stock
    return 0
#將Stock id寫入CSV file
def GetStockidtoCSVfile(filepath, stock):
    """Write the stock codes in *stock* to a one-column CSV file.

    filepath -- path of the CSV file to create (an existing file is
                overwritten).
    stock    -- sequence of stock-code strings.

    The file starts with a UTF-8 byte-order mark, followed by a header row
    and one row per stock code.
    """
    # The with-block closes the file even if a row cannot be written.
    with open(filepath, 'wb') as writefile:
        # Mark the file as UTF-8.
        writefile.write(codecs.BOM_UTF8)
        writer = csv.writer(writefile)
        # Header row.
        writer.writerow([u'股票代號'.encode('utf8')])
        for stockid in stock:
            writer.writerow([stockid.encode('utf8')])
def GetTodayDate(mode):
    """Return today's date formatted according to *mode*.

    mode -- 1 for a Western date string ``YYYYMMDD``;
            2 for an ROC date string (Western year minus 1911, then ``MMDD``).

    For any other *mode* the ``datetime`` object for today is returned
    unformatted.
    """
    now = datetime.datetime.today()
    western_year = now.strftime('%Y')
    month_and_day = now.strftime('%m') + now.strftime('%d')
    if mode == 1:
        # Western calendar.
        return western_year + month_and_day
    if mode == 2:
        # ROC calendar.
        return str(int(western_year) - 1911) + month_and_day
    return now
def GetCSVfilefromweb(todaydate, stock):
    """Download one trading-detail CSV per stock code for *todaydate*.

    todaydate -- date string sent as ``stk_date`` and used in the directory
                 and file names.
    stock     -- sequence of stock codes.

    The files are stored in a per-date directory, which is created when
    missing.  The list of stock codes is also written there, and one
    progress line is printed per download.
    """
    source_url = 'http://www.gretai.org.tw/ch/stock/aftertrading/broker_trading/download_ALLCSV.php?'
    day_dir = 'D:\\stock_database\\test\\otc\\' + todaydate + '\\'
    if not os.path.isdir(day_dir):
        os.makedirs(day_dir)
    # Keep the list of requested codes next to the downloads.
    GetStockidtoCSVfile(day_dir + todaydate + '.csv', stock)
    total = str(len(stock))
    for index, stockid in enumerate(stock):
        # Passing data makes urlretrieve send the parameters as a POST body.
        post_data = urllib.urlencode({'curstk': stockid, 'stk_date': todaydate})
        saved_as = day_dir + str(stockid) + '_' + todaydate + '.csv'
        urlretrieve(source_url, saved_as, data=post_data)
        position = '%03d' % index
        print(position + ' : ' + saved_as + ' ...... ' + position + '/' + total)
def _otc_detail_row(saledate, stockid, col, offset):
    """Build one output row from the record starting at col[offset].

    The record is read as: col[offset + 1] broker (text before the first
    space is kept), col[offset + 2] price, col[offset + 3] bought volume and
    col[offset + 4] sold volume; thousands separators are removed from the
    volumes.
    """
    brokerid = col[offset + 1].split(" ")[0]
    price = col[offset + 2]
    buyvol = col[offset + 3].replace(',', '')
    sellvol = col[offset + 4].replace(',', '')
    return [saledate.encode('utf8'), 'otc', stockid.encode('utf8'),
            brokerid.encode('utf8'), price.encode('utf8'),
            buyvol.encode('utf8'), sellvol.encode('utf8')]


def Transformcsv(stock):
    """Convert today's downloaded trading-detail CSV files for import.

    stock -- sequence of stock-code strings.

    For every stock code the downloaded file (named with the ROC date) is
    read and a new fully-quoted CSV file (named with the Western date, with
    a UTF-8 byte-order mark and a header row) is written.  The first three
    source rows are skipped.  Rows with 11 columns hold two records side by
    side, rows with 5 columns hold one; rows of any other width are ignored.
    The list of stock codes is also written to the target directory and one
    progress line is printed per stock.
    """
    saledate = GetTodayDate(1)   # Western date, used for the output
    todaydate = GetTodayDate(2)  # ROC date, used by the downloaded files
    sourcecsvdir = 'D:\\stock_database\\test\\otc\\%s\\' % todaydate
    targetdir = 'D:\\stock_database\\importMySQL_otc\\test\\%s\\' % saledate
    if not os.path.isdir(targetdir):
        os.makedirs(targetdir)
    GetStockidtoCSVfile(targetdir + saledate + '.csv', stock)
    for i, stockid in enumerate(stock):
        sourcecsv = sourcecsvdir + stockid + "_" + todaydate + '.csv'
        targetfile = targetdir + stockid + "_" + saledate + '.csv'
        salecount = 0
        # The with-blocks close both files even when a row fails to convert.
        with open(targetfile, 'wb') as writefile:
            writefile.write(codecs.BOM_UTF8)
            writer = csv.writer(writefile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
            writer.writerow([u'交易日期'.encode('utf8'), u'股票種類'.encode('utf8'), u'股票代號'.encode('utf8'),
                             u'券商'.encode('utf8'), u'價格'.encode('utf8'), u'買進股數'.encode('utf8'), u'賣出股數'.encode('utf8')])
            with codecs.open(sourcecsv, 'r') as readfile:
                for row, col in enumerate(csv.reader(readfile)):
                    if row < 3:
                        continue
                    if len(col) == 11:
                        # Two records per row, starting at columns 0 and 6;
                        # a record is present when its first column is set.
                        offsets = [6 * k for k in range(0, 2) if col[6 * k]]
                    elif len(col) == 5:
                        offsets = [0]
                    else:
                        continue
                    for offset in offsets:
                        salecount += 1
                        writer.writerow(_otc_detail_row(saledate, stockid, col, offset))
        print('%03d' % i + ' Stockid : ' + stockid + ' transform ' + todaydate + ' count : ' + '%05d' % salecount + ' ...... ' + '%03d' % i + '/' + str(len(stock)))
def main():
    """Run the whole job: fetch codes, download, transform, record the date."""
    # Fetch the stock codes; Getstockid returns 0 when nothing was obtained.
    stock = Getstockid(4)
    if not stock:
        raise SystemExit('Getstockid(4) returned no stock codes; nothing to do.')
    # Today's date in ROC form, e.g. 1011001.
    todaydate = GetTodayDate(2)
    # Download the trading-detail CSV files.
    GetCSVfilefromweb(todaydate, stock.stock)
    # Convert the downloaded trading details.
    Transformcsv(stock.stock)
    # Record the trading date.  WriteSaledayetoCSV converts from the ROC
    # form to the Western form itself, so it must be given the ROC date.
    WriteSaledayetoCSV(todaydate)
# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
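# [2014/8/3] Fix for fetching OTC stock codes: a variable still held the previous entry's CFICode, so the next entry was misjudged and stock codes appeared more than once.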