Excel VBA雖然可以用不同方法去取得網頁資訊,不過在特定網頁上不是每種方法都可行,需要花時間去試驗,這點與Python只要寫幾行程式碼就能達成相同功能是不同,驗證一句常聽過的話,「方法多不見得有用,有用的方法一種就好」,至於那一種方法好用,在 Excel 中筆者無法給一個標準答案。
請注意:我再次重申,部落格文章的程式碼,是要提供為參考與學習,一旦網頁改版請自行修改,別要求東要求西要我主動修改,你們用我寫東西程式賺錢了、交差了,請問有分我一杯羹嗎?既然賺錢沒分我,請問有什麼理由要求我修改,如果沒能力改,就花錢來找我上課。
以下就來看看相關的Excel VBA與Python程式碼。
公開資訊觀測站的重大訊息公告。
Postman畫面。
Excel VBA 抓取網頁資料的程式碼。
Option Explicit
Sub 公開資訊觀測站公告()
Dim tmp As String
Dim html As Object, html2 As Object
Dim htable As Object, htable2 As Object
Dim i As Integer, j As Integer, year As Integer, month As Integer
Dim Row As Integer
Dim url As String
Dim url2 As String
Dim stockid As String
stockid = Sheets(1).Range("A2")
year = Sheets(1).Range("B2")
url = "http://mops.twse.com.tw/mops/web/ajax_t05st01?firstin=1&TYPEK=sii&co_id=" & stockid & "&year=" & year & "&month=&b_date=&e_date="
Set html = CreateObject("htmlFile")
Set html2 = CreateObject("htmlFile")
html.body.innerHTML = encodeData(downloadData(url))
Set htable = html.getElementsByTagName("table")(1)
With ActiveSheet
Row = .Range("A65536").End(xlUp).Row
If Row < 5 Then
Row = 5
End If
.Range("A5:F" & Row).Clear
For i = 0 To htable.Rows.Length - 1
For j = 0 To htable.Rows(i).Cells.Length - 1
If (htable.Rows(i).Cells.Length - 2) >= j Then
.Cells(i + 1 + 3, j + 1) = Trim(htable.Rows(i).Cells(j).innerText)
End If
Next
If (htable.Rows.Length - 2) >= i Then
html2.body.innerHTML = encodeData(downloadData(recombineURL(html.getElementsByTagName("input")(14).onclick)))
Set htable2 = html2.getElementsByTagName("pre")(1)
.Cells(i + 2 + 3, j) = Trim(htable2.innerHTML)
End If
Next
End With
Set html = Nothing
Set htable = Nothing
Set html2 = Nothing
Set htable2 = Nothing
End Sub
Function downloadData(url As String)
Dim oXMLHTTP As Object
Set oXMLHTTP = CreateObject("Msxml2.XMLHTTP")
With oXMLHTTP
.Open "GET", url, False
.send
If .Status = 200 Then
downloadData = oXMLHTTP.ResponseBody
Else
MsgBox "無法取得資料"
End If
End With
Set oXMLHTTP = Nothing
End Function
Function encodeData(str As String)
Dim objStream As Object
Set objStream = CreateObject("ADODB.Stream")
With objStream
.Open
.WriteText str
.Position = 0
.Type = 2
.Charset = "UTF-8"
encodeData = .ReadText
.Close
End With
Set objStream = Nothing
End Function
Function recombineURL(str As String)
Dim tmp As String
tmp = Replace(Split(Split(str, "{")(1), "}")(0), Chr(10), "")
tmp = Replace(tmp, "'", "")
tmp = Replace(tmp, "document.t05st01_fm.seq_no.value", "seq_no")
tmp = Replace(tmp, ";document.t05st01_fm.spoke_time.value", "&spoke_time")
tmp = Replace(tmp, ";document.t05st01_fm.spoke_date.value", "&spoke_date")
tmp = Replace(tmp, ";document.t05st01_fm.co_id.value", "&co_id")
tmp = Replace(tmp, ";document.t05st01_fm.TYPEK.value", "&TYPEK")
tmp = "http://mops.twse.com.tw/mops/web/ajax_t05st01?firstin=1&" & Split(tmp, ";")(0) & "&step=2"
recombineURL = Trim(tmp)
End Function
Sub ClearData()
Dim Row As Integer
With Sheets(1)
Row = .Range("A65536").End(xlUp).Row
If Row < 5 Then Row = 5
.Range("A5:F" & Row).Clear
End With
End Sub
或第二種寫法
Option Explicit
Sub 公開資訊觀測站公告()
Dim tmp As String
Dim html As Object, html2 As Object
Dim htable As Object, htable2 As Object
Dim i As Integer, j As Integer, year As Integer, month As Integer
Dim Row As Integer
Dim url As String
Dim url2 As String
Dim stockid As String
stockid = Sheets(1).Range("A2")
year = Sheets(1).Range("B2")
url = "http://mops.twse.com.tw/mops/web/ajax_t05st01?firstin=1&TYPEK=sii&co_id=" & stockid & "&year=" & year & "&month=&b_date=&e_date="
Set html = CreateObject("htmlFile")
Set html2 = CreateObject("htmlFile")
html.body.innerHTML = downloadData(url)
Set htable = html.getElementsByTagName("table")(1)
With ActiveSheet
Row = .Range("A65536").End(xlUp).Row
If Row < 5 Then
Row = 5
End If
.Range("A5:F" & Row).Clear
For i = 0 To htable.Rows.Length - 1
For j = 0 To htable.Rows(i).Cells.Length - 1
If (htable.Rows(i).Cells.Length - 2) >= j Then
.Cells(i + 1 + 3, j + 1) = Trim(htable.Rows(i).Cells(j).innerText)
End If
Next
If (htable.Rows.Length - 2) >= i Then
html2.body.innerHTML = downloadData(recombineURL(html.getElementsByTagName("input")(14).onclick))
Set htable2 = html2.getElementsByTagName("pre")(1)
.Cells(i + 2 + 3, j) = Trim(htable2.innerHTML)
End If
Next
End With
Set html = Nothing
Set htable = Nothing
Set html2 = Nothing
Set htable2 = Nothing
End Sub
Function downloadData(url As String)
Dim oXMLHTTP As Object
Set oXMLHTTP = CreateObject("Msxml2.XMLHTTP")
With oXMLHTTP
.Open "GET", url, False
.send
If .Status = 200 Then
downloadData = .ResponseText
Else
MsgBox "無法取得資料"
End If
End With
Set oXMLHTTP = Nothing
End Function
Function recombineURL(str As String)
Dim tmp As String
tmp = Replace(Split(Split(str, "{")(1), "}")(0), Chr(10), "")
tmp = Replace(tmp, "'", "")
tmp = Replace(tmp, "document.t05st01_fm.seq_no.value", "seq_no")
tmp = Replace(tmp, ";document.t05st01_fm.spoke_time.value", "&spoke_time")
tmp = Replace(tmp, ";document.t05st01_fm.spoke_date.value", "&spoke_date")
tmp = Replace(tmp, ";document.t05st01_fm.co_id.value", "&co_id")
tmp = Replace(tmp, ";document.t05st01_fm.TYPEK.value", "&TYPEK")
tmp = "http://mops.twse.com.tw/mops/web/ajax_t05st01?firstin=1&" & Split(tmp, ";")(0) & "&step=2"
recombineURL = Trim(tmp)
End Function
Sub ClearData()
Dim Row As Integer
With Sheets(1)
Row = .Range("A65536").End(xlUp).Row
If Row < 5 Then Row = 5
.Range("A5:F" & Row).Clear
End With
End Sub
Excel 檔案畫面。Python 抓取網頁資料的程式碼。
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
stock = 2330
year = 106
url= "http://mops.twse.com.tw/mops/web/ajax_t05st01?firstin=1&TYPEK=sii&co_id="+ str(stock) + "&year=" + str(year) + "&month=&b_date=&e_date="
res = requests.get(url)
res.encoding='utf-8'
soup = bs(res.text,"lxml")
tb = soup.select('table')[1]
df = pd.read_html(tb.prettify('utf-8'), encoding= 'utf-8')
df[0]
兩種語言程式碼的比較。
有興趣Python或Excel VBA,可以參考比較,或隨時注意部落格「最新消息」公布的「Exccl VBA爬蟲入門班」開課時間。