import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
# Query form that this script scrapes.
url = "http://www.nhi.gov.tw/QueryN/Query3.aspx"

# Fetch the blank form. The timeout stops an unresponsive server from hanging
# the script, and raise_for_status() reports an HTTP error here rather than
# as a confusing parse failure further down.
res = requests.get(url, timeout=30)
res.raise_for_status()
soup = bs(res.text, 'lxml')

# Every <option> element of the county dropdown.
sle = soup.select('#ctl00_ContentPlaceHolder1_ddlCountyAreaCode option')

# Build one row per option: [value, label] when the option carries a
# non-empty value, otherwise an empty row (which pandas fills with NaN).
# NOTE(review): the empty-valued option is presumably the "please select"
# placeholder at the top of the dropdown -- confirm against the live page.
df = []
for val in sle:
    tmp = []
    if val.get('value'):
        tmp.append(val['value'])
        tmp.append(val.text)
    df.append(tmp)

# Column 0 holds the option value, column 1 the visible label.
CountyAreaCode = pd.DataFrame(df)
# Discard the row labelled 0, i.e. the first entry of the dropdown.
CountyAreaCode = CountyAreaCode.drop([0])
# Form fields for the first POST: select county code '01' so that the server
# returns a page whose town dropdown is populated.
payload = {
    'ctl00$ContentPlaceHolder1$ddlCountyAreaCode': '01',
}

# Echo back every hidden <input> of the page just received.
# The attribute selector must use a single '='; 'type==hidden' is not valid
# CSS and is rejected by current selector engines.
for ele in soup.select('input[type=hidden]'):
    name = ele.get('name')
    if name:
        # A hidden input without a value attribute is submitted as ''.
        payload[name] = ele.get('value', '')

res = requests.post(url, data=payload, timeout=30)
res.raise_for_status()
soup = bs(res.text, "lxml")

# Every <option> element of the town dropdown.
sle = soup.select('#ctl00_ContentPlaceHolder1_ddlTownAreaCode option')

# Build one row per option: [value, label] when the option carries a
# non-empty value, otherwise an empty row (which pandas fills with NaN).
# NOTE(review): the empty-valued option is presumably the "please select"
# placeholder at the top of the dropdown -- confirm against the live page.
df = []
for val in sle:
    tmp = []
    if val.get('value'):
        tmp.append(val['value'])
        tmp.append(val.text)
    df.append(tmp)

# Column 0 holds the option value, column 1 the visible label.
TownAreaCode = pd.DataFrame(df)
# Discard the row labelled 0, i.e. the first entry of the dropdown.
TownAreaCode = TownAreaCode.drop([0])
# One result DataFrame per town, concatenated after the loop.
df1 = []

# These form fields are the same for every town, so set them once.
# NOTE(review): '5' is presumably the category code for pharmacies (going by
# the output file name) and 999 presumably a page size large enough to get
# all rows in one response -- confirm both against the form.
payload['ctl00$ContentPlaceHolder1$ddlSpecialCode'] = '5'
payload['ctl00$ContentPlaceHolder1$tbxPageNum'] = 999
payload['ctl00$ContentPlaceHolder1$btnSubmit'] = '開始查詢'

for Townid, Town in zip(TownAreaCode[0], TownAreaCode[1]):
    payload['ctl00$ContentPlaceHolder1$ddlTownAreaCode'] = Townid

    # Refresh the hidden form fields from the most recent response before
    # posting again. The attribute selector must use a single '=';
    # 'type==hidden' is not valid CSS.
    for ele in soup.select('input[type=hidden]'):
        name = ele.get('name')
        if name:
            # A hidden input without a value attribute is submitted as ''.
            payload[name] = ele.get('value', '')

    res = requests.post(url, data=payload, timeout=30)
    res.raise_for_status()
    soup = bs(res.text, "lxml")

    # The third <table> of the response is parsed as the result grid; its
    # first row supplies the column headers.
    tb = soup.select('table')[2]
    df0 = pd.read_html(tb.prettify('utf8'), encoding='utf8', header=0)[0]

    # Keep only rows that have an address, and only the first three columns.
    df0 = df0[pd.notnull(df0['地址'])].iloc[:, 0:3]
    # Prepend the town name so rows stay identifiable after concatenation.
    df0.insert(0, '行政區', Town)
    df1.append(df0)

# Show every row when the frame is displayed.
pd.set_option("display.max_rows", None)
df = pd.concat(df1)
df = df.reset_index(drop=True)
df.to_csv("健保特約藥局.csv")
# Bare expression: displays the frame in a notebook/REPL, no effect in a script.
df
執行結果:18行:CountyAreaCode為各縣市列表。
38行:TownAreaCode為行政區列表。
注意:為避免部分Python開發環境無法正常執行,在24行、46行的程式碼 input[type==hidden] 可改為 input[type=hidden]



沒有留言:
張貼留言