"""Scrape the NHI query form (Query3.aspx) district by district and save a CSV.

Flow, as implemented below:
  1. GET the query page and read the county drop-down into ``CountyAreaCode``.
  2. Post the form back with county code '01' and read the resulting
     town/district drop-down into ``TownAreaCode``.
  3. For every district, submit the query and collect the result table.
  4. Concatenate all per-district tables and write them to a CSV file.
"""
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

url = "http://www.nhi.gov.tw/QueryN/Query3.aspx"

# Without a timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30  # seconds


def _to_soup(response):
    """Return the parsed HTML of *response*, raising on HTTP error statuses."""
    response.raise_for_status()
    return bs(response.text, "lxml")


def _option_table(page, selector):
    """Build a DataFrame of (value, label) rows from ``<option>`` elements.

    Options whose ``value`` attribute is missing or empty are ignored.
    Column 0 holds the option value, column 1 the visible text.  The first
    remaining row (index label 0) is dropped, exactly as the original script
    did -- presumably it is a placeholder entry; confirm against the live page.

    Raises:
        KeyError: if no option with a non-empty value was found, because
            there is then no row 0 to drop.
    """
    rows = [
        [option["value"], option.text]
        for option in page.select(selector)
        if option.get("value")
    ]
    return pd.DataFrame(rows).drop([0])


def _hidden_fields(page):
    """Return ``{name: value}`` for every named hidden ``<input>`` in *page*.

    These hidden fields must be echoed back on each POST for the form to be
    accepted.  Note the selector uses a single ``=``: ``input[type==hidden]``
    is not valid CSS and is rejected by the selector engine of current
    BeautifulSoup releases.
    """
    return {
        field["name"]: field.get("value", "")
        for field in page.select("input[type=hidden]")
        if field.get("name")
    }


# Step 1: load the form and list the counties.
soup = _to_soup(requests.get(url, timeout=REQUEST_TIMEOUT))
CountyAreaCode = _option_table(
    soup, "#ctl00_ContentPlaceHolder1_ddlCountyAreaCode option"
)

# Step 2: select county '01' so the server fills in the district drop-down.
payload = {
    "ctl00$ContentPlaceHolder1$ddlCountyAreaCode": "01",
}
payload.update(_hidden_fields(soup))
soup = _to_soup(requests.post(url, data=payload, timeout=REQUEST_TIMEOUT))
TownAreaCode = _option_table(
    soup, "#ctl00_ContentPlaceHolder1_ddlTownAreaCode option"
)

# Step 3: run the query once per district and keep each result table.
df1 = []
for Townid, Town in zip(TownAreaCode[0], TownAreaCode[1]):
    payload["ctl00$ContentPlaceHolder1$ddlTownAreaCode"] = Townid
    # '5' is the special-category code; presumably pharmacies, judging by the
    # output file name -- confirm against the form's option list.
    payload["ctl00$ContentPlaceHolder1$ddlSpecialCode"] = "5"
    # Ask for up to 999 rows per page so a district fits on a single page.
    payload["ctl00$ContentPlaceHolder1$tbxPageNum"] = 999
    payload["ctl00$ContentPlaceHolder1$btnSubmit"] = "開始查詢"
    # Hidden fields are taken from the most recent response, not the first.
    payload.update(_hidden_fields(soup))

    soup = _to_soup(requests.post(url, data=payload, timeout=REQUEST_TIMEOUT))

    # The result grid is the third <table> on the response page.
    tb = soup.select("table")[2]
    # read_html expects a path or file-like object; wrap the markup in
    # StringIO instead of passing literal HTML bytes.
    df0 = pd.read_html(StringIO(tb.prettify()), header=0)[0]
    # Keep rows that have an address, and only the first three columns.
    df0 = df0[pd.notnull(df0["地址"])].iloc[:, 0:3]
    df0.insert(0, "行政區", Town)
    df1.append(df0)

# Step 4: merge everything, renumber the rows and export.
pd.set_option("display.max_rows", None)
df = pd.concat(df1)
df = df.reset_index(drop=True)
df.to_csv("健保特約藥局.csv")

# Displays the result in a notebook; has no effect when run as a script.
df

# Execution result:
18行:CountyAreaCode為各縣市列表。
38行:TownAreaCode為行政區列表。
注意:input[type==hidden] 並非合法的 CSS 屬性選擇器語法,在部分Python開發環境(例如較新版本的 BeautifulSoup)中會無法正常執行;請將24行、46行程式碼中的 input[type==hidden] 改為 input[type=hidden]。
沒有留言:
張貼留言