话不多说,直接放程序:
# coding=utf-8
"""Interactive lookup of the outbreak figures published on the dxy.cn page.

The script downloads the page at ``URL``, extracts the nationwide summary from
the rendered HTML and the province/city figures from the JSON embedded in the
page, prints the summary, then asks for a province or city name (or ``全部``
for everything) and prints the matching rows.

Parsing is regex based and split into pure functions that take the page text,
so they can be exercised without network access.
"""
import re

try:
    # Not used by the regex-based parser; kept available (optionally) so the
    # script still starts on machines without bs4 installed.
    from bs4 import BeautifulSoup  # noqa: F401
except ImportError:
    BeautifulSoup = None

URL = "https://3g.dxy.cn/newh5/view/pneumonia_peopleapp?from=timeline&isappinstalled=0"

# Shown instead of a figure when the page layout no longer matches a pattern.
MISSING = "N/A"

SEPARATOR = "----------------------------------------------------------------"

# The parentheses around 北京时间 are matched literally (ASCII or full-width);
# left unescaped they would form a second capture group.
TIME_RE = re.compile(r"<span>截至 (.*?)[(（]北京时间[)）].*?数据统计</span>", re.S)

# One pass over the summary banner; groups are confirmed, suspected, dead, cured.
SUMMARY_RE = re.compile(
    r'<span class="content___2hIPS"><span>确诊 <span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>疑似.*?<span style=".*?">(.*?)</span> 例.*?</span><div></div>'
    r'<span>死亡.*?<span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>治愈.*?<span style=".*?">(.*?)</span> 例.*?</span>',
    re.S,
)

NOTE_RE = re.compile(
    r'<p class="descList___3iOuI"><i class=".*?"></i>'
    r'<div class="descText___Ui3tV">(.*?)</div></p>',
    re.S,
)

# Groups: short name, confirmed, cured, dead. Capturing all fields in a single
# match keeps them aligned when an entry is discarded later.
PROVINCE_RE = re.compile(
    r'"provinceName":".*?","provinceShortName":"(.*?)","confirmedCount":(.*?),'
    r'"suspectedCount":.*?,"curedCount":(.*?),"deadCount":(.*?),"comment":"',
    re.S,
)

# Groups: city name, confirmed, cured, dead. The dead count is matched as
# digits: a trailing lazy group would always capture the empty string.
CITY_RE = re.compile(
    r'"cityName":"(.*?)","confirmedCount":(.*?),"suspectedCount":.*?,'
    r'"curedCount":(.*?),"deadCount":(\d*)',
    re.S,
)


def fetch_html(page_url=URL, timeout=10):
    """Download the statistics page and return its text decoded as UTF-8.

    Raises the ``requests`` exception for connection problems, timeouts and
    non-2xx responses instead of handing an error page to the parsers.
    """
    # Imported here so the parsing helpers stay importable without requests.
    import requests

    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def parse_overview(html):
    """Extract the nationwide summary from the page text.

    Returns a dict with the keys ``time``, ``confirm``, ``doubt``, ``death``
    and ``cured`` (strings, ``MISSING`` when the pattern is not found) and
    ``notes`` (list of the description lines, possibly empty).
    """
    time_match = TIME_RE.search(html)
    summary_match = SUMMARY_RE.search(html)
    confirm, doubt, death, cured = (
        summary_match.groups() if summary_match else (MISSING,) * 4
    )
    return {
        "time": time_match.group(1).strip() if time_match else MISSING,
        "confirm": confirm,
        "doubt": doubt,
        "death": death,
        "cured": cured,
        "notes": NOTE_RE.findall(html),
    }


def parse_provinces(html):
    """Return one dict per province: provinceName, confirm, cured, dead.

    Matches whose captured name contains ``cityName`` are not province entries
    of the area list (the lazy name group ran across another field) and are
    dropped together with their figures. Repeated names keep the first match.
    """
    provinces = []
    seen = set()
    for name, confirm, cured, dead in PROVINCE_RE.findall(html):
        if "cityName" in name or name in seen:
            continue
        seen.add(name)
        provinces.append(
            {"provinceName": name, "confirm": confirm, "cured": cured, "dead": dead}
        )
    return provinces


def parse_cities(html, province_names):
    """Return one dict per city: Province, cityName, confirm, cured, dead.

    For each province only the contents of its own ``"cities":[...]`` array
    are scanned, so a province with an empty array yields no cities rather
    than the cities of the entry that follows it.
    """
    cities = []
    for province in province_names:
        block = re.search(
            r'"provinceShortName":"' + re.escape(province)
            + r'","confirmedCount":.*?"cities":\[(.*?)\]',
            html,
            re.S,
        )
        if block is None:
            continue
        for name, confirm, cured, dead in CITY_RE.findall(block.group(1)):
            cities.append(
                {
                    "Province": province,
                    "cityName": name,
                    "confirm": confirm,
                    "cured": cured,
                    "dead": dead,
                }
            )
    return cities


def format_row(name, confirm, cured, dead):
    """Format one result line; an empty death count is shown as 0."""
    return "%s confirm: %s cured: %s dead: %s" % (name, confirm, cured, dead or "0")


def build_report(query, province_data, city_data):
    """Return the output lines for *query*.

    ``全部`` lists every province followed by its cities; a province name lists
    that province and its cities; a city name lists the first city with that
    name; anything else yields the "no information" message. City rows are
    indented by one space.
    """
    cities_of = {}
    for city in city_data:
        cities_of.setdefault(city["Province"], []).append(city)

    def province_row(province):
        return format_row(
            province["provinceName"],
            province["confirm"],
            province["cured"],
            province["dead"],
        )

    def city_row(city):
        return format_row(
            city["cityName"], city["confirm"], city["cured"], city["dead"]
        )

    if query == "全部":
        lines = []
        for province in province_data:
            lines.append(province_row(province))
            lines.extend(
                " " + city_row(city)
                for city in cities_of.get(province["provinceName"], [])
            )
        return lines

    for province in province_data:
        if province["provinceName"] == query:
            lines = [province_row(province)]
            members = cities_of.get(query, [])
            if members:
                lines.append("其中包括:")
                lines.extend(" " + city_row(city) for city in members)
            return lines

    for city in city_data:
        if city["cityName"] == query:
            return [city_row(city)]

    return ["您所查询的城市/省份目前还没有瘟情消息!"]


def main():
    """Fetch the page, print the summary, then answer one interactive query."""
    html = fetch_html()
    overview = parse_overview(html)

    print(SEPARATOR)
    print("新型冠状病毒感染肺炎疫情实时动态查询")
    print("截至", overview["time"], "(北京时间) 数据统计")
    print("全国确诊", overview["confirm"], "例")
    print("疑似", overview["doubt"], "例")
    print("死亡", overview["death"], "例")
    print("康复", overview["cured"], "例")
    for note in overview["notes"]:
        print(note)
    print(SEPARATOR)

    query = input("请输入你要查询的城市(或省份),若查询全部,则输入全部:").strip()

    province_data = parse_provinces(html)
    city_data = parse_cities(html, [p["provinceName"] for p in province_data])
    for line in build_report(query, province_data, city_data):
        print(line)


if __name__ == "__main__":
    main()
更新日志
2020-01-28 01:07:54 对新的网页结构做了适配
2020-01-28 01:14:56 查询省份疫情信息时,新增省内各城市详细数据的输出
2020-01-28 02:39:38 优化查询方法,修复了查询某些特定省份时会显示其他省份下属市区的 bug
暂时先不改了,之后会改成bs4的,不用re了