话不多说,直接放程序:
#coding=utf-8
import re
from bs4 import BeautifulSoup
import requests
# dxy.cn mobile page whose HTML is scraped with regular expressions by the
# rest of this script.
url = "https://3g.dxy.cn/newh5/view/pneumonia_peopleapp?from=timeline&isappinstalled=0"
# Short names of province-level regions.
# NOTE(review): nothing in the visible script reads this list.
Province=["北京","天津","上海","重庆","河北","山西","辽宁","吉林","黑龙江","江苏","浙江","安徽","福建","江西","山东","河南","湖北","湖南","广东","海南","四川","贵州","云南","陕西","甘肃","青海","台湾","内蒙古","广西","西藏","宁夏","新疆","香港","澳门"]
# requests never times out unless told to; without a timeout a stalled
# connection would hang the script forever.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of regex-parsing an error page.
response.raise_for_status()
# Decode the body as UTF-8 rather than whatever encoding requests guesses.
response.encoding = "utf-8"
html = response.text
# Placeholder lists kept from the original script.
# NOTE(review): nothing in the visible script reads them.
area = []
confirm = []
death = []
cured = []


def _first_match(pattern, text):
    """Return the captured text of the first match of *pattern* in *text*.

    A pattern with one group yields a string; a pattern with several groups
    yields a tuple of strings (the same shape ``re.findall(...)[0]`` gives).

    Raises:
        IndexError: if the pattern does not occur in *text*.  The type
            matches what ``re.findall(...)[0]`` raises on an empty result,
            but the message now says which pattern was missing.
    """
    found = re.search(pattern, text, re.S)
    if found is None:
        raise IndexError(
            "pattern not found in page (layout may have changed): %s" % pattern
        )
    groups = found.groups()
    return groups[0] if len(groups) == 1 else groups


# Timestamp of the statistics.  The parentheses around 北京时间 are matched
# as optional literal characters (ASCII or full-width).  Written as a bare
# "(北京时间)" they form a second capture group, which makes the result a
# tuple instead of the timestamp string.
time = _first_match(
    r"<span>截至 (.*?)\s*[((]?北京时间[))]?.*?数据统计</span>",
    html,
)

# Nationwide totals: confirmed, suspected, dead and cured are captured in a
# single pass over the page instead of four scans with the same pattern.
_SUMMARY_PATTERN = (
    r'<span class="content___2hIPS"><span>确诊 <span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>疑似.*?<span style=".*?">(.*?)</span> 例.*?</span><div></div>'
    r'<span>死亡.*?<span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>治愈.*?<span style=".*?">(.*?)</span> 例.*?</span>'
)
confirm_data, doubt_data, death_data, cured_data = _first_match(_SUMMARY_PATTERN, html)

# Free-text description entries shown on the page (may be an empty list).
virus_data = re.findall(
    r'<p class="descList___3iOuI"><i class=".*?"></i><div class="descText___Ui3tV">(.*?)</div></p>',
    html,
    re.S,
)
# Each chunk runs from a "provinceShortName" key to the first "]" that
# follows a `"comment":"","cities":[` marker.
no_sub_pre = re.findall(
    r'"provinceShortName":".*?,"comment":"","cities":\[.*?\]',
    html,
    re.S,
)

# Matches only chunks whose "cities" array is empty and captures the short
# name that opens the chunk.
_EMPTY_CITIES_PATTERN = (
    r'"provinceShortName":"(.*?)","confirmedCount":.*?,"suspectedCount":.*?,'
    r'"curedCount":.*?,"deadCount":.*?,"comment":"","cities":\[\]'
)

# Short names of provinces that report no city-level breakdown.  One nested
# comprehension replaces the index loop and the quadratic ``sum(lists, [])``
# flatten; the resulting list is the same.
no_sub = [
    _short_name
    for _chunk in no_sub_pre
    for _short_name in re.findall(_EMPTY_CITIES_PATTERN, _chunk, re.S)
]
# Print the nationwide summary banner, then ask the user what to look up.
_DIVIDER = "----------------------------------------------------------------"

print(_DIVIDER)
print("新型冠状病毒感染肺炎疫情实时动态查询")
print("截至", time, "(北京时间) 数据统计")

# One line per nationwide total, in the order confirmed / suspected / dead /
# cured.
_totals = (
    ("全国确诊", confirm_data),
    ("疑似", doubt_data),
    ("死亡", death_data),
    ("康复", cured_data),
)
for _label, _value in _totals:
    print(_label, _value, "例")

# Description entries scraped from the page, one per line.
for _entry in virus_data:
    print(_entry)

print(_DIVIDER)

# A province name, a city name, or "全部" for everything.
SearchCity = input("请输入你要查询的城市(或省份),若查询全部,则输入全部:")
# Filled later by the script with one dict per province.
province_data = []

# Capture short name, confirmed, cured and dead counts of every province in
# one pass, so that the four values of a province always travel together.
_PROVINCE_PATTERN = (
    r'"provinceName":".*?","provinceShortName":"(.*?)","confirmedCount":(.*?),'
    r'"suspectedCount":.*?,"curedCount":(.*?),"deadCount":(.*?),"comment":"'
)

# Discard matches whose "name" swallowed city-level JSON.  Filtering whole
# rows (instead of only the name list) keeps the four lists index-aligned;
# filtering names alone would shift every later province onto the wrong
# counts.
_province_rows = [
    _row
    for _row in re.findall(_PROVINCE_PATTERN, html, re.S)
    if "cityName" not in _row[0]
]

province_name = [_row[0] for _row in _province_rows]
province_confirm = [_row[1] for _row in _province_rows]
province_cured = [_row[2] for _row in _province_rows]
province_dead = [_row[3] for _row in _province_rows]
# Maps a province short name to the list of raw JSON fragments found for it
# (the text between its "confirmedCount" key and the "}]}" closing its
# cities array).
province_html = {}
for _province in province_name:
    _fragments = re.findall(
        # re.escape keeps the name literal should it ever contain regex
        # metacharacters.
        r'"provinceShortName":"%s","confirmedCount":(.*?)\}\]\}' % re.escape(_province),
        html,
        re.S,
    )
    # A province with an empty cities array has no "}]}" of its own, so the
    # lazy match runs on into the next province.  Cutting at the next
    # "provinceName" key stops those cities being attributed to this one.
    province_html[_province] = [
        _fragment.split('"provinceName":', 1)[0] for _fragment in _fragments
    ]

# One pass per fragment captures every per-city field.  The final group is
# bounded by "," / "}" (or the end of the fragment); a trailing lazy
# "(.*?)" there always matches the empty string and loses the death count.
_CITY_PATTERN = (
    r'"cityName":"(.*?)","confirmedCount":(.*?),"suspectedCount":(.*?),'
    r'"curedCount":(.*?),"deadCount":([^,}]*)'
)

city_name = []   # every city name, in page order
city_data = []   # one dict per city: Province, cityName, confirm, cured, dead
for _province in province_name:
    _fragments = province_html[_province]
    if not _fragments:
        # Nothing matched for this province: treat it as having no cities
        # rather than failing with IndexError.
        continue
    for _city, _confirm, _suspect, _cured, _dead in re.findall(
        _CITY_PATTERN, _fragments[0], re.S
    ):
        city_name.append(_city)
        city_data.append({
            'Province': _province,
            'cityName': _city,
            'confirm': _confirm,
            'cured': _cured,
            'dead': _dead,
        })
# Build one record per province by walking the four parallel lists in
# lockstep; the number of records equals len(province_name).
province_data.extend(
    {
        'provinceName': _name,
        'confirm': _confirmed,
        'cured': _recovered,
        'dead': _deaths,
    }
    for _name, _confirmed, _recovered, _deaths in zip(
        province_name, province_confirm, province_cured, province_dead
    )
)
def _print_stats(name, record, indent=False):
    """Print one "name confirm: .. cured: .. dead: .." line.

    Args:
        name: Province or city name to show at the start of the line.
        record: Dict with "confirm", "cured" and "dead" entries.
        indent: When true, prefix the line with a single space (used for
            cities listed under a province).

    An empty "dead" value is shown as 0.
    """
    if indent:
        print(end=' ')
    dead = record["dead"]
    print(
        name,
        "confirm:", record["confirm"],
        "cured:", record["cured"],
        "dead:", 0 if dead == '' else dead,
    )


# Group the city records by province once, preserving page order, so each
# lookup below does not have to rescan the whole city list.
_cities_by_province = {}
for _city in city_data:
    _cities_by_province.setdefault(_city['Province'], []).append(_city)

if SearchCity == '全部':
    # Everything: each province followed by its cities.
    for _province in province_data:
        _province_name = _province['provinceName']
        _print_stats(_province_name, _province)
        # Test the province being printed.  Testing SearchCity here would
        # compare the literal "全部" against no_sub, which is always absent,
        # so provinces without a city breakdown would never be skipped.
        if _province_name not in no_sub:
            for _city in _cities_by_province.get(_province_name, []):
                _print_stats(_city['cityName'], _city, indent=True)
elif SearchCity in province_name:
    # A single province: its own totals first (first matching record only).
    for _province in province_data:
        if _province['provinceName'] == SearchCity:
            _print_stats(_province['provinceName'], _province)
            break
    # Then its cities, unless the page reports no city-level breakdown.
    if SearchCity not in no_sub:
        print("其中包括:")
        for _city in _cities_by_province.get(SearchCity, []):
            _print_stats(_city['cityName'], _city, indent=True)
elif SearchCity in city_name:
    # A single city: print the first record carrying that name.
    for _city in city_data:
        if _city['cityName'] == SearchCity:
            _print_stats(_city['cityName'], _city)
            break
else:
    print("您所查询的城市/省份目前还没有瘟情消息!")
更新日志
2020-01-28 01:07:54 对新的网页结构做了适配
2020-01-28 01:14:56 在查询省份瘟疫信息时新增省内详细信息的输出
2020-01-28 02:39:38 优化查询方法,修复了查询某些特定省份时会显示其他省份下属市区的 bug
暂时先不改了,之后会改成bs4的,不用re了