2019-nCoV 疫情实时查询爬虫

话不多说,直接放程序:

#coding=utf-8
import re
from bs4 import BeautifulSoup
import requests

# Page that every figure in this script is scraped from.
url = "https://3g.dxy.cn/newh5/view/pneumonia_peopleapp?from=timeline&isappinstalled=0"

# Short names of provincial-level regions. Not referenced by the rest of this
# script; kept so that any external user of the name keeps working.
Province=["北京","天津","上海","重庆","河北","山西","辽宁","吉林","黑龙江","江苏","浙江","安徽","福建","江西","山东","河南","湖北","湖南","广东","海南","四川","贵州","云南","陕西","甘肃","青海","台湾","内蒙古","广西","西藏","宁夏","新疆","香港","澳门"]

# Download the page. A timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error page into an
# immediate, descriptive failure instead of a confusing regex miss later on.
response = requests.get(url, timeout=10)
response.raise_for_status()
response.encoding = "utf-8"  # force UTF-8 decoding of the body
html = response.text

# Placeholder containers. virus_data is reassigned once the page is parsed;
# the other four are not used further in this script.
virus_data = []
area = []
confirm = []
death = []
cured = []

# --- Nationwide headline figures -------------------------------------------
# Timestamp of the statistics. "北京时间" is matched as plain text rather than
# as a capture group, so the result is a single string instead of a
# (timestamp, "北京时间") tuple that would be printed as a tuple repr.
_time_match = re.search(r"<span>截至 (.*?)北京时间.*?数据统计</span>", html, re.S)
if _time_match is None:
    raise RuntimeError("statistics timestamp not found; the page layout may have changed")
# Drop the opening parenthesis (ASCII or full-width) and blanks that sit
# between the timestamp and "北京时间" in the page text.
time = _time_match.group(1).rstrip("(\uff08 \t")

# Confirmed / suspected / dead / cured totals live in one summary <span>;
# capture all four numbers in a single scan instead of four identical ones.
_summary_pattern = (
    r'<span class="content___2hIPS"><span>确诊 <span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>疑似.*?<span style=".*?">(.*?)</span> 例.*?</span><div></div>'
    r'<span>死亡.*?<span style=".*?">(.*?)</span> 例.*?</span>'
    r'<span>治愈.*?<span style=".*?">(.*?)</span> 例.*?</span>'
)
_summary_match = re.search(_summary_pattern, html, re.S)
if _summary_match is None:
    raise RuntimeError("nationwide summary block not found; the page layout may have changed")
confirm_data, doubt_data, death_data, cured_data = _summary_match.groups()

# Free-text description lines shown under the summary (possibly none).
virus_data = re.findall(
    r'<p class="descList___3iOuI"><i class=".*?"></i><div class="descText___Ui3tV">(.*?)</div></p>',
    html,
    re.S,
)

# Text slices that each start at a "provinceShortName" key and run through the
# closing bracket of the next "cities" array.
no_sub_pre = re.findall(r'"provinceShortName":".*?,"comment":"","cities":\[.*?\]', html, re.S)

# no_sub: short names of the provinces whose "cities" array is empty, i.e.
# provinces that report no per-city breakdown.
no_sub = [
    short_name
    for chunk in no_sub_pre
    for short_name in re.findall(
        r'"provinceShortName":"(.*?)","confirmedCount":.*?,"suspectedCount":.*?,"curedCount":.*?,"deadCount":.*?,"comment":"","cities":\[\]',
        chunk,
        re.S,
    )
]


# Print the nationwide banner, then ask the user what to look up.
print("-" * 64)
print("新型冠状病毒感染肺炎疫情实时动态查询")
print("截至", time, "(北京时间) 数据统计")
for label, count in (
    ("全国确诊", confirm_data),
    ("疑似", doubt_data),
    ("死亡", death_data),
    ("康复", cured_data),
):
    print(label, count, "例")
# Description lines scraped from the page, one per output line.
for notice in virus_data:
    print(notice)
print("-" * 64)
# A city name, a province name, or "全部" (everything).
SearchCity = input("请输入你要查询的城市(或省份),若查询全部,则输入全部:")

# Filled later with one dict per province.
province_data = []

# Province-level figures. All four fields are captured in ONE scan so that
# they stay aligned row by row: filtering the name list on its own (as was
# done before) would shift every following province's counts by one.
_province_rows = re.findall(
    r'"provinceName":".*?","provinceShortName":"(.*?)","confirmedCount":(.*?),'
    r'"suspectedCount":.*?,"curedCount":(.*?),"deadCount":(.*?),"comment":"',
    html,
    re.S,
)
# Discard rows whose "name" capture swallowed city-level JSON.
_province_rows = [row for row in _province_rows if "cityName" not in row[0]]

province_name = [row[0] for row in _province_rows]
province_confirm = [row[1] for row in _province_rows]
province_cured = [row[2] for row in _province_rows]
province_dead = [row[3] for row in _province_rows]

# Raw text for each province: everything from its confirmedCount up to the
# first "}]}" that follows. The value is a (possibly empty) list of matches.
# The name is escaped so it is always treated as literal text in the pattern.
province_html = {
    _name: re.findall(
        '"provinceShortName":"%s","confirmedCount":(.*?)}]}' % re.escape(_name),
        html,
        re.S,
    )
    for _name in province_name
}

# city_name: flat list of every city name found.
# city_data: one dict per city with keys Province / cityName / confirm / cured / dead.
city_name = []
city_data = []

# One pattern captures every city field in a single scan. The death count is
# captured with [^,}]* because a lazy (.*?) at the very end of a pattern
# always matches the empty string, which made every city report '' deaths.
_CITY_ROW = re.compile(
    r'"cityName":"(.*?)","confirmedCount":(.*?),"suspectedCount":.*?,'
    r'"curedCount":(.*?),"deadCount":([^,}]*)',
    re.S,
)

for _province in province_name:
    _chunks = province_html[_province]
    # A province without a city breakdown has a text slice that runs on into
    # the NEXT province (or has no slice at all), so any cities found there
    # would be attributed to the wrong province. Skip it.
    if _province in no_sub or not _chunks:
        continue
    for _city, _confirm, _cured, _dead in _CITY_ROW.findall(_chunks[0]):
        city_name.append(_city)
        city_data.append({
            'Province': _province,
            'cityName': _city,
            'confirm': _confirm,
            'cured': _cured,
            'dead': _dead,
        })

def _format_row(name, confirm, cured, dead):
    """Return one report line; an empty death count is displayed as 0."""
    shown_dead = dead if dead != '' else 0
    return " ".join(
        str(part)
        for part in (name, "confirm:", confirm, "cured:", cured, "dead:", shown_dead)
    )


def _print_cities(province, indent):
    """Print every city recorded for *province*, each line prefixed by *indent*."""
    for city in _cities_by_province.get(province, []):
        print(indent + _format_row(city['cityName'], city['confirm'], city['cured'], city['dead']))


# One dict per province, built from the parallel province_* lists.
province_data = [
    {'provinceName': _name, 'confirm': _confirm, 'cured': _cured, 'dead': _dead}
    for _name, _confirm, _cured, _dead in zip(
        province_name, province_confirm, province_cured, province_dead
    )
]

# Index the cities by province once instead of rescanning city_data for
# every province.
_cities_by_province = {}
for _city in city_data:
    _cities_by_province.setdefault(_city['Province'], []).append(_city)

if SearchCity == '全部':
    # Full report: every province followed by its cities.
    for _province in province_data:
        _name = _province['provinceName']
        print(_format_row(_name, _province['confirm'], _province['cured'], _province['dead']))
        # The guard must test the province being printed. Testing SearchCity
        # (always '全部' here) never skipped anything, so provinces without a
        # city breakdown showed cities belonging to another province.
        if _name not in no_sub:
            _print_cities(_name, '   ')
elif SearchCity in province_name:
    # Single province: its totals, then its cities when it has a breakdown.
    for _province in province_data:
        if _province['provinceName'] == SearchCity:
            print(_format_row(SearchCity, _province['confirm'], _province['cured'], _province['dead']))
            break
    if SearchCity not in no_sub:
        print("其中包括:")
        _print_cities(SearchCity, '  ')
elif SearchCity in city_name:
    # Single city: the first record with a matching name.
    for _city in city_data:
        if _city['cityName'] == SearchCity:
            print(_format_row(_city['cityName'], _city['confirm'], _city['cured'], _city['dead']))
            break
else:
    print("您所查询的城市/省份目前还没有瘟情消息!")

 



更新日志

2020-01-28 01:07:54 对新的网页结构做了适配

2020-01-28 01:14:56 在查询省份瘟疫信息时新增省内详细信息的输出

2020-01-28 02:39:38 优化查询方法,修复了查询某些特定省份时会显示其他省份下属市区的 bug

点赞
  1. SAGIRI SAGIRI说道:

    暂时先不改了,之后会改成bs4的,不用re了

发表评论