最近刚学了 bs4(BeautifulSoup 4),群里刚好有同学在学爬虫(大概),就拿来练习了一下。
下面放代码:
import requests
from bs4 import BeautifulSoup
# Scrape the ranking table at `url` and collect one dict per table row in
# `data`, printing each record as it is parsed.
data = []
url = "https://www.shanghairanking.cn/rankings/bcur/2020"

# Without a timeout requests waits indefinitely on a stalled connection;
# raise_for_status() keeps us from parsing an HTTP error page as if it were
# the ranking.
response = requests.get(url, timeout=10)
response.raise_for_status()
# Decode the body as UTF-8 rather than relying on the encoding requests
# guesses from the response headers.
response.encoding = "UTF-8"
html = response.text
soup = BeautifulSoup(html, "html.parser")

# find() returns None when nothing matches, so check each step instead of
# chaining and failing with an opaque AttributeError on None.
table = soup.find("table", {"class": "rk-table"})
tbody = table.find("tbody") if table is not None else None
if tbody is None:
    raise RuntimeError(
        "ranking table (table.rk-table > tbody) not found at " + url
    )
_tr = tbody.find_all("tr")

# Keys for cells 1..5 of every row, in column order. Cell 0 is skipped
# (presumably the rank column -- confirm against the page).
fields = ("name", "location", "campus_type", "total_score", "level")

for row in _tr:
    _td = row.find_all("td")
    # Skip rows that do not have all the expected cells instead of
    # crashing with IndexError.
    if len(_td) <= len(fields):
        continue
    record = {
        key: cell.get_text().strip()
        for key, cell in zip(fields, _td[1:len(fields) + 1])
    }
    print(record)
    data.append(record)
    # NOTE(review): the original indentation was lost; the separator is
    # assumed to be printed after every record -- confirm.
    print("-----------------")