最近刚学了 bs4(BeautifulSoup 4),群里刚好有同学在学爬虫(大概),就拿来练习了一下。
下面放代码
# Fetch the ranking page at `url`, pull five text fields out of every row of
# the "rk-table" table, print each record and collect them all in `data`.
import requests
from bs4 import BeautifulSoup

# Keys of each record, in the order of the <td> cells they are read from.
# Cell 0 is not read (presumably the rank column -- TODO confirm).
FIELDS = ("name", "location", "campus_type", "total_score", "level")


def parse_rows(html):
    """Return one dict per ``<tr>`` in the body of the ``rk-table`` table.

    Args:
        html: Markup of the ranking page.

    Returns:
        A list of dicts keyed by ``FIELDS``; every value is the stripped
        text of the corresponding ``<td>`` cell.

    Raises:
        ValueError: If the table or its ``<tbody>`` is not in the markup.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", {"class": "rk-table"})
    if table is None:
        raise ValueError('no <table class="rk-table"> found in the page')
    body = table.find("tbody")
    if body is None:
        raise ValueError("the rk-table table has no <tbody>")

    records = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        # Skip rows that do not have every cell we index (1..len(FIELDS))
        # instead of failing with IndexError.
        if len(cells) <= len(FIELDS):
            continue
        records.append(
            {
                key: cells[index].get_text().strip()
                for index, key in enumerate(FIELDS, start=1)
            }
        )
    return records


url = "https://www.shanghairanking.cn/rankings/bcur/2020"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx rather than parsing an error page.
response.raise_for_status()
response.encoding = "UTF-8"
html = response.text

data = parse_rows(html)
for record in data:
    print(record)
    # NOTE(review): the original indentation was lost; the separator is
    # assumed to be printed after every record -- confirm.
    print("-----------------")