"""Scrape weather.com.cn regional forecast pages and chart the coldest cities.

Every regional page is fetched and parsed into ``ALL_DATA``; the ten cities
with the lowest minimum temperature are then rendered as a bar chart in
``temperature.html``.
"""
import requests
from bs4 import BeautifulSoup
from pyecharts import Bar

# Minimum temperature of every city, as {"city": str, "min_temp": int} records.
ALL_DATA = []

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
_REQUEST_TIMEOUT = 10


def parse_url(url):
    """Fetch one regional forecast page and append its cities to ``ALL_DATA``.

    Each data row contributes one ``{"city": ..., "min_temp": ...}`` record,
    which is also printed as it is collected.  Rows that do not have the
    expected cells or whose temperature is not an integer are skipped.

    Args:
        url: Address of the regional forecast page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page has no ``div.conMidtab`` container.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
        'Referer': 'http://www.weather.com.cn/textFC/hz.shtml'
    }
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    text = response.content.decode("utf-8")

    # The Hong Kong / Macau / Taiwan page has non-standard table markup, so a
    # more fault-tolerant parser (html5lib) is required.
    soup = BeautifulSoup(text, "html5lib")
    con_midtab = soup.find("div", class_='conMidtab')
    if con_midtab is None:
        raise ValueError("no 'conMidtab' container found in page: " + url)

    for table in con_midtab.find_all("table"):
        # The first two rows of every table are skipped before reading data.
        rows = table.find_all("tr")[2:]
        for index, row in enumerate(rows):
            cells = row.find_all("td")
            if len(cells) < 2:
                # Neither the city cell nor the temperature cell can exist.
                continue

            # In the first data row the city name is read from the second
            # cell; in every later row it is read from the first cell.
            city_td = cells[1] if index == 0 else cells[0]
            temp_td = cells[-2]

            city = next(city_td.stripped_strings, None)
            temp = next(temp_td.stripped_strings, None)
            if city is None or temp is None:
                continue

            try:
                min_temp = int(temp)
            except ValueError:
                # Temperature cell holds something that is not an integer;
                # skip this row instead of aborting the whole scrape.
                continue

            ALL_DATA.append({"city": city, "min_temp": min_temp})
            print({"city": city, "min_temp": temp})


def main():
    """Scrape every region and render the ten lowest temperatures as a chart.

    Sorts ``ALL_DATA`` in place by ascending minimum temperature and writes
    the resulting bar chart to ``temperature.html``.
    """
    areas = ['hb.shtml', 'db.shtml', 'hd.shtml', 'hz.shtml',
             'hn.shtml', 'xb.shtml', 'xn.shtml', 'gat.shtml']
    base_url = 'http://www.weather.com.cn/textFC/'
    for area in areas:
        parse_url(base_url + area)

    ALL_DATA.sort(key=lambda data: data['min_temp'])
    coldest = ALL_DATA[0:10]
    cities = [record['city'] for record in coldest]
    min_temps = [record['min_temp'] for record in coldest]

    chart = Bar("中國天氣最低氣溫排行")
    chart.add("", cities, min_temps)
    chart.render('temperature.html')


if __name__ == '__main__':
    main()