註釋:
一、本例子僅爲測試代碼有效性,故只選取了四個城市進行數據獲取;
二、本例尚有可優化之處,例如代碼的簡潔性、循環輸入城市名字等。
# Fetch the 7-day weather listing for a city from www.weather.com.cn,
# append it to a CSV file and print it to the console.
from bs4 import BeautifulSoup  # extracts tag contents instead of using regexes
import random
import requests  # fetches the page's HTML source
import socket  # exception handling
import time
import http.client  # exception handling
import csv

# City name (as typed by the user) -> weather.com.cn city code.
CITY_CODES = {
    "海口": "101310101",
    "三亞": "101310201",
    "蘇州": "101190401",
    "鄭州": "101180101",
}


def get_html(url, data=None, max_retries=5):
    """Download *url* with browser-like headers and return the page text.

    Args:
        url: Address of the page to fetch.
        data: Unused; kept only so existing callers keep working.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        OSError, http.client.HTTPException: The error raised by the last
            attempt, once every attempt has failed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # Randomised timeout; per the original author's note this is meant to
    # make the request look less like a crawler.
    timeout = random.choice(range(80, 180))

    for attempt in range(1, max_retries + 1):
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
        except (socket.timeout, requests.exceptions.Timeout) as e:
            # requests reports timeouts as requests.exceptions.Timeout, which
            # is an OSError but not a socket.timeout, so it is listed here
            # explicitly to get the short timeout back-off.
            label, pause, error = "3:", range(8, 15), e
        except socket.error as e:
            # socket.error is OSError, which also covers the remaining
            # requests exceptions (connection errors and the like).
            label, pause, error = "4:", range(20, 60), e
        except http.client.BadStatusLine as e:
            label, pause, error = "5:", range(30, 80), e
        except http.client.IncompleteRead as e:
            label, pause, error = "6:", range(5, 15), e
        else:
            rep.encoding = "utf-8"
            return rep.text

        print(label, error)
        if attempt == max_retries:
            # Give up instead of looping forever on a permanent failure.
            raise error
        time.sleep(random.choice(pause))


def _temperature_text(tag):
    """Return the tag's text without the degree sign, or None if unavailable."""
    if tag is None or tag.string is None:
        return None
    return tag.string.replace("℃", "")


def get_data(html_txt):
    """Parse the forecast list out of a weather page.

    Looks for ``<div id="7d">`` inside the body, then reads every ``<li>``
    of its first ``<ul>``.

    Args:
        html_txt: HTML source of the weather page.

    Returns:
        A list of rows ``[date, description, high, low]`` where ``date`` is
        the ``<h1>`` text, ``description`` the text of the first ``<p>``, and
        ``high`` / ``low`` the ``<span>`` / ``<i>`` text of the second ``<p>``
        with "℃" removed. ``high`` is None when the page has no ``<span>``
        (the original author notes the evening page has no daily high).

    Raises:
        ValueError: If the page does not contain the expected forecast list.
    """
    bs = BeautifulSoup(html_txt, "html.parser")
    body = bs.body
    container = body.find("div", {"id": "7d"}) if body is not None else None
    ul = container.find("ul") if container is not None else None
    if ul is None:
        raise ValueError('forecast list (div id="7d" > ul) not found in page')

    final = []
    for day in ul.find_all("li"):
        heading = day.find("h1")
        inf = day.find_all("p")
        if heading is None or len(inf) < 2:
            # Not a forecast entry in the expected shape; skip it rather
            # than abort the whole parse.
            continue
        temperature_high = _temperature_text(inf[1].find("span"))
        temperature_lower = _temperature_text(inf[1].find("i"))
        final.append(
            [heading.string, inf[0].string, temperature_high, temperature_lower]
        )
    return final


def write_data(data, name):
    """Append the rows in *data* to the CSV file called *name*.

    Args:
        data: Iterable of rows, as returned by ``get_data``.
        name: Path of the CSV file; it is opened in append mode.
    """
    file_name = name
    # NOTE(review): no explicit encoding, so the platform default is used and
    # errors='ignore' silently drops characters it cannot encode. Behaviour
    # kept as-is; confirm the intended encoding before changing it.
    with open(file_name, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(data)


def get_url():
    """Ask the user for a city and return that city's weather page URL.

    Prints the supported city names, then keeps prompting until one of them
    is entered (surrounding whitespace is ignored).

    Returns:
        The weather.com.cn URL built from the chosen city's code.
    """
    for k in CITY_CODES:
        print(k)
    while True:
        city_name = input("請輸入你要查詢的城市名字:").strip()
        city_num = CITY_CODES.get(city_name)
        if city_num is not None:
            break
        # Unknown name: ask again instead of failing with a bare KeyError.
        print("未找到該城市,請重新輸入。")
    weather_url = "http://www.weather.com.cn/weather/%s.shtml" % city_num
    return weather_url


if __name__ == "__main__":
    # Example of a fixed URL: http://www.weather.com.cn/weather/101190401.shtml
    url = get_url()
    html = get_html(url)
    result = get_data(html)
    write_data(result, "weather.csv")
    for i in result:
        print(i)  # show each forecast row