# Generic web-crawler skeleton, followed by a worked example.
import requests
from bs4 import BeautifulSoup


def get_html_text(url):
    """Fetch *url* with HTTP GET and return the response body as text.

    Args:
        url: Address of the page to request.

    Returns:
        The decoded response body, or the sentinel string "產生異常" when
        the request fails (connection problem, timeout, or an error status
        reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=20)  # do not wait on the server forever
        r.raise_for_status()  # turn an HTTP error status into an exception
        # Use the encoding detected from the body itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Catch only request failures; a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and hide programming errors.
        return "產生異常"


def getHTMLText(url):
    """Fetch *url* and return its text; same contract as ``get_html_text``.

    Kept under its original camelCase name so existing callers keep working.
    """
    return get_html_text(url)


if __name__ == '__main__':
    # Skeleton demo: fetch a page and print its HTML.
    url = "http://www.baidu.com"
    print(get_html_text(url))

    # Example: print the comments found on a Douban book page.
    url = "https://book.douban.com/subject/1084336/comments/"  # page to request
    # print(getHTMLText(url))  # uncomment to inspect the raw HTML
    # Named ``html`` rather than ``requests`` so the imported module is not
    # overwritten by the page text.
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")  # parse the document
    results = soup.find_all("div", class_="comment")  # every comment container
    for comment in results:
        # Skip containers without a <p> child instead of raising AttributeError.
        if comment.p is not None:
            print(comment.p.text)