"""Save the first image of every article listed on the Autohome news page.

The script fetches https://www.autohome.com.cn/news/, locates the article
list container by its id, and for each ``<li>`` that has an ``<h3>`` title
downloads the entry's first ``<img>`` into the current working directory as
``<title>.jpg``.
"""
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

NEWS_URL = "https://www.autohome.com.cn/news/"
# Seconds. requests has no default timeout, so a stalled server would
# otherwise block the script forever.
REQUEST_TIMEOUT = 10
# Characters that are path separators or otherwise rejected in file names on
# common filesystems; article titles may contain any of them.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

response = requests.get(NEWS_URL, timeout=REQUEST_TIMEOUT)
# Parsing an HTTP error page would only yield an empty article list.
response.raise_for_status()
# response.content is the raw bytes; response.text is those bytes decoded with
# response.encoding, so the encoding must be set before .text is read.
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')

# Look up the unique container by id first to narrow the search, then collect
# its <li> entries. If the container is missing, there is nothing to download.
container = soup.find(id='auto-channel-lazyload-article')
li_list = container.find_all(name='li') if container is not None else []

for li in li_list:
    title = li.find('h3')  # article title element
    if not title:  # entries without a title are not articles; skip them
        continue
    title = title.text

    img_tag = li.find("img")
    src = img_tag.get('src') if img_tag is not None else None
    if not src:  # no image to save for this entry
        continue
    # The src may be relative or protocol-relative ("//host/path"), which
    # requests rejects; resolve it against the page URL.
    img = urljoin(NEWS_URL, src)
    print(img)

    # Download the image and store its raw bytes.
    res = requests.get(img, timeout=REQUEST_TIMEOUT)
    if not res.ok:  # do not save an HTTP error body as a .jpg file
        continue
    file_name = "%s.jpg" % (UNSAFE_FILENAME_CHARS.sub("_", title),)
    with open(file_name, 'wb') as f:
        f.write(res.content)
更多精彩文章請關注王明昌博客