# Crawl the Autohome news index page: for every article entry print its link
# and headline, then download the entry's thumbnail image into the current
# working directory under a random (UUID-based) file name.
import sys
import uuid
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

NEWS_URL = "https://www.autohome.com.cn/news/"
# requests never times out unless told to, so a stalled server would hang the
# whole script without this.
REQUEST_TIMEOUT = 10  # seconds

response = requests.get(url=NEWS_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were news.
response.raise_for_status()
# Decode the body using the charset detected from its content rather than the
# one (possibly missing or wrong) announced in the HTTP headers.
response.encoding = response.apparent_encoding

soup = BeautifulSoup(response.text, features='html.parser')
target = soup.find(id='auto-channel-lazyload-article')
if target is None:
    # find() returns None when the page layout changes; stop with a clear
    # message instead of an AttributeError on the next line.
    raise SystemExit(
        "element with id 'auto-channel-lazyload-article' not found in " + NEWS_URL
    )

for item in target.find_all('li'):
    link = item.find("a")
    if link is None:
        # Some <li> entries carry no anchor; nothing to report for them.
        continue

    print(link.attrs.get('href'))  # article URL

    headline = link.find('h3')
    print("對象: ", headline)
    if headline is not None:
        print("文本:", headline.text)  # article title

    image = link.find('img')
    if image is None:
        continue
    img_url = image.attrs.get('src')
    print(img_url)
    if not img_url:
        continue

    # The src may be protocol-relative ("//host/x.jpg"), absolute, or
    # path-relative; urljoin resolves all three against the page URL, whereas
    # blindly prefixing "https:" only works for the first form.
    absolute_img_url = urljoin(NEWS_URL, img_url)
    try:
        img_response = requests.get(absolute_img_url, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One broken thumbnail should not abort the rest of the crawl.
        print(f"skipping image {absolute_img_url}: {exc}", file=sys.stderr)
        continue

    file_name = str(uuid.uuid4()) + ".img"
    with open(file_name, 'wb') as f:
        f.write(img_response.content)  # .content is the raw response bytes