#!/usr/bin/env python
# Version = 3.5.2
# __auth__ = '無名小妖'
"""Download the article thumbnail images listed on the Autohome news page.

The script fetches the news index page, locates the article list by its
element id, and saves the image of every list entry into the current
working directory under a random (UUID-based) file name.
"""
import os
import sys
import uuid
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

NEWS_URL = 'http://www.autohome.com.cn/news/'
ARTICLE_LIST_ID = 'auto-channel-lazyload-article'
# Seconds to wait for a server before giving up; without a timeout
# requests may block forever on an unresponsive host.
REQUEST_TIMEOUT = 10
# Used when the image URL path carries no file extension of its own.
DEFAULT_EXTENSION = '.jpg'


def _extract_image_url(item, page_url):
    """Return the absolute image URL of one ``<li>`` entry, or None.

    ``item`` is a BeautifulSoup tag; ``page_url`` is the URL the page was
    loaded from and serves as the base for relative ``src`` values.
    None is returned when the entry has no ``<a>``, no ``<img>`` or an
    empty ``src`` attribute.
    """
    link = item.find('a')
    if link is None:
        return None
    img = link.find('img')
    if img is None:
        return None
    src = img.attrs.get('src')
    if not src:
        return None
    # urljoin handles protocol-relative ("//host/x.jpg"), root-relative
    # ("/x.jpg") and already absolute http/https URLs alike, whereas
    # blindly prefixing "http:" would corrupt "https://..." addresses.
    return urljoin(page_url, src)


def _image_extension(img_url):
    """Return the file extension (with leading dot) for ``img_url``.

    Only the path component is inspected, so query strings and fragments
    do not leak into the file name.
    """
    ext = os.path.splitext(urlparse(img_url).path)[1]
    return ext or DEFAULT_EXTENSION


def _download_image(img_url):
    """Download ``img_url`` into the current directory.

    Returns the name of the written file, or None when the download
    failed; failures are reported on stderr and do not abort the run.
    """
    try:
        img_response = requests.get(url=img_url, timeout=REQUEST_TIMEOUT)
        # Do not store an HTML error page under an image file name.
        img_response.raise_for_status()
    except requests.RequestException as exc:
        print('skipping {}: {}'.format(img_url, exc), file=sys.stderr)
        return None

    file_name = '{}{}'.format(uuid.uuid4(), _image_extension(img_url))
    with open(file_name, 'wb') as f:
        f.write(img_response.content)
    return file_name


def main():
    """Fetch the news page and download the image of every listed article."""
    response = requests.get(url=NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode the page using the encoding detected from its content.
    response.encoding = response.apparent_encoding

    # Parse the page text into a BeautifulSoup object. html.parser is the
    # built-in parser; lxml is the usual choice when speed matters.
    soup = BeautifulSoup(response.text, features='html.parser')

    # The article list is the element with this id.
    target = soup.find(id=ARTICLE_LIST_ID)
    if target is None:
        sys.exit('element with id {!r} not found on {}'.format(
            ARTICLE_LIST_ID, response.url))

    # Every <li> below the list element is one article entry.
    for item in target.find_all('li'):
        img_url = _extract_image_url(item, response.url)
        if img_url is None:
            continue
        _download_image(img_url)


if __name__ == '__main__':
    main()