思路:
一、圖片放在<image>XXX</image>標籤中
二、利用fiddler抓包獲取存放圖片信息的js文件url
三、利用requests庫獲取html內容,然後獲取其中圖片id
四、利用fiddler抓取下載圖片地址,結合圖片id來下載圖片(大文件)
# -*- coding:UTF-8 -*-
import json
import time
from contextlib import closing

import requests


class get_photos(object):
    """Collect photo ids from the Unsplash listing endpoint and download them.

    Typical use: call ``get_ids()`` once to fill ``photos_id``, then call
    ``download()`` for each id.
    """

    def __init__(self):
        # Ids gathered by get_ids(), in the order the listing returned them.
        self.photos_id = []
        # Download URL template (found by inspecting traffic with Fiddler);
        # the 'xxx' placeholder is replaced by a photo id in download().
        self.download_server = 'https://unsplash.com/photos/xxx/download?force=trues'
        # Listing endpoint; its response is parsed as a JSON list whose
        # items each carry an 'id' key.
        self.target = 'https://unsplash.com/napi/photos?page=1&per_page=24'
        # Browser-like User-Agent sent with every request.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}

    def get_ids(self):
        """Fetch the listing at ``self.target`` and append every photo id to ``self.photos_id``.

        Raises:
            requests.HTTPError: if the listing request returns an error status.
        """
        # NOTE(review): verify=False turns off TLS certificate checks --
        # presumably so traffic can pass through the Fiddler proxy; confirm
        # before using this outside a debugging setup.
        req = requests.get(url=self.target, headers=self.headers, verify=False)
        # Fail loudly instead of trying to parse an error page as JSON.
        req.raise_for_status()
        html = json.loads(req.text)
        print(type(html), len(html))
        self.photos_id.extend(item['id'] for item in html)
        print(self.photos_id)
        time.sleep(1)

    def download(self, photo_id, filename):
        """Stream one photo to ``<filename>.jpg`` in the current directory.

        Args:
            photo_id: id string substituted for 'xxx' in the download URL.
            filename: number formatted with ``%d`` to form the file name.

        Raises:
            requests.HTTPError: if the download request returns an error status.
        """
        # Keep the URL local: writing it to self.target would clobber the
        # listing endpoint that get_ids() relies on.
        url = self.download_server.replace('xxx', photo_id)
        # closing() guarantees the streamed response is released on exit.
        with closing(requests.get(url=url, stream=True, verify=False, headers=self.headers)) as r:
            # Do not save an HTML error page under a .jpg name.
            r.raise_for_status()
            # 'wb' so a re-run overwrites the file instead of appending a
            # second copy of the image to the first.
            with open('%d.jpg' % filename, 'wb') as f:
                # Write in 1024-byte chunks so large files never sit fully in memory.
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)


if __name__ == '__main__':
    gp = get_photos()
    print('獲取圖片鏈接中:')
    gp.get_ids()
    print('圖片下載中:')
    for index, photo_id in enumerate(gp.photos_id, start=1):
        print('正在下載第%d張圖片' % index)
        try:
            gp.download(photo_id, index)
        except requests.RequestException as err:
            # One failed photo should not abort the remaining downloads.
            print('第%d張圖片下載失敗: %s' % (index, err))