Python 打造多線程圖片下載工具

時間 2020-07-15

原文鏈接

整理之前寫的代碼時，發現了一個下載高清桌面圖片的小工具，它使用多線程調用 wget 的方式進行下載。

 1 import re
 2 import os
 3 import requests
 4 from threading import Thread
 5 
 6 
 7 # 請求頭
 8 def request_head(site):
 9     host = site.split("/")[2]
10     heads = {
11         "Content-Type": "text/html",
12         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
13         "Accept-Encoding": "gzip, deflate, br",
14         "Accept-Language": "en-US,en;q=0.9",
15         "Host": host,
16         "Referer": site,
17         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
18     }
19     return heads
20 
21 
# Send the request
def request_spider(url, timeout=10):
    """GET ``url`` and return the response body decoded as UTF-8.

    Args:
        url: URL to fetch; it is also handed to ``request_head`` to build
            the request headers.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so that a
            stalled connection cannot block the calling thread forever.

    Returns:
        The response text when the status code is 200; ``None`` when the
        request raised or the server answered with any other status.
    """
    try:
        res = requests.get(url=url, headers=request_head(url), timeout=timeout)
        if res.status_code != 200:
            # Report the failure instead of silently falling through.
            print("請求失敗，狀態碼：", res.status_code, url)
            return None
        res.encoding = "utf-8"
        return res.text
    except Exception as e:
        # Deliberately broad: this runs inside worker threads and is
        # best-effort, so any failure is reported and turned into None.
        print("發起請求出錯：", e)
        return None
32 
33 
def dowload_spider(url):
    """Download every image referenced on the page at ``url`` with ``wget``.

    The page is fetched with ``request_spider``; each ``data-progressive``
    attribute value found in it is treated as an image URL and saved into an
    ``images`` directory next to this script. Images whose file name (the
    text after the last ``/``) already exists there are skipped.

    Args:
        url: Address of the listing page to scan.

    Returns:
        None.
    """
    # Local import keeps this block self-contained; only this function needs it.
    import subprocess

    html = request_spider(url)
    if html is None:
        # The fetch failed (already reported by request_spider); nothing to scan.
        return

    images_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images")
    # exist_ok avoids the check-then-create race when many threads start at once.
    os.makedirs(images_dir, exist_ok=True)

    for img_url in re.findall(r'data-progressive="(.*?)"', html):
        file_name = img_url.rsplit("/", 1)[-1]
        # Absolute paths are used instead of os.chdir, because the working
        # directory is process-wide state shared by every thread.
        if file_name and os.path.exists(os.path.join(images_dir, file_name)):
            continue
        try:
            # Argument list (no shell) so scraped text cannot inject commands;
            # "--" stops wget from parsing the URL as an option.
            subprocess.run(["wget", "-P", images_dir, "--", img_url], check=False)
        except OSError as e:
            # wget is missing or not executable: no point trying further URLs.
            print("調用 wget 出錯：", e)
            return
46 
47 
if __name__ == '__main__':
    # Start one downloader thread per listing page (pages 1 through 101),
    # then block until every thread has finished.
    page_template = 'https://bing.ioliu.cn/?p={}'
    workers = []
    for page_no in range(1, 102):
        page_url = page_template.format(page_no)
        worker = Thread(target=dowload_spider, args=(page_url,))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
    print("程序完成!")