整理之前寫的代碼時, 發現一個小工具: 用多線程抓取 html 頁面, 再調用 wget 下載其中的高清桌面圖片。
"""Download Bing wallpaper images listed on bing.ioliu.cn.

Each listing page is fetched in its own thread; every image URL found on a
page is handed to the external ``wget`` program, which saves the file into
an ``images`` directory next to this script.
"""
import os
import re
import subprocess
from threading import Thread
from urllib.parse import urlsplit

# Seconds to wait on the server before giving up on a page request, so a
# stalled connection cannot block a worker thread forever.
REQUEST_TIMEOUT = 30


# Request headers
def request_head(site):
    """Build browser-like request headers for *site*.

    Args:
        site: Absolute URL of the page that is about to be requested.

    Returns:
        A dict of HTTP headers whose ``Host`` is the URL's network location
        (without any ``user:password@`` prefix) and whose ``Referer`` is the
        URL itself.
    """
    # urlsplit copes with URLs that have no path ("https://host?x=1"), where
    # naive splitting on "/" would return "host?x=1" or raise IndexError.
    host = urlsplit(site).netloc.rpartition("@")[2]
    heads = {
        "Content-Type": "text/html",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        # "br" is deliberately not advertised: requests can only decode
        # brotli when an optional extra package is installed, and an
        # undecoded body would silently yield no regex matches.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        "Host": host,
        "Referer": site,
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
    }
    return heads


# Issue the request
def request_spider(url):
    """Fetch *url* and return its body decoded as UTF-8.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The response text when the server answers with status 200;
        ``None`` for any other status or when the request fails.
    """
    # Imported lazily: requests is the only third-party dependency, and
    # deferring it keeps the pure helpers importable without it.
    import requests

    try:
        res = requests.get(
            url=url, headers=request_head(url), timeout=REQUEST_TIMEOUT
        )
        if res.status_code == 200:
            res.encoding = "utf-8"
            return res.text
    except Exception as e:
        # Best effort: one failing page must not abort the whole run.
        print("發起請求出錯:", e)
    return None


def dowload_spider(url):
    """Download every image referenced by the listing page at *url*.

    Image URLs are taken from the page's ``data-progressive`` attributes and
    saved by ``wget`` into the ``images`` directory beside this script.
    Images whose file name already exists there are skipped.

    Args:
        url: Absolute URL of one listing page.

    Returns:
        None. Returns early when the page could not be fetched or when
        ``wget`` cannot be started.
    """
    html = request_spider(url)
    if not html:
        # request_spider yields None on failure; nothing to parse.
        return

    img_list = re.findall(r'data-progressive="(.*?)"', html)

    file_path = os.path.dirname(os.path.abspath(__file__))
    images_dir = os.path.join(file_path, "images")
    # exist_ok avoids the exists()/mkdir() race between worker threads.
    os.makedirs(images_dir, exist_ok=True)

    for item in img_list:
        filename = item.rsplit("/", 1)[-1]
        if filename and os.path.exists(os.path.join(images_dir, filename)):
            continue
        try:
            # Argument list (no shell) so scraped text cannot inject
            # commands; "--" stops wget from reading the URL as an option.
            # cwd= sets the directory for the child only, instead of
            # changing the process-wide working directory from a thread.
            subprocess.run(["wget", "--", item], cwd=images_dir, check=False)
        except OSError as e:
            # wget is missing or not executable; later images would fail
            # the same way.
            print("調用 wget 出錯:", e)
            return


def main():
    """Start one download thread per listing page and wait for all of them."""
    url = 'https://bing.ioliu.cn/?p={}'
    works = []
    for page in range(1, 102):
        thread = Thread(target=dowload_spider, args=(url.format(page),))
        thread.start()
        works.append(thread)
    for work in works:
        work.join()
    print("程序完成!")


if __name__ == '__main__':
    main()