源碼:python
import os
from urllib import request

import requests
from lxml import etree


def get_url_list(page, headers):
    """Return the gallery page URLs linked from one listing page.

    Args:
        page: Listing page number, substituted into the ``more_{}.html`` URL.
        headers: HTTP headers passed to ``requests.get``.

    Returns:
        A list with the first ``href`` found under each ``<li>`` of the
        ``wp-list clearfix`` list. Items without a matching link are skipped.
    """
    url = 'http://www.meizitu.com/a/more_{}.html'.format(page)
    response = requests.get(url, headers=headers)
    # The response text is decoded as GBK rather than the detected encoding.
    response.encoding = 'gbk'
    html_ele = etree.HTML(response.text)

    page_list = []
    for ele in html_ele.xpath('//ul[@class="wp-list clearfix"]/li'):
        hrefs = ele.xpath('./div/div/a/@href')
        # An <li> without the expected anchor yields an empty list; skip it
        # instead of raising IndexError and aborting the whole crawl.
        if hrefs:
            page_list.append(hrefs[0])
    return page_list


def get_pictures(url, headers):
    """Download every picture of one gallery page into its own directory.

    The directory is ``妹子圖/<title>``, where the title is the text of the
    link inside ``div.metaRight > h2``. Files that already exist on disk are
    not downloaded again. Each download is best-effort: a failure is
    reported and the remaining pictures are still attempted.

    Args:
        url: URL of the gallery page.
        headers: HTTP headers passed to ``requests.get`` for the page fetch.
    """
    response = requests.get(url, headers=headers)
    response.encoding = 'gbk'
    html_ele = etree.HTML(response.text)

    title_nodes = html_ele.xpath('//div[@class="metaRight"]/h2/a')
    if not title_nodes or not title_nodes[0].text:
        # Without a title there is no directory name to save under.
        print('skipped (no gallery title found): {}'.format(url))
        return

    dir_name = '妹子圖/' + title_nodes[0].text
    os.makedirs(dir_name, exist_ok=True)

    for img_url in html_ele.xpath('//div[@id="picture"]/p/img/@src'):
        name = img_url.split('/')[-1]
        filename = dir_name + '/' + name
        if os.path.exists(filename):
            continue
        try:
            request.urlretrieve(img_url, filename)
        except (OSError, ValueError) as err:
            # OSError covers urllib's URLError/HTTPError and file errors;
            # ValueError is raised for a malformed or relative image URL.
            print('failed to download {}: {}'.format(img_url, err))
        else:
            print(filename)


if __name__ == '__main__':
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    # Walk listing pages 1..72 and download every gallery found on each.
    for page in range(1, 73):
        page_list = get_url_list(page, headers)
        for url in page_list:
            get_pictures(url, headers)