#菠蘿tang
#coding:utf-8
import json
import os
import time
import urllib
import urllib2

import jsonpath

# Number of results the search API returns per request; `start` advances in
# steps of this size.
PAGE_SIZE = 24

# Directory (relative to the working directory) that downloaded images go to.
DOWNLOAD_DIR = 'DuiTang'


def handle_request(url, sort, page):
    """Build the search request for one batch of results.

    Args:
        url: Search endpoint, ending in ``kw=`` so the keyword can be appended.
        sort: Search keyword. It is percent-encoded before being put in the URL.
        page: Zero-based batch index; the request asks for results starting at
            offset ``24 * page``.

    Returns:
        A ``urllib2.Request`` carrying a desktop-browser User-Agent header.
    """
    query_string = (
        '&type=feed'
        '&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable'
        '%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum'
        '%2Creply_count%2Cfavorite_blog_id'
        '&_type=&'
    )
    # urllib.quote on Python 2 only handles byte strings reliably, so encode
    # unicode keywords first.
    if isinstance(sort, unicode):
        sort = sort.encode('utf-8')
    # Percent-encode the keyword: raw spaces, '&', '#' or non-ASCII bytes would
    # otherwise corrupt the query string.
    keyword = urllib.quote(sort, safe='')
    url_use = url + keyword + query_string + 'start=' + str(PAGE_SIZE * page)
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/69.0.3497.100 Safari/537.36'
        ),
    }
    return urllib2.Request(url=url_use, headers=headers)


def download_image(content):
    """Download every image referenced by a search-result JSON document.

    Every value stored under a ``path`` key anywhere in the document is treated
    as an image URL and saved into the ``DuiTang`` directory under the last
    path segment of that URL. The function pauses one second after each
    download.

    Args:
        content: JSON text returned by the search endpoint.
    """
    document = json.loads(content)
    # jsonpath.jsonpath returns False, not an empty list, when nothing matches,
    # so it must be checked before iterating.
    url_list = jsonpath.jsonpath(document, "$..path")
    if not url_list:
        return

    # Create the target directory once rather than re-checking per image.
    if not os.path.exists(DOWNLOAD_DIR):
        os.mkdir(DOWNLOAD_DIR)

    for image_url in url_list:
        filename = image_url.split('/')[-1]
        filepath = os.path.join(DOWNLOAD_DIR, filename)
        urllib.urlretrieve(image_url, filepath)
        time.sleep(1)


def main():
    """Prompt for a batch range and a keyword, then download each batch."""
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw='
    # raw_input, not input: on Python 2, input() evaluates the typed text as a
    # Python expression.
    start_page = int(raw_input("請輸入起始抓取位置(24個圖爲一個部分):"))
    end_page = int(raw_input("請輸入終止抓取位置:"))
    sort = raw_input("請輸入查詢的種類:")

    for page in range(start_page - 1, end_page):
        print('第%s部分開始下載......' % (page + 1))
        request = handle_request(url, sort, page)
        response = urllib2.urlopen(request)
        try:
            content = response.read()
        finally:
            # Python 2 urlopen responses are not context managers; close
            # explicitly so the connection is released.
            response.close()
        print(content)
        # Parse the response, extract every image URL and download the images.
        download_image(content)
        print('第%s部分下載完成' % (page + 1))
        time.sleep(2)


if __name__ == '__main__':
    main()
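# Requires Python 2.7.
# Duitang serves its image listings as JSON; the visible pagination is only a
# front for it. The query parameters that matter are `kw` (the search keyword)
# and `start` (the result offset).
# Note to self: fetching and handling JSON data takes some serious study!
# The two calls that extract the image URLs from the response are:
#   unicodestr = json.loads(content)
#   url_list = jsonpath.jsonpath(unicodestr, "$..path")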