# Reference: https://www.cnblogs.com/microman/p/6111711.html
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2017-12-07 13:40:43
# Project: adquan
"""pyspider project that saves the images of an adquan article to disk."""

import os
import posixpath

try:  # Python 3
    from urllib.parse import urlsplit
except ImportError:  # Python 2
    from urlparse import urlsplit

# pyspider projects conventionally star-import the handler toolkit
# (BaseHandler plus the `every` / `config` decorators used below).
from pyspider.libs.base_handler import *

# Default root directory under which image folders are created.
DIR_PATH = "E:/pyspider/"


class Handler(BaseHandler):
    """Crawl one article page and download every image in its body."""

    crawl_config = {
    }

    def __init__(self):
        # Helper that owns all filesystem work (directories, file writes).
        self.deal = Deal()

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the single article page that this project scrapes."""
        self.crawl('http://creative.adquan.com/show/42759',
                   callback=self.detail_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Follow every absolute link on ``response`` to ``detail_page``.

        Nothing in this file schedules this callback; ``on_start`` goes
        straight to ``detail_page``.
        """
        for each in response.doc('a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Queue a download for each image inside ``.con_Text``.

        Images are numbered 0, 1, 2, ... in document order; ``<img>`` tags
        without a ``src`` are skipped and do not consume a number.

        Returns:
            dict with the page ``url`` and ``title``.
        """
        name = 'test'
        dir_path = None
        count = 0
        for img in response.doc('.con_Text img').items():
            url = img.attr.src
            if not url:
                continue
            # Create the target directory once, and only if there is at
            # least one image to store in it.
            if dir_path is None:
                dir_path = self.deal.mkDir(name)
            extension = self.deal.getExtension(url)
            file_name = str(count)
            if extension:
                file_name += '.' + extension
            count += 1
            self.crawl(url, callback=self.save_img,
                       save={'dir_path': dir_path, 'file_name': file_name})

        return {
            "url": response.url,
            "title": response.doc('title').text(),
        }

    def save_img(self, response):
        """Write the downloaded image to the path carried in ``response.save``."""
        dir_path = response.save['dir_path']
        file_name = response.save['file_name']
        file_path = os.path.join(dir_path, file_name)
        self.deal.saveImg(response.content, file_path)


class Deal(object):
    """Filesystem helper: creates directories and writes downloaded data."""

    def __init__(self, path=None):
        """Prepare the root output directory.

        Args:
            path: root directory; defaults to the module-level ``DIR_PATH``.
        """
        self.path = DIR_PATH if path is None else path
        # mkDir() concatenates onto self.path, so it must end with a slash.
        if not self.path.endswith('/'):
            self.path = self.path + '/'
        if not os.path.exists(self.path):
            os.makedirs(self.path)

    def mkDir(self, path):
        """Ensure ``<root>/<path>`` exists and return it.

        Args:
            path: sub-directory name; surrounding whitespace is stripped.

        Returns:
            The full directory path as a string.
        """
        dir_path = self.path + path.strip()
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        return dir_path

    def saveImg(self, content, path):
        """Write the binary ``content`` to the file at ``path``."""
        # `with` closes the handle even if the write raises.
        with open(path, 'wb') as f:
            f.write(content)

    def saveBrief(self, content, dir_path, name):
        """Write text ``content`` as UTF-8 to ``<dir_path>/<name>.txt``."""
        file_name = os.path.join(dir_path, name + ".txt")
        # Binary mode: the payload is already encoded bytes, which a
        # text-mode handle rejects on Python 3.
        with open(file_name, "wb") as f:
            f.write(content.encode('utf-8'))

    def getExtension(self, url):
        """Return the file extension of ``url`` without the leading dot.

        Only the path component is inspected, so query strings and
        fragments are ignored (``a.jpg?w=100`` -> ``jpg``).

        Returns:
            The extension, or ``''`` when the URL path has none.
        """
        url_path = urlsplit(url).path
        return posixpath.splitext(url_path)[1][1:]
# Reference: http://demo.pyspider.org/