Scrapy (1): Running multiple spiders at once

1. Create a commands package at the same level as the spiders directory.
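The resulting layout looks roughly like this (a sketch; the project name BaiduSpider matches the COMMANDS_MODULE value used later, and commands needs an empty __init__.py so Python treats it as a package):

BaiduSpider/
    scrapy.cfg
    BaiduSpider/
        __init__.py
        settings.py
        commands/
            __init__.py
            crawlall.py
        spiders/
            __init__.py
            ...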

crawlall.py

#-*-coding:utf-8-*-

from scrapy.commands import ScrapyCommand
from scrapy.utils.conf import arglist_to_dict
from scrapy.exceptions import UsageError


class Command(ScrapyCommand):
    requires_project = True

    def syntax(self):
        return '[options]'

    def short_desc(self):
        return 'Runs all of the spiders'

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                          help="set spider argument (may be repeated)")
        parser.add_option("-o", "--output", metavar="FILE",
                          help="dump scraped items into FILE (use - for stdout)")
        parser.add_option("-t", "--output-format", metavar="FORMAT",
                          help="format to use for dumping items with -o")

    def process_options(self, args, opts):
        ScrapyCommand.process_options(self, args, opts)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    def run(self, args, opts):
        # self.crawler_process is a CrawlerProcess built from the project settings
        spider_loader = self.crawler_process.spider_loader
        # crawl the spiders named on the command line, or every spider in the project
        for spidername in args or spider_loader.list():
            print("*********crawlall spidername************" + spidername)
            self.crawler_process.crawl(spidername, **opts.spargs)

        self.crawler_process.start()
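Note: parser.add_option above is the optparse-style API of older Scrapy releases. Newer Scrapy releases hand add_options an argparse parser instead, so the options would be declared with parser.add_argument. A minimal sketch of just the -a option under that assumption:

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        # argparse equivalent of the optparse-style -a option above
        parser.add_argument("-a", dest="spargs", action="append", default=[],
                            metavar="NAME=VALUE",
                            help="set spider argument (may be repeated)")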

setup.py

#-*-coding:utf-8-*-
from setuptools import setup, find_packages

setup(
    name='scrapy-mymodule',
    entry_points={
        'scrapy.commands': [
            # command name = module containing the command : command class
            'crawlall=cnblogs.commands.crawlall:Command',
        ],
    },
)
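This setup.py route is only needed when the command should be distributed as an installable package (replace cnblogs with your own package name), and it only takes effect once that package is installed, e.g. with pip install -e . run from the directory containing setup.py.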

settings.py

# ------- start multiple spiders at the same time
COMMANDS_MODULE = 'BaiduSpider.commands'  # <project package>.commands
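For a command that lives inside the project, this COMMANDS_MODULE setting is sufficient on its own; the setup.py entry point above is not also required. To verify the registration, run scrapy -h inside the project and check that crawlall appears in the list of available commands.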

Run scrapy crawlall from the command line (cmd).
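Because run() only falls back to spider_loader.list() when no positional arguments are given, the command can also be limited to specific spiders, e.g. scrapy crawlall spider1 spider2, and spider arguments can be passed with -a NAME=VALUE.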

To run this on a schedule under Windows, create run.py and register it as a basic task in Task Scheduler.

#-*-coding:utf-8-*-
from scrapy import cmdline

cmdline.execute(['scrapy','crawlall'])
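Since run.py simply replays the command line, it must be executed from the project root (the directory containing scrapy.cfg) so Scrapy can find the project settings; in Task Scheduler, set the task's "start in" directory accordingly. For completeness, the same effect can be had without a custom command by driving CrawlerProcess directly. A minimal sketch, not part of the original setup, that relies on the same mechanism the crawlall command uses internally:

#-*-coding:utf-8-*-
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# must also be run from the project root so get_project_settings() picks up settings.py
process = CrawlerProcess(get_project_settings())
for name in process.spider_loader.list():  # every spider registered in the project
    process.crawl(name)                    # queue the spider
process.start()                            # run all queued spiders, block until done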