# Scrapy project settings: spider discovery, identity, cookies, telnet
# console and default request headers.

# Modules in which Scrapy looks for spiders.
SPIDER_MODULES = ['Amazon.spiders']
# Module in which newly generated spiders are created.
NEWSPIDER_MODULE = 'Amazon.spiders'

# User-Agent string sent with requests.
USER_AGENT = 'Amazon (+http://www.yourdomain.com)'

# Do not honour robots.txt rules.
ROBOTSTXT_OBEY = False

# Keep the cookies middleware switched on.
COOKIES_ENABLED = True

# Telnet console: disabled; host and port are kept for when it is re-enabled.
TELNETCONSOLE_ENABLED = False
TELNETCONSOLE_HOST = '127.0.0.1'
TELNETCONSOLE_PORT = [6023]

# Headers attached to every request unless a request overrides them.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}
# Set in the spider class:
# custom_settings = {
#     'DEFAULT_REQUEST_HEADERS': {
#         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#         'Accept-Language': 'en',
#         "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
#     }
# }
# Concurrency and politeness settings.

# Maximum number of concurrent requests overall (default: 16).
CONCURRENT_REQUESTS = 32

# Maximum number of concurrent requests to any single domain (default: 8).
CONCURRENT_REQUESTS_PER_DOMAIN = 16

# NOTE(review): the original notes also contained "default is 0, meaning no
# limit; there are two points to be aware of" with no setting attached --
# presumably this described CONCURRENT_REQUESTS_PER_IP; confirm.

# A fixed value: the number of seconds to wait between requests to the
# same website.
DOWNLOAD_DELAY = 3
# AutoThrottle extension settings.

# Set to True to enable the extension (default: False).
AUTOTHROTTLE_ENABLED = True

# Initial download delay, in seconds.
AUTOTHROTTLE_START_DELAY = 5

# Minimum delay, in seconds: the throttled delay is never lower than
# DOWNLOAD_DELAY.
DOWNLOAD_DELAY = 3

# Maximum delay, in seconds.
AUTOTHROTTLE_MAX_DELAY = 10

# Average number of requests to send in parallel to a remote site.
# Must not be higher than CONCURRENT_REQUESTS_PER_DOMAIN or
# CONCURRENT_REQUESTS_PER_IP.
AUTOTHROTTLE_TARGET_CONCURRENCY = 16.0

# Log throttling stats for every response received.
AUTOTHROTTLE_DEBUG = True

# Upper bounds that AUTOTHROTTLE_TARGET_CONCURRENCY must respect.
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16