Scrapy 下載器中間件實現隨機請求頭和代理 IP

1、設置隨機請求頭(User-Agent)

import random


class UAMiddleWare(object):
    """Downloader middleware that gives every request a randomly chosen User-Agent."""

    # Pool of browser User-Agent strings that process_request picks from.
    UA_LIST = [
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
    ]

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header with a random entry of ``UA_LIST``.

        Args:
            request: Outgoing request; its ``headers`` mapping is modified in place.
            spider: Spider that issued the request (not used here).

        Returns:
            None (implicitly).
        """
        request.headers['User-Agent'] = random.choice(self.UA_LIST)

2、設置隨機代理 IP(開放代理)

import random


class IPMiddleWare(object):
    """Downloader middleware that sends every request through a randomly chosen open proxy."""

    # Pool of open (unauthenticated) proxy URLs that process_request picks from.
    # NOTE(review): these look like placeholder addresses -- the last entry has an
    # octet of 321, which is not a valid IPv4 address. Replace with real proxies.
    PROXIES = [
        'http://121.123.32.1:8080',
        'http://122.21.32.2:8000',
        'http://221.32.123.321:8080',
    ]

    def process_request(self, request, spider):
        """Store a random entry of ``PROXIES`` under ``request.meta['proxy']``.

        Args:
            request: Outgoing request; its ``meta`` mapping is modified in place.
            spider: Spider that issued the request (not used here).

        Returns:
            None (implicitly).
        """
        request.meta['proxy'] = random.choice(self.PROXIES)

3、設置獨享代理

import base64
class IPduxiang(object):
    """Downloader middleware that routes requests through one dedicated, authenticated proxy."""

    def process_request(self, request, spider):
        """Set the proxy URL and its HTTP Basic credentials on the request.

        Writes the proxy URL to ``request.meta['proxy']`` and a
        ``Proxy-Authorization`` header of the form ``Basic <base64(user:password)>``
        to ``request.headers``.

        Args:
            request: Outgoing request; its ``meta`` and ``headers`` are modified in place.
            spider: Spider that issued the request (not used here).

        Returns:
            None (implicitly).
        """
        # Address of the dedicated proxy. The URL scheme is required: the proxy
        # value is expected to be a full URL such as http://host:port.
        proxy = 'http://123.32.12.3:16861'
        # Credentials in "user:password" form.
        # NOTE(review): hard-coded credentials -- consider loading them from
        # project settings or the environment instead of source code.
        account_password = 'qishuai@juan-juan.com:12342332'
        request.meta['proxy'] = proxy
        # b64encode operates on bytes, so encode the text first.
        b64_password = base64.b64encode(account_password.encode('utf-8'))
        # The Basic scheme requires a single space between the scheme name and
        # the base64 token; the token is decoded back to str for the header value.
        request.headers['Proxy-Authorization'] = 'Basic ' + b64_password.decode('utf-8')
# 相比開放代理池,獨享代理須要將「用戶名:密碼」進行 base64 編碼(base64 只是編碼,並非加密),再以 'Basic ' 爲前綴(注意 Basic 後有一個空格)放入 Proxy-Authorization 請求頭中
相關文章
相關標籤/搜索