<爬蟲> requests模塊

1、get請求

import requests

# Example 1: plain GET requests with the `requests` library.

# Target page for the simple GET.
url = 'http://www.baidu.com/'

# Browser-like User-Agent header sent with the requests in this example.
headers1 = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

# Send the GET request. A timeout is passed explicitly because requests
# applies none by default, so an unresponsive server would block forever.
r = requests.get(url, headers=headers1, timeout=10)
print(r)            # the Response object
r.encoding = 'utf8'  # set the encoding used to decode r.text
print(r.text)       # response body as a string

# GET with query parameters: pass the raw values via `params`; requests
# percent-encodes them and appends them to the URL.
url = 'https://www.baidu.com/s?'
data = {'ie': 'utf8',
        'wd': '美國'}

r = requests.get(url, headers=headers1, params=data, timeout=10)
# Save the raw bytes of the result page.
with open('baidu.html', 'wb') as fp:
    fp.write(r.content)

# Frequently used Response attributes:
#   r.text         response body as a string
#   r.content      response body as bytes
#   r.encoding     get or set the text encoding
#   r.status_code  HTTP status code
#   r.headers      response headers
#   r.url          URL of the request

2、post請求

import requests

# Example 2: POST form data to the Bing translator lookup endpoint.

url = 'https://cn.bing.com/tlookupv3?isVertical=1&&IG=15AA57077E2A43C1A35CBA989989D08D&IID=translator.5038.46/'

# Browser-like User-Agent header.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/73.0.3683.86 Safari/537.36'}

# Form fields: source language, target language and the text to look up.
formdata = {'from': 'en',
            'to': 'zh-Hans',
            'text': 'lion'}

# Send the POST request. A timeout is passed explicitly because requests
# applies none by default, so an unresponsive server would block forever.
r = requests.post(url=url, headers=headers, data=formdata, timeout=10)
print(r)          # the Response object
print(r.json())   # response body parsed as JSON


# Optional: dump the raw response bytes to a file for inspection.
# with open('baidu.html','wb') as fp:
#     fp.write(r.content)

3、使用代理

import requests

# Example 3: send a request through a proxy.

# Search page that reports the client's IP address.
url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=2&tn=baiduhome_pg&wd=IP'

# requests selects a proxy by the scheme of the request URL. The URL above is
# https, so an 'https' entry is required; with only an 'http' entry the
# request would bypass the proxy entirely.
proxy = {'http': 'http://119.23.248.167:8080',
         'https': 'http://119.23.248.167:8080'}

headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                         ' Chrome/75.0.3770.100 Safari/537.36'}

# A timeout is passed explicitly because requests applies none by default,
# so an unresponsive proxy or server would block forever.
r = requests.get(url, headers=headers, proxies=proxy, timeout=10)

# Save the raw bytes of the result page.
with open('daili.html', 'wb') as fp:
    fp.write(r.content)

4、cookie登陸

import requests

# Example 4: log in to Renren with a session so cookies persist.
# The login endpoint URL and the form fields were obtained by capturing the
# browser's login request; they are replayed here with a POST.

url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019641626542'

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/73.0.3683.86 Safari/537.36'}

# NOTE(review): the account, password hash and cookie values below are
# hard-coded from a captured login; replace them with your own and avoid
# committing real credentials.
# NOTE(review): 'f' is already percent-encoded and requests will encode form
# values again — confirm the endpoint expects that.
# NOTE(review): 'cookie' is sent as an ordinary form field here, not as a
# Cookie header — confirm that is intended.
formdata = {'email': '18404904721',
            'icode': '',
            'origURL': 'http://www.renren.com/home',
            'domain': 'renren.com',
            'key_id': 1,
            'captcha_type': 'web_login',
            'password': '62a9c3375228ff329d57dc88ed0a3bc3fda0e3970e4f0ddb00562f7c8cc76316',
            'rkey': '00b732e9c4b8d408b74655e15dd43a81',
            'f': 'https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D43V5wyB58bk-wUHTFWFH94lsWfrtUQQh0HJ1zcm7kFQBvWjnNQDwwtVUR1o9aeRV%26wd%3D%26eqid%3Dcfb26d2d000cf393000000035d1db824',
            'cookie': 'anonymid=jxczgs3yw3oby9; ln_uact=18404904721; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; jebe_key=2819f31f-79cc-428e-b61e-8b968e2beda4%7C920b82268747e02c45f3056eeda651c7%7C1561538325515%7C1%7C1561538325729; _r01_=1; depovince=ZGQT; JSESSIONID=abcGpDO8A0q87bIF9f8Uw; ick_login=7f3c3b5e-d7e8-4100-9231-80eef18b096d; first_login_flag=1; jebecookies=c36453f0-2b07-4452-bbc5-232414d8630a|||||'}

# When requests depend on each other (login, then authenticated pages),
# create a Session first and send every request through it. The `with` block
# closes the session's pooled connections when the example finishes.
with requests.Session() as s:
    # Send the login request. A timeout is passed explicitly because requests
    # applies none by default, so an unresponsive server would block forever.
    r = s.post(url=url, headers=headers, data=formdata, timeout=10)
    print(r.text)

    # Second request: after a successful login the session carries the
    # cookies set by the server, so the profile page can be fetched directly.
    get_url = 'http://www.renren.com/971302264/profile'
    r2 = s.get(url=get_url, headers=headers, timeout=10)
    with open('ren.html', 'wb') as fp:
        fp.write(r2.content)

# Note: the first login attempt on Renren does not require a CAPTCHA, so this
# code works as written. After a wrong password, the next attempt requires a
# CAPTCHA; the `icode` form field carries that dynamically generated code.
# For logins that need no CAPTCHA, this flow can be reused as a template.
相關文章
相關標籤/搜索