# 爬蟲請求2

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Cookie:小蛋糕,餅乾
# 特色
# 1. 用於存儲用戶的某些信息(不包含隱私信息)
# 2. 只用於存儲少許數據
# 3. cookie是個文件,位於瀏覽器
# 4. cookie有生命週期:未設置過期時間的會話cookie在瀏覽器關閉後失效,設置了過期時間的cookie會保留到過期爲止
import requests
# Fetch the chouti front page with no prior state and print the cookies
# returned in the response.
url = "http://dig.chouti.com"
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
print(response.cookies)

# When a site requires login and the login is protected by a verification
# code (CAPTCHA), reuse a cookie instead of logging in from code.
# Here: simulate a logged-in visit to zhihu by sending a Cookie header.
# NOTE(review): the Cookie value below looks like a real browser session
# credential pasted into the source -- confirm it should be kept here.
url = "https://www.zhihu.com"
response = requests.get(
    url,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:49.0) Gecko/20100101 Firefox/49.0",
        "Cookie": '''q_c1=dd70a0398f1b4836a5160b9f036fb447|1519631377000|1519631377000; capsion_ticket="2|1:0|10:1519724534|14:capsion_ticket|44:Y2QxYTJlNzNmN2ZkNDBkZTk4MGZjYzQyNGUxNmRlNjE=|1e9c827a7211c81987a5eb811d9f4f4acf80756838f6a10ae0c626f07ba54262"; _zap=5a78f8c6-2120-4eb3-a98d-6e7be333baba; aliyungf_tc=AQAAADKpiieNwAcALROfdUDjSU0fRvqS; d_c0="ALBr4aKQNQ2PTgyHuBjhaOLB-kTqUZQ_mgw=|1519712984"; _xsrf=37942126-c9d8-41f8-9749-4910c1aea54b; z_c0="2|1:0|10:1519724580|4:z_c0|92:Mi4xUE5TUkJRQUFBQUFBc0d2aG9wQTFEU1lBQUFCZ0FsVk5KSGFDV3dBUG5JdVVENkJlZGhTUTVyZWY1V2hrTUtYYUpB|e58c9866486e68edaf8e39d2da0d2892b3e484de609ca171783175ff54637cc7"'''
    },
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=10,
)
# Prints the raw response body (bytes).
print(response.content)

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# 1. 若是網站登陸的時候須要寫驗證碼,能夠考慮經過cookie直接登陸
# 例如:知乎網
# 注意:cookie須要手動登陸之後再粘貼

# 2. 若是網站登陸的時候不須要驗證碼,能夠考慮經過代碼自動登陸
# 例如:抽屜網
# 注意:它的cookie是經過代碼自動獲取的

# 抽屜網的登陸邏輯(其它網站不適用)
# 1. 先獲取首頁返回的cookie,由於它是用戶的憑證
# 2. 開始登陸(要攜帶上第一步的cookie)
# 3. 開始點贊/評論......
import requests
from random_agent import RandomAgent
from fake_useragent import UserAgent
agent = UserAgent()


# 不要重複造輪子
# pip search 工具包名字

# pip install fake_useragent

# Step 1 of the chouti login flow: visit the front page and keep the cookies
# the server returns so they can be sent with the following requests.
url = "http://dig.chouti.com/"
# Actions performed through a browser are generally GET requests.
response = requests.get(
    url,
    headers={
        # Alternative User-Agent source: RandomAgent.rand()
        "User-Agent": agent.random,
    },
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=10,
)
# Plain dict of cookie name -> value from the front-page response.
cookies = response.cookies.get_dict()

# Step 2 of the chouti login flow: post the login form while sending the
# cookies collected on the first visit.
# NOTE(review): the phone number and password are hard-coded in the source --
# confirm whether they should be loaded from configuration instead.
url = "http://dig.chouti.com/login"
response = requests.post(
    url,
    data={"phone": "8615896901897", "password": "qweqweqwe1", "oneMonth": "1"},
    headers={"User-Agent": agent.random},
    cookies=cookies,
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=10,
)

# Cookies returned by the login response, printed next to the first-visit
# cookies for comparison.
cookies1 = response.cookies.get_dict()
print(cookies)
print(cookies1)

# Step 3 of the chouti login flow: vote on a link.
# The request sends the first-visit cookies (`cookies`), i.e. the same ones
# that were sent with the login request, not the ones the login returned.
url = "http://dig.chouti.com/link/vote?linksId=17717073"
response = requests.post(
    url,
    headers={"User-Agent": agent.random},
    cookies=cookies,
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=10,
)
print(response.text)


# 1. 第一次訪問首頁的時候 服務器 返回一個cookie
# 2. 攜帶1的cookie進行登陸,服務器 返回另外一個cookie
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
from fake_useragent import UserAgent
agent = UserAgent()

# Requests sent through a Session carry its cookies automatically, so the
# visit -> login -> vote sequence needs no manual cookie passing.
# NOTE(review): the phone number and password are hard-coded in the source --
# confirm whether they should be loaded from configuration instead.
login_form = {"phone": "8615896901897", "password": "qweqweqwe1", "oneMonth": "1"}

# The with-block closes the session (and its pooled connections) on exit,
# including when one of the requests raises.
with requests.Session() as session:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    s1 = session.get(
        "http://dig.chouti.com/",
        headers={"User-Agent": agent.random},
        timeout=10,
    )
    s2 = session.post(
        "http://dig.chouti.com/login",
        data=login_form,
        headers={"User-Agent": agent.random},
        timeout=10,
    )
    s3 = session.post(
        "http://dig.chouti.com/link/vote?linksId=17717071",
        headers={"User-Agent": agent.random},
        timeout=10,
    )
print(s3.text)




# Three ways to open "1.txt" for writing. Each handle is opened in a
# with-block so it is closed (and flushed) instead of being leaked when `f`
# is rebound by the next example.

# Python 2.7: plain open(), called without an encoding argument.
with open("1.txt", "w") as f:
    pass

# Python 2.7: codecs.open() accepts an explicit encoding.
import codecs
with codecs.open("1.txt", "w", encoding="utf-8") as f:
    pass

# Python 3.6: the built-in open() accepts the encoding directly.
with open("1.txt", "w", encoding="utf-8") as f:
    pass


# 網絡七層從下往上的協議有哪些?
# Cookie和Session的區別?
# 相關文章
# 相關標籤/搜索