requests模擬登陸知乎

一、首先分析登陸頁面。經分析得知,知乎登陸時POST的數據包括:`_xsrf`、`password`,以及帳號字段(手機號登陸時爲 `phone_num`,郵箱登陸時爲 `email`)。

二、XSRF爲跨站請求僞造(Cross-site request forgery)。通過蒐集資料,在大神的博客裏找到了相應的說明:http://cuiqingcai.com/2076.html ,講得也很清楚,有興趣可以查看。這個參數就是爲了防範XSRF攻擊而設置的一個hash值,每次訪問主頁都會生成這樣一個唯一的字符串。這裏我們只關注如何取得這個xsrf值。右鍵查看網頁源碼可以發現,頁面中有一個帶 `name="_xsrf"` 屬性的標籤,它的 `value` 屬性就是這個值。

這樣一來,我們只需要用requests請求到頁面的響應response之後,用正則匹配取得這個xsrf就可以了。解決了這個問題,我們就可以去模擬登陸了。

三、直接貼上源碼

# -*- coding: utf-8 -*-

import requests
# Python 2 ships the cookie jar module as ``cookielib``; Python 3 renamed it
# to ``http.cookiejar``. Bind whichever one exists to the name ``cookielib``.
try:
    import cookielib
except ImportError:
    import http.cookiejar as cookielib

import re

# One shared HTTP session for every request in this script. Its cookie jar is
# backed by the file cookies.txt, which is loaded here if it can be read.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except (IOError, cookielib.LoadError):
    # The cookie file is missing, unreadable, or malformed; carry on with an
    # empty jar instead of aborting.
    print ("cookie未能加載")

# Request headers sent with every call so the requests resemble a browser.
agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
header = {
    "HOST": "www.zhihu.com",
    # Was misspelled "www.zhizhu.com"; the Referer must name the real site.
    "Referer": "https://www.zhihu.com",
    'User-Agent': agent
}

def is_login():
    """Report whether the shared session is treated as logged in.

    A fixed page is requested without following redirects. The session is
    considered logged in only when the response status code is exactly 200.

    Returns:
        True if the status code is 200, otherwise False.
    """
    probe_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    reply = session.get(probe_url, headers=header, allow_redirects=False)
    return reply.status_code == 200

def get_xsrf():
    """Fetch the home page and extract its ``_xsrf`` token.

    Returns:
        The text of the ``value`` attribute that follows ``name="_xsrf"`` in
        the page source, or an empty string when no such attribute is found.
    """
    response = session.get("https://www.zhihu.com", headers=header)
    # Use re.search rather than re.match with a leading ".*": "." does not
    # match newlines by default, so the anchored pattern could only ever match
    # inside the first line of the page and missed the token everywhere else.
    match_obj = re.search(r'name="_xsrf" value="(.*?)"', response.text)
    if match_obj:
        return match_obj.group(1)
    return ""


def get_index():
    """Download the home page and write it, UTF-8 encoded, to index_page.html.

    Prints "ok" once the file has been written.
    """
    page = session.get("https://www.zhihu.com", headers=header)
    with open("index_page.html", "wb") as out_file:
        encoded_page = page.text.encode("utf-8")
        out_file.write(encoded_page)
    print ("ok")

def zhihu_login(account, password):
    """Log in to Zhihu with a phone number or an e-mail address.

    The account type selects the login endpoint and the name of the account
    field in the POST body. After the request, the session cookies are saved
    to the cookie jar's file.

    Args:
        account: An 11-digit phone number starting with "1", or a string
            containing "@" (treated as an e-mail address).
        password: The account password.

    Raises:
        ValueError: If ``account`` is neither a phone number nor an e-mail
            address. Previously this case failed with an unbound-variable
            error at the POST call.
    """
    # Anchor the pattern at both ends: without "$", an e-mail address such as
    # "13800000000@163.com" would be mistaken for a phone number.
    if re.match(r"^1\d{10}$", account):
        print ("手機號碼登陸")
        post_url = "https://www.zhihu.com/login/phone_num"
        account_field = "phone_num"
    elif "@" in account:
        print("郵箱方式登陸")
        post_url = "https://www.zhihu.com/login/email"
        account_field = "email"
    else:
        raise ValueError(
            "account must be an 11-digit phone number or an e-mail address"
        )

    post_data = {
        "_xsrf": get_xsrf(),
        account_field: account,
        "password": password,
    }
    session.post(post_url, data=post_data, headers=header)
    session.cookies.save()

# Demo run. NOTE: the credentials are hard-coded sample values; replace them
# with your own account before running.
zhihu_login("18782902568", "admin123")
# Optional: call get_index() here to dump the home page to index_page.html.
# get_index()
# Print the result; it was previously discarded, so the script never showed
# whether the login had worked.
print (is_login())
相關文章
相關標籤/搜索