一、首先分析登陸頁面。經分析得知,知乎登陸時POST的數據包含 `_xsrf`、`phone_num`(郵箱登陸時爲 `email`)和 `password` 三個字段。
二、XSRF爲跨站請求僞造(Cross-site request forgery),通過蒐集資料,在大神的博客裏找到了相應資料:http://cuiqingcai.com/2076.html ,說得也很清楚,有興趣可以查看。這個參數就是爲了防範XSRF攻擊而設置的一個hash值,每次訪問主頁都會生成這樣一個唯一的字符串。這裏我們只關注如何取得這個xsrf值。右鍵查看網頁源碼可以發現,頁面中有一個 name="_xsrf" 的標籤,它的 value 屬性就是這個值。
這樣一來,我們只需要用requests請求頁面,拿到響應response之後,用正則匹配得到這個xsrf就行了。解決了這個問題,我們就可以去模擬登陸了。
三、直接貼上源碼:
# -*- coding: utf-8 -*-
"""Simulate a Zhihu login with ``requests`` and persist the session cookies.

A module-level ``requests`` session is backed by an ``LWPCookieJar`` stored in
``cookies.txt``. ``zhihu_login`` posts the credentials together with the
``_xsrf`` token scraped from the home page and then saves the cookie jar.
"""
import re

import requests

try:
    import cookielib  # Python 2
except ImportError:
    import http.cookiejar as cookielib  # Python 3

# Shared session; its cookies are loaded from / saved to cookies.txt.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except (IOError, cookielib.LoadError):
    # Missing or unreadable cookie file: continue with an empty jar.
    print("cookie未能加載")

agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
header = {
    "HOST": "www.zhihu.com",
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
}

# Matches the value of the markup attribute pair name="_xsrf" value="...".
_XSRF_PATTERN = re.compile(r'name="_xsrf" value="(.*?)"')
# A phone account is exactly 11 digits starting with 1.
_PHONE_PATTERN = re.compile(r"^1\d{10}$")


def is_login():
    """Return True when the session appears to be logged in.

    A fixed Zhihu page is requested without following redirects; only an
    HTTP 200 response counts as logged in.
    """
    inbox_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    response = session.get(inbox_url, headers=header, allow_redirects=False)
    return response.status_code == 200


def _extract_xsrf(html):
    """Return the ``_xsrf`` value found in *html*, or "" when absent."""
    # search() scans the whole document; match() with a leading ".*" only
    # covered the first line because "." does not cross newlines.
    found = _XSRF_PATTERN.search(html)
    return found.group(1) if found else ""


def get_xsrf():
    """Fetch the Zhihu home page and return its ``_xsrf`` token ("" if none)."""
    response = session.get("https://www.zhihu.com", headers=header)
    return _extract_xsrf(response.text)


def get_index():
    """Download the Zhihu home page into ``index_page.html`` (UTF-8)."""
    response = session.get("https://www.zhihu.com", headers=header)
    with open("index_page.html", "wb") as f:
        f.write(response.text.encode("utf-8"))
    print("ok")


def zhihu_login(account, password):
    """Log in to Zhihu with a phone number or an e-mail address.

    Args:
        account: an 11-digit phone number starting with 1, or a string
            containing "@" (treated as an e-mail address).
        password: the account password.

    Returns:
        The ``requests`` response of the login POST.

    Raises:
        ValueError: if *account* is neither a phone number nor an e-mail.
    """
    if _PHONE_PATTERN.match(account):
        print("手機號碼登陸")
        post_url = "https://www.zhihu.com/login/phone_num"
        post_data = {
            "_xsrf": get_xsrf(),
            "phone_num": account,
            "password": password,
        }
    elif "@" in account:
        print("郵箱方式登陸")
        post_url = "https://www.zhihu.com/login/email"
        post_data = {
            "_xsrf": get_xsrf(),
            "email": account,
            "password": password,
        }
    else:
        # Previously this path fell through and failed on an unbound post_url.
        raise ValueError(
            "account must be an 11-digit phone number starting with 1 "
            "or an e-mail address"
        )

    response = session.post(post_url, data=post_data, headers=header)
    # NOTE(review): save() uses the default ignore_discard=False while load()
    # above passes ignore_discard=True -- confirm whether session-only cookies
    # should be persisted as well.
    session.cookies.save()
    return response


if __name__ == "__main__":
    # NOTE(review): demo credentials are hard-coded; replace before real use.
    zhihu_login("18782902568", "admin123")
    # get_index()
    is_login()