# -*- coding:utf-8 -*-
"""Log in to Zhihu once, persist the cookies, and reuse them later.

Workflow:

1. A ``requests`` session is given an ``LWPCookieJar`` bound to a
   per-account file.  When no usable cookies exist yet, a login is
   simulated and the cookies returned on success are saved to that file
   for later runs.
2. During login the ``_xsrf`` token has to be pulled out of the cookies
   returned by the login page and sent along with the phone number and
   password.  The site may also demand a captcha (a captcha-solving
   service could be used for that); captcha handling is NOT implemented
   in this file.
3. Subsequent requests only need to load the saved cookie file.
"""
import codecs
import http.cookiejar as cookieJar
import random
import re
import time

import requests

# Browser-like headers shared by the login flow and the follow-up request.
DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/64.0.3282.186 Safari/537.36",
    'Host': 'www.zhihu.com',
    # The original value contained stray spaces ("https: // www.zhihu.com /"),
    # which is not a valid URL.
    "Referer": "https://www.zhihu.com/",
}

# Endpoint the phone-number login form is posted to.
LOGIN_URL = "https://www.zhihu.com/login/phone_num"

# Pulls the token value out of a raw ``Set-Cookie`` header.
_XSRF_RE = re.compile(r'_xsrf=(.*?);', re.S)


def _cookie_filename(phone_num):
    """Return the cookie-file name used for the account *phone_num*."""
    return phone_num + "page.txt"


class ZHCookies(object):
    """Perform a phone-number login and save the session cookies to disk.

    Attributes set by ``__init__``: ``url`` (page fetched to obtain the
    ``_xsrf`` cookie), ``headers``, ``filename`` (cookie file),
    ``phone_num``, ``password`` and ``session`` (a ``requests.Session``
    whose cookie jar is an ``LWPCookieJar`` bound to ``filename``).
    """

    def __init__(self, phone_num, password):
        """Create a session whose cookies are stored in ``<phone_num>page.txt``.

        Args:
            phone_num: Account phone number (string); also used to build
                the cookie-file name.
            password: Account password.
        """
        self.url = "https://www.zhihu.com/signup?next=%2F"
        # Copy so per-instance tweaks never leak into the shared constant.
        self.headers = dict(DEFAULT_HEADERS)
        self.filename = _cookie_filename(phone_num)
        self.password = password
        self.session = requests.Session()
        self.session.cookies = cookieJar.LWPCookieJar(filename=self.filename)
        self.phone_num = phone_num

    def _find_xsrf(self, response):
        """Return the ``_xsrf`` token for *response*, or ``None`` if absent.

        The raw ``Set-Cookie`` header is searched first; if it is missing
        or has no token, the session's cookie jar is consulted instead.
        """
        found = _XSRF_RE.search(response.headers.get("Set-Cookie", ""))
        if found:
            return found.group(1)
        for cookie in self.session.cookies:
            if cookie.name == "_xsrf":
                return cookie.value
        return None

    def get_cookies(self, max_attempts=None, retry_delay=1):
        """Log in and save the resulting cookies to ``self.filename``.

        Args:
            max_attempts: Maximum number of login POSTs.  ``None`` (the
                default) retries until the login succeeds, which is the
                original behaviour.
            retry_delay: Seconds to sleep after each failed attempt.

        Returns:
            ``True`` once a login succeeded and the cookies were saved,
            ``False`` if ``max_attempts`` was exhausted first.
        """
        response = self.session.get(self.url, headers=self.headers)
        print(response)
        xsrf = self._find_xsrf(response)

        data = {
            "phone_num": self.phone_num,
            "password": self.password,
        }
        # The token was previously extracted but never submitted; send it
        # whenever one is available.
        if xsrf is not None:
            data["_xsrf"] = xsrf

        attempt = 0
        while max_attempts is None or attempt < max_attempts:
            attempt += 1
            response = self.session.post(LOGIN_URL, data=data, headers=self.headers)
            res = response.json()
            if res.get("r") == 0:
                print("登陸成功")
                self.session.cookies.save()
                return True
            # Was res.get("mag"): a typo that always printed None.
            print(res.get("msg"))
            print("正在嘗試登陸。。")
            time.sleep(retry_delay)
        return False


def main():
    """Load one account's saved cookies, fetch the home page, and save it."""
    user_info = [{"phone_num": "********", "password": "*******"}]
    for user in user_info:
        zh = ZHCookies(user["phone_num"], user["password"])
        # The login step is left disabled, as in the original script;
        # uncomment to log in and (re)write the cookie file.
        # zh.get_cookies()

    # Reuse previously saved cookies instead of logging in again.
    user = random.choice(user_info)
    with requests.Session() as session:
        # Load the cookie file of a randomly chosen account.
        session.cookies = cookieJar.LWPCookieJar()
        session.cookies.load(_cookie_filename(user["phone_num"]))
        # Issue the request with the restored cookies.
        url = "http://www.zhihu.com"
        response = session.get(url, headers=DEFAULT_HEADERS)
        # Save the returned page for inspection.
        with codecs.open("zhihu1.html", "w", encoding="utf-8") as f:
            f.write(response.text)


if __name__ == "__main__":
    main()