"""Simulated login to zhihu.com using the v3 OAuth sign-in API.

The script keeps cookies in ``./cookies.txt`` so that later runs can reuse a
previous session, and asks the user to solve a captcha interactively when the
captcha endpoint requests one.
"""
import base64
import hashlib
import hmac
import json
import re
import time
from http import cookiejar

import matplotlib.pyplot as plt
import requests
from PIL import Image

# Default headers sent with every request of the session.
HEADERS = {
    'Connection': 'keep-alive',
    'Host': 'www.zhihu.com',
    'Referer': 'https://www.zhihu.com/',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36'
}
LOGIN_URL = 'https://www.zhihu.com/signup'
LOGIN_API = 'https://www.zhihu.com/api/v3/oauth/sign_in'
# Template of the sign-in form; each ZhihuAccount works on its own copy.
FORM_DATA = {
    'client_id': 'c3cef7c66a1843f8b3a9e6a1e3160e20',
    'grant_type': 'password',
    'source': 'com.zhihu.web',
    'username': '',
    'password': '',
    # Set to 'cn' to get the "click the upside-down Chinese characters" captcha.
    'lang': 'en',
    'ref_source': 'homepage'
}


class ZhihuAccount(object):
    """Holds a requests session and performs the Zhihu login flow on it."""

    def __init__(self):
        self.login_url = LOGIN_URL
        self.login_api = LOGIN_API
        self.login_data = FORM_DATA.copy()
        self.session = requests.session()
        self.session.headers = HEADERS.copy()
        # File-backed jar so cookies can be saved to / loaded from disk.
        self.session.cookies = cookiejar.LWPCookieJar(filename='./cookies.txt')

    def login(self, username=None, password=None, load_cookies=True):
        """
        Log in to Zhihu.

        :param username: phone number used to log in; prompted for if missing
        :param password: password; prompted for if missing
        :param load_cookies: whether to try the cookies saved by a previous run first
        :return: True when the session is logged in, False otherwise
        """
        # Fast path: a previously saved session may still be valid.
        if load_cookies and self.load_cookies():
            if self.check_login():
                return True

        headers = self.session.headers.copy()
        headers.update({
            'authorization': 'oauth c3cef7c66a1843f8b3a9e6a1e3160e20',
            'X-Xsrftoken': self._get_token()
        })
        username, password = self._check_user_pass(username, password)
        self.login_data.update({
            'username': username,
            'password': password
        })
        # Millisecond timestamp; the same value is sent in the form and signed.
        timestamp = str(int(time.time()*1000))
        self.login_data.update({
            'captcha': self._get_captcha(self.login_data['lang'], headers),
            'timestamp': timestamp,
            'signature': self._get_signature(timestamp)
        })

        resp = self.session.post(self.login_api, data=self.login_data, headers=headers)
        if 'error' in resp.text:
            print(json.loads(resp.text)['error']['message'])
        elif self.check_login():
            return True
        print('登陸失敗')
        return False

    def load_cookies(self):
        """
        Load the cookie file into the session's cookie jar.

        :return: True if the file was loaded, False if it does not exist or
                 cannot be parsed as an LWP cookie file
        """
        try:
            self.session.cookies.load(ignore_discard=True)
            return True
        except (FileNotFoundError, cookiejar.LoadError):
            # A missing or unreadable cookie file just means "no saved
            # session"; the caller then falls back to a fresh login.
            return False

    def check_login(self):
        """
        Check the login state by requesting the sign-up page without following
        redirects: a 302 response is treated as "already logged in".
        On success the current cookies are saved to disk.

        :return: True when logged in, False otherwise
        """
        resp = self.session.get(self.login_url, allow_redirects=False)
        if resp.status_code == 302:
            # NOTE(review): saved without ignore_discard while load_cookies()
            # loads with ignore_discard=True - confirm this is intended.
            self.session.cookies.save()
            print('登陸成功')
            return True
        return False

    def _get_token(self):
        """
        Fetch the Zhihu home page and return the token sent as ``X-Xsrftoken``.

        The cookie named ``_xsrf`` is preferred (presumably the XSRF cookie -
        TODO confirm against the live site); if it is absent the value of the
        first cookie is returned.

        :return: token string
        :raises IndexError: if the response carries no cookies at all
        """
        # NOTE(review): this request does not go through self.session, so the
        # cookies it receives are not stored in the session jar.
        resp = requests.get("https://www.zhihu.com")
        jar = resp.cookies
        for cookie in jar:
            if cookie.name == '_xsrf':
                return cookie.value
        values = [cookie.value for cookie in jar]
        if not values:
            raise IndexError('no cookies returned by https://www.zhihu.com; '
                             'cannot obtain the XSRF token')
        return values[0]

    def _get_captcha(self, lang, headers):
        """
        Call the captcha API. It is requested once whether or not a captcha
        turns out to be needed. When one is needed, the image arrives
        base64-encoded, is written to ``./captcha.jpg`` and must be solved
        by the user.

        :param lang: captcha language ('cn' for click-the-characters, otherwise 'en')
        :param headers: request headers carrying the authorization information
        :return: the captcha value to put in the sign-in form ('' if none is required)
        """
        if lang == 'cn':
            api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=cn'
        else:
            api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
        resp = self.session.get(api, headers=headers)
        show_captcha = re.search(r'true', resp.text)

        if show_captcha:
            put_resp = self.session.put(api, headers=headers)
            json_data = json.loads(put_resp.text)
            # json.loads has already turned escaped "\n" into real newlines,
            # so strip those; literal backslash-n sequences are stripped too,
            # as the original code did.
            img_base64 = json_data['img_base64'].replace(r'\n', '').replace('\n', '')
            with open('./captcha.jpg', 'wb') as f:
                f.write(base64.b64decode(img_base64))
            img = Image.open('./captcha.jpg')
            if lang == 'cn':
                plt.imshow(img)
                print('點擊全部倒立的漢字,按回車提交')
                points = plt.ginput(7)
                # Clicked coordinates are halved before submission.
                capt = json.dumps({'img_size': [200, 44],
                                   'input_points': [[i[0]/2, i[1]/2] for i in points]})
            else:
                img.show()
                capt = input('請輸入圖片裏的驗證碼:')
            # The answer has to be POSTed to the captcha endpoint first.
            self.session.post(api, data={'input_text': capt}, headers=headers)
            return capt
        return ''

    def _get_signature(self, timestamp):
        """
        Compute the request signature: HMAC-SHA1 (fixed key) over
        grant_type + client_id + source + timestamp.

        :param timestamp: timestamp string, as sent in the form
        :return: hex digest of the signature
        """
        ha = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=hashlib.sha1)
        grant_type = self.login_data['grant_type']
        client_id = self.login_data['client_id']
        source = self.login_data['source']
        ha.update(bytes((grant_type + client_id + source + timestamp), 'utf-8'))
        return ha.hexdigest()

    def _check_user_pass(self, username, password):
        """
        Resolve the credentials: use the arguments, else the values stored in
        ``self.login_data``, else prompt the user on stdin.

        An 11-digit, all-numeric username gets the '+86' prefix added.

        :return: (username, password) tuple
        """
        if username is None:
            username = self.login_data.get('username')
            if not username:
                username = input('請輸入手機號:')
        if len(username) == 11 and username.isdigit() and '+86' not in username:
            username = '+86' + username

        if password is None:
            password = self.login_data.get('password')
            if not password:
                password = input('請輸入密碼:')
        return username, password


if __name__ == '__main__':
    account = ZhihuAccount()
    account.login(username=None, password=None, load_cookies=True)
# GitHub: https://github.com/liyunchen/Zhihu-Login/blob/master/zhihu_login.py