目標:模擬登錄知乎
代碼如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Simulate a phone-number login to zhihu.com.

Flow: read the ``_xsrf`` token from the sign-in page, download the captcha
image and ask the user to type it in, then POST the login form.  Every
request goes through one shared ``requests`` session.
"""
__author__ = 'ziv·chan'


import re
import time
import requests
from PIL import Image

# Keep the script runnable on both Python 2 (raw_input) and Python 3 (input).
try:
    _read_line = raw_input
except NameError:
    _read_line = input


url_login = 'https://www.zhihu.com/login/phone_num'

headers = {
    'Host' : 'www.zhihu.com',
    'Origin' : 'https://www.zhihu.com',
    'Referer' : 'https://www.zhihu.com/',
    'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
}

session = requests.session()

# Compiled once: matches the form field that carries the _xsrf token.
_XSRF_PATTERN = re.compile(r'name="_xsrf" value="(.*?)"/>', re.S)


def get_xsrf():
    """Return the ``_xsrf`` token found in the zhihu sign-in page.

    Returns:
        The token text captured from the ``name="_xsrf"`` field.

    Raises:
        ValueError: if the page contains no ``_xsrf`` field (previously this
            surfaced as an opaque ``AttributeError`` on ``None``).
    """
    page_code = session.get('https://www.zhihu.com/#signin').text
    match = _XSRF_PATTERN.search(page_code)
    if match is None:
        raise ValueError('_xsrf token not found in the sign-in page')
    return match.group(1)


def get_captcha():
    """Download the login captcha, show it, and return the text the user types.

    Side effects: writes the image to ``cha.jpg`` in the working directory,
    opens it in an image viewer, and blocks on an interactive prompt.
    """
    # The ``r`` query parameter is the current Unix time in milliseconds.
    timestamp = str(int(time.time() * 1000))
    # Use https like every other request in this script, so the captcha is
    # fetched over the same scheme as the login it belongs to.
    url = 'https://www.zhihu.com/captcha.gif?r=%s&type=login' % timestamp
    response = session.get(url)
    # The file name is kept as in the original even though the URL ends in .gif.
    with open('cha.jpg', 'wb') as f:
        f.write(response.content)
    im = Image.open('cha.jpg')
    try:
        im.show()
    finally:
        # Release the image file even if the viewer could not be launched.
        im.close()
    return _read_line("請輸入驗證碼")


# NOTE(review): the credentials below are hard-coded and are echoed by the
# print that follows; consider loading them from the environment instead.
# The token is fetched before the captcha (dict values evaluate in order).
form_data = {
    '_xsrf' : get_xsrf(),
    'password' : 'ChelseaFC.1',
    'captcha' : get_captcha(),
    'remember_me' : 'true',
    'phone_num' : '18362972928'
}
print(form_data)
# Post through the same session object that fetched the token and captcha.
res = session.post(url_login, data=form_data, headers=headers)
print(res.json()['msg'])
輸出:
請輸入驗證碼edx5 {'phone_num': '18362972928', '_xsrf': u'83488f00833e19acc086395dbce597c4', 'password': 'ChelseaFC.1', 'remember_me': 'true', 'captcha': 'edx5'} 登錄成功
難點:驗證碼 URL 中的參數 'r' 取自當前時間的時間戳(自 1970 年紀元起經過的浮點秒數),乘以 1000 並取整後轉為字串。
以上。