微博的模擬登錄是比較坑的,看了網上不少大神的帖子,自己又看了微博登錄時的json數據:一、發現登錄時在輸入帳號時用chrome可以看到會有一個prelogin之類的網址,網址後面會有大串的隨機數。我測試了下,發現在沒有隨機數的情況下,這個網址也能獲得所需要的servertime、nonce等幾個數據。二、通過chrome查看json數據就可以看到用戶名和密碼的加密方式,再找了網上大神的資料就可以得到用戶名su和密碼sp。再把數據post上去就可以得到一個重定向的微博登錄網址。三、將這個網址用正則表達式提取出來,再帶上cookie數據就可以登錄了。然後你想幹什麼就幹什
# -*- coding: utf-8 -*-
# Simulated Sina Weibo login (Python 2): fetch the prelogin values,
# RSA-encrypt the password, post the login form and follow the redirect.
import base64
import cookielib
import re
import urllib
import urllib2

import rsa


class Weibo(object):
    """Log in to Weibo through the Sina SSO endpoints with one account."""

    # Browser-like User-Agent sent with the prelogin and login requests.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'
    }
    # Fields pulled out of the prelogin response.
    _PRELOGIN_FIELDS = ('servertime', 'pcid', 'nonce', 'pubkey', 'rsakv')
    # Value returned by login_weibo() whenever any login step fails.
    _LOGIN_FAILED = '登錄失敗'

    def __init__(self, username, password):
        """Store the base64-encoded username (``su``) and the raw password.

        NOTE(review): the username is base64-encoded as given; it may need
        to be URL-quoted before encoding (e.g. for e-mail logins) -- TODO
        confirm against the ssologin.js client.
        """
        self.user = base64.b64encode(username)
        self.pwd = password

    @staticmethod
    def _parse_prelogin(text):
        """Extract the prelogin fields from the response body *text*.

        Returns a dict keyed by the names in ``_PRELOGIN_FIELDS``; each value
        is the raw text following ``"name":`` up to the next comma, with
        surrounding double quotes stripped.

        Raises ValueError if a field is absent, instead of failing with an
        AttributeError on a ``None`` match.
        """
        values = {}
        for field in Weibo._PRELOGIN_FIELDS:
            match = re.search(r'"%s":(.*?),' % field, text)
            if match is None:
                raise ValueError('prelogin response is missing %r' % field)
            values[field] = match.group(1).strip('"')
        return values

    @property
    def get_pre_url_values(self):
        """Request the prelogin URL and return its parsed values as a dict.

        Every access performs a new HTTP request, so callers must read this
        property once and reuse the result for both get_password() and
        login_weibo().
        """
        # Percent-encode the whole base64 string so '=', '+' and '/' are
        # escaped regardless of how many padding characters it carries.
        pre_url = (
            'https://login.sina.com.cn/sso/prelogin.php?entry=weibo'
            '&callback=sinaSSOController.preloginCallBack&su='
            + urllib.quote(self.user, safe='')
            + '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)'
        )
        request = urllib2.Request(pre_url, headers=self._HEADERS)
        html = urllib2.urlopen(request).read().decode('utf-8')
        return self._parse_prelogin(html)

    def get_password(self, blog_values):
        """Return the hex-encoded RSA-encrypted password (``sp``).

        This routine was derived from Weibo's JSON data and from methods
        published online (i.e. copied) ^_^

        *blog_values* is the dict returned by ``get_pre_url_values``; its
        ``pubkey`` (hex modulus), ``servertime`` and ``nonce`` entries are
        used. The plaintext is ``servertime + '\\t' + nonce + '\\n' + password``
        and the public exponent is 65537.
        """
        modulus = int(blog_values['pubkey'], 16)
        key = rsa.PublicKey(modulus, 65537)
        message = (str(blog_values['servertime']) + '\t'
                   + str(blog_values['nonce']) + '\n' + str(self.pwd))
        return rsa.encrypt(message, key).encode('hex')

    def login_weibo(self, blog_values, sp):
        """Post the login form, follow the redirect and fetch the home page.

        *blog_values* must be the same prelogin dict that was used to build
        *sp* with get_password(). Returns the decoded HTML of
        ``http://weibo.com/<userdomain>`` on success, or the failure message
        string when any step fails.
        """
        values = {
            'entry': "weibo",
            'gateway': '1',
            'from': '',
            'savestate': '7',
            'userticket': '1',
            'pagerefer': "",
            'cfrom': '1',
            'vsnf': '1',
            'su': self.user,
            'service': 'miniblog',
            'servertime': blog_values['servertime'],
            'nonce': blog_values['nonce'],
            'pwencode': 'rsa2',
            'rsakv': blog_values['rsakv'],
            'sp': sp,
            'sr': "1440*900",
            'encoding': 'UTF-8',
            'prelt': '503',
            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'META',
        }
        url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'

        # Collect cookie information: one opener carries the cookie jar
        # through all three requests below.
        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        data = urllib.urlencode(values)

        try:
            request = urllib2.Request(url, headers=self._HEADERS, data=data)
            html = opener.open(request).read().decode('gbk')
        except Exception as e:
            # Without a response body there is nothing to parse; stop here
            # rather than continuing with an unbound ``html``.
            print(e)
            return self._LOGIN_FAILED

        redirect = re.search(r'location\.replace\(\'(.*?)\'\)', html)
        if redirect is None:
            return self._LOGIN_FAILED

        try:
            page = opener.open(urllib2.Request(redirect.group(1))).read().decode('utf-8')
            dom = re.search(r'"userdomain":"(.*?)"', page).group(1)
            login_url = 'http://weibo.com/' + dom
            per_html = opener.open(urllib2.Request(login_url)).read().decode('utf-8')
        except Exception:
            # Best effort: any network, decoding or parsing failure is
            # reported to the caller as a failed login.
            per_html = self._LOGIN_FAILED
        return per_html


if __name__ == '__main__':
    wbobj = Weibo('用戶名', '密碼')
    # Fetch the prelogin values once: the servertime/nonce used to encrypt
    # the password must be the same ones posted with the login form.
    blog_values = wbobj.get_pre_url_values
    sp = wbobj.get_password(blog_values)
    html = wbobj.login_weibo(blog_values=blog_values, sp=sp)
    print(html)
麼,比如:把女神的照片全要了、自動查看女神的微博並將郵件發給你,下次再來弄這個。最近失眠得厲害,快點找到工作吧!!