利用cookielib和urllib2模塊模擬登錄163的例子有不少,近期看了《python模擬登錄163郵箱並獲取通訊錄》一文,受到啓發,試着對收件箱、發件箱等進行了分析,並列出了全部郵件列表及狀態,包括發件人、收件人、主題、發信時間、已讀未讀等狀態。
一、參考代碼:http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html
# -*- coding: UTF-8 -*-
"""Log in to a 163.com mailbox and print the addresses in its address book."""

import xml.etree.ElementTree as etree  # XML parsing

try:  # Python 2 module names (the environment this script was written for)
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3 renamed these modules
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode


class Login163(object):
    """A logged-in session against the 163.com webmail HTTP endpoints.

    Typical use: create an instance, call :meth:`login`, and when it returns
    ``True`` call :meth:`address_list`.
    """

    # Pretend to be a desktop browser.
    header = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    username = ''
    passwd = ''
    cookie = None  # cookie jar, created per instance in __init__
    cookiefile = './cookies.dat'  # file the cookie jar is saved to
    user = ''  # part of the username before '@', set by login()

    def __init__(self, username, passwd):
        """Store the credentials and install a cookie-aware URL opener.

        Args:
            username: full mailbox address, e.g. ``'name@163.com'``.
            passwd: account password.
        """
        self.username = username
        self.passwd = passwd
        self.cookie = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        # install_opener() is process-wide: every later urlopen() call in this
        # process shares this cookie jar.
        urllib2.install_opener(opener)

    @staticmethod
    def _to_text(raw):
        """Return a response body as the native ``str`` type.

        On Python 2 the body already is ``str`` and is returned untouched.
        On Python 3 it is ``bytes`` and gets decoded.
        """
        if isinstance(raw, str):
            return raw
        # NOTE(review): assumes the server answers in UTF-8 -- confirm.
        return raw.decode('utf-8', 'replace')

    @staticmethod
    def _find_sid(cookies):
        """Return the value of the first cookie whose string form contains 'sid'.

        Args:
            cookies: iterable of cookie objects (for example a cookie jar).

        Raises:
            LookupError: if no such cookie with a value exists.
        """
        for cookie in cookies:
            if 'sid' in str(cookie) and cookie.value:
                return cookie.value
        raise LookupError('no session id (sid) cookie found; did login() succeed?')

    @staticmethod
    def _parse_contacts(xml_text):
        """Extract the preferred e-mail addresses from an address-book reply.

        The addresses are read from the last child of the first ``<object>``
        inside the first top-level ``<array>``; every element there whose
        ``name`` attribute is ``'EMAIL;PREF'`` contributes one entry.

        Args:
            xml_text: the XML reply as ``str`` or ``bytes``.

        Returns:
            A list of ``{'email': address}`` dicts; empty when the reply does
            not contain the expected structure.
        """
        root = etree.fromstring(xml_text)
        outer = root.find('array')
        if outer is None or len(outer) == 0:
            return []
        first_object = outer[0]
        if len(first_object) == 0:
            return []
        contacts = first_object[-1]
        emails = []
        for contact in contacts:
            for field in contact:
                # .get() tolerates elements that carry no name attribute.
                if field.get('name') == 'EMAIL;PREF':
                    emails.append({'email': field.text})
        return emails

    def _fetch(self, url, fields=None):
        """Request *url* and return the raw response body.

        Args:
            url: address to request.
            fields: optional dict of form fields; when given they are
                url-encoded and sent as the request body (a POST).
        """
        data = None
        if fields is not None:
            # urlencode() output is pure ASCII; Python 3 needs it as bytes.
            data = urlencode(fields).encode('ascii')
        request = urllib2.Request(url=url, data=data, headers=self.header)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    def _session_id(self):
        """Visit the mail entry page and return the sid found in the cookies.

        Raises:
            LookupError: if no sid cookie is present afterwards.
        """
        self._fetch(
            'http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='
            + self.user
            + '&lightweight=1&verifycookie=1&language=-1&style=1'
        )
        self.cookie.save(self.cookiefile)
        return self._find_sid(self.cookie)

    def login(self):
        """Submit the credentials to the login page.

        Also sets ``self.user`` and saves the cookies to ``self.cookiefile``.

        Returns:
            ``True`` on success, otherwise an error message string.
        """
        fields = {
            'username': self.username,
            'password': self.passwd,
            'type': 1,
        }
        page = self._to_text(self._fetch(
            'http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
            fields,
        ))
        self.user = self.username.split('@')[0]
        self.cookie.save(self.cookiefile)

        # NOTE(review): this marker must match the text of the login page
        # exactly -- confirm the literal against a real successful response.
        if '登陸成功,正在跳轉...' in page:
            return True
        return '%s 登錄163郵箱失敗。' % (self.user)

    def address_list(self):
        """Fetch the address book.

        Returns:
            A list of ``{'email': address}`` dicts.

        Raises:
            LookupError: if no session id cookie is available.
        """
        sid = self._session_id()
        url = ('http://twebmail.mail.163.com/js4/s?sid=' + sid
               + '&func=global:sequential&showAd=false&userType=browser&uid='
               + self.username)
        # The 'var' field is required; without it the reply carries only a
        # status code.  The field values were captured with Firebug.
        fields = {
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,
            'uid': self.username,
            'userType': 'browser',
            'var': '<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
        }
        # The service answers in XML; reduce it to a plain list of dicts.
        return self._parse_contacts(self._fetch(url, fields))


def main():
    """Demo: log in and print every address in the address book."""
    print("Requesting......\n\n")
    login = Login163('xxxx@163.com', 'xxxxx')
    flag = login.login()
    if flag is True:
        print("Successful landing,Resolved contacts......\n\n")
        for entry in login.address_list():
            print(entry['email'])
    else:
        print(flag)


if __name__ == '__main__':
    main()
二、分析收件箱、發件箱等網址
在參考代碼中,獲取通訊錄的url爲:
url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
通過對郵箱地址的分析,發現收件箱、發件箱等的url爲:
url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username
其中func=mbox:listMessages。收件箱、發件箱的具體區分在下面的postdata中,具體爲:
(1)收件箱
# Request body for listing the inbox (fid 1).
# `sid` is the session id read from the cookies after login; a value captured
# from an earlier browser session must not be hard-coded here.
postdata = {
    'func': 'global:sequential',
    'showAd': 'false',
    'sid': sid,
    'uid': self.username,
    'userType': 'browser',
    'var': '<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>',
}
(2)發件箱
# Request body for listing the sent folder (fid 3).
# `sid` is the session id read from the cookies after login; a value captured
# from an earlier browser session must not be hard-coded here.
postdata = {
    'func': 'global:sequential',
    'showAd': 'false',
    'sid': sid,
    'uid': self.username,
    'userType': 'browser',
    'var': '<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>',
}
可以看出,兩段代碼的不同之處就是fid的取值不同,其中收件箱爲1,發件箱爲3,草稿箱爲2。
三、xml解析
利用ElementTree類來進行xml到字典的轉換。在獲取通訊錄的實例中,主要使用了這一方法。本例子(具體代碼見後文)在收取郵件列表時,並沒有用這一方法,仍然使用的是字符串的處理方法。但這裏還是列一下ElementTree類對xml的處理。例如,對於如下結構的xml(參考地址:http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html):
-<result> <code>S_OK</code> -<array name="var"> -<object> <string name="code">S_OK</string> -<array name="var"> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> </array> </object> +<object></object> </array> </result>
解決方法:
# -*- coding: UTF-8 -*-

import xml.etree.ElementTree as etree  # XML parsing


def xml2json(xml):
    """Extract the preferred e-mail addresses from an address-book XML reply.

    The addresses are read from the last child of the first ``<object>``
    inside the first top-level ``<array>``; every element there whose
    ``name`` attribute is ``'EMAIL;PREF'`` contributes one entry.

    Args:
        xml: the XML document as ``str`` or ``bytes``.

    Returns:
        A list of ``{'email': address}`` dicts, in document order.  The list
        is empty when the document lacks the expected structure.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml* is not well-formed.
    """
    root = etree.fromstring(xml)  # for a file, use etree.parse(source) instead

    outer = root.find('array')  # first direct child tagged <array>
    if outer is None or len(outer) == 0:
        return []
    first_object = outer[0]
    if len(first_object) == 0:
        return []
    # Index the element directly: Element.getchildren() no longer exists on
    # Python 3.9 and later.
    contacts = first_object[-1]

    emails = []
    for contact in contacts:
        for field in contact:
            # .get() tolerates elements that carry no name attribute.
            if field.get('name') == 'EMAIL;PREF':
                emails.append({'email': field.text})
    return emails
四、收件箱郵件列表
本例子只列出了收件箱郵件列表,如果需要,可根據以上介紹調整fid值,列出發件箱、草稿箱等的郵件列表。程序在Windows XP、py2.6環境下調試通過,運行後,會在當前目錄下生成三個文件:inboxlistfile.txt記錄收件箱郵件列表,addfile.txt記錄通訊錄,cookies.dat記錄cookies(注:下面列出的代碼只把通訊錄打印到屏幕,並未寫入addfile.txt)。具體代碼如下:
# -*- coding: UTF-8 -*-
# @xiaowuyi http://www.cnblogs.com/xiaowuyi
"""List the messages of a 163.com mailbox folder and print its address book."""

import xml.etree.ElementTree as etree  # XML parsing

try:  # Python 2 module names (the environment this script was written for)
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3 renamed these modules
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode


class Login163(object):
    """A logged-in session against the 163.com webmail HTTP endpoints.

    Typical use: create an instance, call :meth:`login`, and when it returns
    ``True`` call :meth:`minbox` and/or :meth:`address_list`.
    """

    # Pretend to be a desktop browser.
    header = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    username = ''
    passwd = ''
    cookie = None  # cookie jar, created per instance in __init__
    cookiefile = './cookies.dat'  # file the cookie jar is saved to
    user = ''  # part of the username before '@', set by login()

    def __init__(self, username, passwd):
        """Store the credentials and install a cookie-aware URL opener.

        Args:
            username: full mailbox address, e.g. ``'name@163.com'``.
            passwd: account password.
        """
        self.username = username
        self.passwd = passwd
        self.cookie = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        # install_opener() is process-wide: every later urlopen() call in this
        # process shares this cookie jar.
        urllib2.install_opener(opener)

    @staticmethod
    def _to_text(raw):
        """Return a response body as the native ``str`` type.

        On Python 2 the body already is ``str`` and is returned untouched.
        On Python 3 it is ``bytes`` and gets decoded.
        """
        if isinstance(raw, str):
            return raw
        # NOTE(review): assumes the server answers in UTF-8 -- confirm.
        return raw.decode('utf-8', 'replace')

    @staticmethod
    def _open_append(path):
        """Open *path* for appending text, as UTF-8 where that can be chosen."""
        if str is bytes:  # Python 2: strings are written out as raw bytes
            return open(path, 'a')
        return open(path, 'a', encoding='utf-8')

    @staticmethod
    def _find_sid(cookies):
        """Return the value of the first cookie whose string form contains 'sid'.

        Args:
            cookies: iterable of cookie objects (for example a cookie jar).

        Raises:
            LookupError: if no such cookie with a value exists.
        """
        for cookie in cookies:
            if 'sid' in str(cookie) and cookie.value:
                return cookie.value
        raise LookupError('no session id (sid) cookie found; did login() succeed?')

    @staticmethod
    def _parse_contacts(xml_text):
        """Extract the preferred e-mail addresses from an address-book reply.

        The addresses are read from the last child of the first ``<object>``
        inside the first top-level ``<array>``; every element there whose
        ``name`` attribute is ``'EMAIL;PREF'`` contributes one entry.

        Args:
            xml_text: the XML reply as ``str`` or ``bytes``.

        Returns:
            A list of ``{'email': address}`` dicts; empty when the reply does
            not contain the expected structure.
        """
        root = etree.fromstring(xml_text)
        outer = root.find('array')
        if outer is None or len(outer) == 0:
            return []
        first_object = outer[0]
        if len(first_object) == 0:
            return []
        contacts = first_object[-1]
        emails = []
        for contact in contacts:
            for field in contact:
                # .get() tolerates elements that carry no name attribute.
                if field.get('name') == 'EMAIL;PREF':
                    emails.append({'email': field.text})
        return emails

    @staticmethod
    def _field(chunk, name, closing_tag):
        """Return the text of the element named *name* inside *chunk*.

        The text runs from the ``>`` that follows ``name="<name>"`` up to
        *closing_tag*.  An empty string is returned when the element is
        missing, so an absent field never yields a slice of unrelated text.
        """
        start = chunk.find('name="%s"' % name)
        if start == -1:
            return ''
        start = chunk.find('>', start)
        if start == -1:
            return ''
        start += 1
        end = chunk.find(closing_tag, start)
        if end == -1:
            return chunk[start:]
        return chunk[start:end]

    @classmethod
    def _format_inbox(cls, listing):
        """Turn a message-list reply into the text written to the list file.

        The reply is cut at every plain ``<object>`` tag, and each piece that
        contains ``name="id"`` is treated as one message.  The result is one
        header line with the message count, followed by a block per message
        holding sender, recipient, subject, date, read state, size and
        message id (the id is what a later download would need).

        Args:
            listing: the reply body as text.

        Returns:
            The formatted text.
        """
        messages = [chunk for chunk in listing.split('<object>')
                    if 'name="id"' in chunk]
        parts = ['inbox 共' + str(len(messages)) + '信', '\n']
        for number, chunk in enumerate(messages, 1):
            if 'name="read">true' in chunk:
                status = '郵件狀態:已讀'
            else:
                status = '郵件狀態:未讀'
            parts.extend([
                '第' + str(number) + '封:', '\n',
                'From:' + cls._field(chunk, 'from', '</string>'), '\n',
                'TO:' + cls._field(chunk, 'to', '</string>'), '\n',
                'Subject:' + cls._field(chunk, 'subject', '</string>'), '\n',
                'Date:' + cls._field(chunk, 'sentDate', '</date>'), '\n',
                status, '\n',
                '郵件尺寸:' + cls._field(chunk, 'size', '</int>'), '\n',
                '郵件編號:' + cls._field(chunk, 'id', '</string>'), '\n\n',
            ])
        return ''.join(parts)

    def _fetch(self, url, fields=None):
        """Request *url* and return the raw response body.

        Args:
            url: address to request.
            fields: optional dict of form fields; when given they are
                url-encoded and sent as the request body (a POST).
        """
        data = None
        if fields is not None:
            # urlencode() output is pure ASCII; Python 3 needs it as bytes.
            data = urlencode(fields).encode('ascii')
        request = urllib2.Request(url=url, data=data, headers=self.header)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    def _session_id(self):
        """Visit the mail entry page and return the sid found in the cookies.

        Raises:
            LookupError: if no sid cookie is present afterwards.
        """
        self._fetch(
            'http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='
            + self.user
            + '&lightweight=1&verifycookie=1&language=-1&style=1'
        )
        self.cookie.save(self.cookiefile)
        return self._find_sid(self.cookie)

    def login(self):
        """Submit the credentials to the login page.

        Also sets ``self.user`` and saves the cookies to ``self.cookiefile``.

        Returns:
            ``True`` on success, otherwise an error message string.
        """
        fields = {
            'username': self.username,
            'password': self.passwd,
            'type': 1,
        }
        page = self._to_text(self._fetch(
            'http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
            fields,
        ))
        self.user = self.username.split('@')[0]
        self.cookie.save(self.cookiefile)

        # NOTE(review): this marker must match the text of the login page
        # exactly -- confirm the literal against a real successful response.
        if '登陸成功,正在跳轉...' in page:
            return True
        return '%s 登錄163郵箱失敗。' % (self.user)

    def address_list(self):
        """Fetch the address book.

        Returns:
            A list of ``{'email': address}`` dicts.

        Raises:
            LookupError: if no session id cookie is available.
        """
        sid = self._session_id()
        url = ('http://twebmail.mail.163.com/js4/s?sid=' + sid
               + '&func=global:sequential&showAd=false&userType=browser&uid='
               + self.username)
        # The 'var' field is required; without it the reply carries only a
        # status code.  The field values were captured with Firebug.
        fields = {
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,  # the live session id, not a value from an old capture
            'uid': self.username,
            'userType': 'browser',
            'var': '<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
        }
        # The service answers in XML; reduce it to a plain list of dicts.
        return self._parse_contacts(self._fetch(url, fields))

    def minbox(self, fid=1):
        """Append the message list of one folder to ``inboxlistfile.txt``.

        Args:
            fid: folder id sent to the server; 1 is the inbox, 3 the sent
                folder and 2 the drafts folder.  The header line of the
                output says "inbox" whichever folder is requested.

        Raises:
            LookupError: if no session id cookie is available.
        """
        sid = self._session_id()
        url = ('http://twebmail.mail.163.com/js4/s?sid=' + sid
               + '&func=mbox:listMessages&showAd=false&userType=browser&uid='
               + self.username)
        # The 'var' field is required; without it the reply carries only a
        # status code.  The field values were captured with Firebug.
        fields = {
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,  # the live session id, not a value from an old capture
            'uid': self.username,
            'userType': 'browser',
            'var': ('<?xml version="1.0"?><object><int name="fid">' + ('%d' % fid)
                    + '</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'),
        }
        report = self._format_inbox(self._to_text(self._fetch(url, fields)))
        # 'with' closes the file even if the write fails.
        with self._open_append('inboxlistfile.txt') as listfile:
            listfile.write(report)


def main():
    """Demo: log in, dump the inbox list, then print the address book."""
    print("Requesting......\n\n")
    login = Login163('AAAAA@163.com', 'AAAAA')
    flag = login.login()
    if flag is True:
        login.minbox()
        print("Successful landing,Resolved contacts......\n\n")
        for entry in login.address_list():
            print(entry['email'])
    else:
        print(flag)


if __name__ == '__main__':
    main()