python模擬163登錄獲取郵件列表

利用cookielib和urllib2模塊模擬登錄163的例子有不少,近期看了《python模擬登錄163郵箱並獲取通信錄》一文,受到啓發,試着對收件箱、發件箱等進行了分析,並列出了全部郵件列表及狀態,包括發件人、收件人、主題、發信時間、已讀未讀等狀態。

一、參考代碼:http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html

  1 #-*- coding:UTF-8 -*-
  2 import urllib,urllib2,cookielib
  3 import xml.etree.ElementTree as etree #xml解析類
  4 
  5 class Login163:
  6    #假裝browser
  7     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
  8     username = ''
  9     passwd = ''
 10     cookie = None #cookie對象
 11     cookiefile = './cookies.dat' #cookie臨時存放地
 12     user = ''
 13     
 14     def __init__(self,username,passwd):
 15         self.username = username
 16         self.passwd = passwd
 17         #cookie設置
 18         self.cookie = cookielib.LWPCookieJar() #自定義cookie存放
 19         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
 20         urllib2.install_opener(opener)
 21 
 22    #登錄    
 23     def login(self):       
 24 
 25         #請求參數設置
 26         postdata = {
 27             'username':self.username,
 28             'password':self.passwd,
 29             'type':1
 30             }
 31         postdata = urllib.urlencode(postdata)
 32 
 33         #發起請求
 34         req = urllib2.Request(
 35                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
 36                 data= postdata,#請求數據
 37                 headers = self.header #請求頭
 38             )
 39 
 40         result = urllib2.urlopen(req).read()
 41         result = str(result)
 42         self.user = self.username.split('@')[0]
 43 
 44         self.cookie.save(self.cookiefile)#保存cookie
 45         
 46         if '登陸成功,正在跳轉...' in result:
 47             #print("%s 你已成功登錄163郵箱。---------\n" %(user))
 48             flag = True
 49         else:
 50             flag = '%s 登錄163郵箱失敗。'%(self.user)
 51            
 52         return flag
 53 
 54    #獲取通信錄
 55     def address_list(self):
 56 
 57         #獲取認證sid
 58         auth = urllib2.Request(
 59                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
 60                 headers = self.header
 61             )
 62         auth = urllib2.urlopen(auth).read()
 63         for i,sid in enumerate(self.cookie):#enumerate()用於同時返數字索引與數值,其實是一個元組:((0,test[0]),(1,test[1]).......)這有點像php裏的foreach 語句的做用
 64             sid = str(sid)
 65             if 'sid' in sid:
 66                 sid = sid.split()[1].split('=')[1]
 67                 break
 68         self.cookie.save(self.cookiefile)
 69         
 70         #請求地址
 71         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
 72         #參數設定(var 變量是必須要的,否則就只能看到:<code>S_OK</code><messages/>這類信息)
 73         #這裏參數也是在firebug下查看的。
 74         postdata = {
 75             'func':'global:sequential',
 76             'showAd':'false',
 77             'sid':sid,
 78             'uid':self.username,
 79             'userType':'browser',
 80             'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>'
 81             }
 82         postdata = urllib.urlencode(postdata)
 83         
 84         #組裝請求
 85         req = urllib2.Request(
 86             url = url,
 87             data = postdata,
 88             headers = self.header
 89             )
 90         res = urllib2.urlopen(req).read()
 91         
 92         #解析XML,轉換成json
 93         #說明:因爲這樣請求後163給出的是xml格式的數據,
 94         #爲了返回的數據能方便使用最好是轉爲JSON
 95         json = []
 96         tree = etree.fromstring(res)
 97         obj = None
 98         for child in tree:
 99             if child.tag == 'array':
100                 obj = child            
101                 break
102         #這裏多參考一下,etree元素的方法屬性等,包括attrib,text,tag,getchildren()等
103         obj = obj[0].getchildren().pop()
104         for child in obj:
105             for x in child:
106                 attr = x.attrib
107                 if attr['name']== 'EMAIL;PREF':
108                     value = {'email':x.text}
109                     json.append(value)
110         return json
111         
# Demo: log in, then print every contact e-mail (or the failure message).
print("Requesting......\n\n")
login = Login163('xxxx@163.com','xxxxx')
flag = login.login()
# login() returns True on success and a message string on failure.
if not isinstance(flag, bool):
    print(flag)
else:
    print("Successful landing,Resolved contacts......\n\n")
    res = login.address_list()
    for entry in res:
        print(entry['email'])
View Code

二、分析收件箱、發件箱等網址

    在參考代碼中,獲取通信錄的url爲:

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username。通過對郵箱地址的分析,發現收件箱、發件箱等的url爲 url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username,其中func=mbox:listMessages。收件箱、發件箱的具體區分在下面的postdata中,具體爲:

(1)收件箱

複製代碼
# Form fields for listing the inbox (fid=1).
# The 'sid' value is a sample captured from one browser session; real code
# must send the sid taken from the login cookies.
postdata = {
    'func':'global:sequential',
    'showAd':'false',
    'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
    'uid':self.username,
    'userType':'browser',
    'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
}
複製代碼

(2)發件箱

複製代碼
# Form fields for listing the sent box (fid=3).
# The 'sid' value is a sample captured from one browser session; real code
# must send the sid taken from the login cookies.
postdata = {
    'func':'global:sequential',
    'showAd':'false',
    'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
    'uid':self.username,
    'userType':'browser',
    'var':'<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
}
複製代碼

可以看出,兩段代碼的不同之處就是fid的取值不同,其中收件箱爲1,發件箱爲3,草稿箱爲2。

三、xml解析

    利用ElementTree 類來進行xml到字典的轉換。在獲取通信錄的實例中,主要使用了這一方法。本例子(具體代碼見後文)在收取郵件列表時,並沒有用這一方法,仍然使用的是字符串的處理方法。但這裏還是列一下ElementTree 類對xml的處理。如(參考地址:http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html):

複製代碼
-<result>
   <code>S_OK</code>
 -<array name="var">
  -<object>
     <string name="code">S_OK</string>
    -<array name="var">
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     </array>
   </object>
  +<object></object>
  </array>
 </result>
複製代碼

解決方法:

 1 #-*- coding:UTF-8 -*-
 2 
 3 import xml.etree.ElementTree as etree #xml解析類
 4 def xml2json(xml):
 5     json = []
 6     tree = etree.fromstring(xml) #若是是文件可用parse(source)
 7     obj = None
 8     for child in tree:
 9         if child.tag == 'array':
10             obj = child            
11             break
12     #這裏多參考一下,etree元素的方法屬性等,包括attrib,text,tag,getchildren()等
13     obj = obj[0].getchildren().pop()
14     for child in obj:
15         for x in child:
16             attr = x.attrib
17             if attr['name']== 'EMAIL;PREF':
18                 value = {'email':x.text}
19                 json.append(value)
20     return json

 

四、收件箱郵件列表

    本例子只列出了收件箱郵件列表,如果需要,可根據以上介紹調整fid值,列出發件箱、草稿箱等的郵件列表。程序在Windows XP、py2.6環境下調試通過,運行後,會在當前目錄下生成三個文件:inboxlistfile.txt記錄收件箱郵件列表,addfile.txt記錄通信錄,cookies.dat記錄cookies。具體代碼如下:

  1 #-*- coding:UTF-8 -*-
  2 #@小五義 http://www.cnblogs.com/xiaowuyi
  3 #163郵件列表
  4 import urllib,urllib2,cookielib
  5 import xml.etree.ElementTree as etree #xml解析類
  6 
  7 class Login163:
  8    #假裝browser
  9     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
 10     username = ''
 11     passwd = ''
 12     cookie = None #cookie對象
 13     cookiefile = './cookies.dat' #cookie臨時存放地
 14     user = ''
 15     
 16     def __init__(self,username,passwd):
 17         self.username = username
 18         self.passwd = passwd
 19         #cookie設置
 20         self.cookie = cookielib.LWPCookieJar() #自定義cookie存放
 21         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
 22         urllib2.install_opener(opener)
 23 
 24    #登錄    
 25     def login(self):       
 26 
 27         #請求參數設置
 28         postdata = {
 29             'username':self.username,
 30             'password':self.passwd,
 31             'type':1
 32             }
 33         postdata = urllib.urlencode(postdata)
 34 
 35         #發起請求
 36         req = urllib2.Request(
 37                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
 38                 data= postdata,#請求數據
 39                 headers = self.header #請求頭
 40             )
 41 
 42         result = urllib2.urlopen(req).read()
 43         result = str(result)
 44         #print result
 45         self.user = self.username.split('@')[0]
 46 
 47         self.cookie.save(self.cookiefile)#保存cookie
 48         
 49         if '登陸成功,正在跳轉...' in result:
 50             #print("%s 你已成功登錄163郵箱。---------n" %(user))
 51             flag = True
 52         else:
 53             flag = '%s 登錄163郵箱失敗。'%(self.user)
 54            
 55         return flag
 56 
 57    #獲取通信錄
 58     def address_list(self):
 59 
 60         #獲取認證sid
 61         auth = urllib2.Request(
 62                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
 63                 headers = self.header
 64             )
 65         auth = urllib2.urlopen(auth).read()
 66 
 67         #authstr=str(auth)
 68         #print authstr
 69         
 70         for i,sid in enumerate(self.cookie):
 71             sid = str(sid)
 72             #print 'sid:%s' %sid
 73             if 'sid' in sid:
 74                 sid = sid.split()[1].split('=')[1]
 75                 break
 76         self.cookie.save(self.cookiefile)
 77         
 78         #請求地址
 79         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
 80         #參數設定(var 變量是必須要的,否則就只能看到:<code>S_OK</code><messages>這類信息)
 81         #這裏參數也是在firebug下查看的。
 82         postdata = {
 83             'func':'global:sequential',
 84             'showAd':'false',
 85             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
 86             'uid':self.username,
 87             'userType':'browser',
 88             'var':'<!--?xml version="1.0"?--><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>'
 89             }
 90         postdata = urllib.urlencode(postdata)
 91         
 92         #組裝請求
 93         req = urllib2.Request(
 94             url = url,
 95             data = postdata,
 96             headers = self.header
 97             )
 98         res = urllib2.urlopen(req).read()
 99 
100         #print str(res)
101         
102         #解析XML,轉換成json
103         #說明:因爲這樣請求後163給出的是xml格式的數據,
104         #爲了返回的數據能方便使用最好是轉爲JSON
105         json = []
106         tree = etree.fromstring(res)
107 
108         
109         
110         obj = None
111         for child in tree:
112             if child.tag == 'array':
113                 obj = child            
114                 break
115         #這裏多參考一下,etree元素的方法屬性等,包括attrib,text,tag,getchildren()等
116         obj = obj[0].getchildren().pop()
117         for child in obj:
118             for x in child:
119                 attr = x.attrib
120                 if attr['name']== 'EMAIL;PREF':
121                     value = {'email':x.text}
122                     json.append(value)
123         return json
124 
125     def minbox(self):#收件箱,fid爲1,發件箱爲3,草稿箱爲2
126         #獲取認證sid
127         auth = urllib2.Request(
128                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
129                 headers = self.header
130             )
131         auth = urllib2.urlopen(auth).read()
132 
133         #authstr=str(auth)
134         #print authstr
135         
136         for i,sid in enumerate(self.cookie):
137             sid = str(sid)
138             #print 'sid:%s' %sid
139             if 'sid' in sid:
140                 sid = sid.split()[1].split('=')[1]
141                 break
142         self.cookie.save(self.cookiefile)
143         
144         #請求地址
145         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username
146         #參數設定(var 變量是必須要的,否則就只能看到:<code>S_OK</code><messages>這類信息)
147         #這裏參數也是在firebug下查看的。
148         postdata = {
149             'func':'global:sequential',
150             'showAd':'false',
151             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
152             'uid':self.username,
153             'userType':'browser',
154             'var':'<!--?xml version="1.0"?--><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
155             }
156         postdata = urllib.urlencode(postdata)
157         
158         #組裝請求
159         req = urllib2.Request(
160             url = url,
161             data = postdata,
162             headers = self.header
163             )
164         res = urllib2.urlopen(req).read()
165 
166         liststr=str(res).split('<object>')#用object進行分割
167         inboxlistcount=len(liststr)-1#記錄郵件封數
168         inboxlistfile=open('inboxlistfile.txt','a')
169         t=0  #記錄當前第幾封信
170         for i in liststr:
171             if 'xml' in i and ' version=' in i:
172                 inboxlistfile.write('inbox 共'+str(inboxlistcount)+'')
173                 inboxlistfile.write('\n')
174             if 'name="id"' in i:
175                 t=t+1
176                 inboxlistfile.write(''+str(t)+'封:')
177                 inboxlistfile.write('\n')
178                 #寫入from
179                 beginnum=i.find('name="from"')
180                 endnum=i.find('</string>',beginnum)
181                 inboxlistfile.write('From:'+i[beginnum+12:endnum])
182                 inboxlistfile.write('\n')
183                 #寫入to
184                 beginnum=i.find('name="to"')
185                 endnum=i.find('</string>',beginnum)
186                 inboxlistfile.write('TO:'+i[beginnum+10:endnum])
187                 inboxlistfile.write('\n')
188                 #寫入subject
189                 beginnum=i.find('name="subject"')
190                 endnum=i.find('</string>',beginnum)
191                 inboxlistfile.write('Subject:'+i[beginnum+15:endnum])
192                 inboxlistfile.write('\n')
193                 #寫入date:
194                 beginnum=i.find('name="sentDate"')
195                 endnum=i.find('</date>',beginnum)
196                 inboxlistfile.write('Date:'+i[beginnum+16:endnum])
197                 inboxlistfile.write('\n')
198                 if 'name="read">true' in i:
199                     inboxlistfile.write('郵件狀態:已讀')
200                     inboxlistfile.write('\n')
201                 else:
202                     inboxlistfile.write('郵件狀態:未讀')
203                     inboxlistfile.write('\n')
204                 #寫用郵件尺寸
205                 beginnum=i.find('name="size"')
206                 endnum=i.find('</int>',beginnum)
207                 inboxlistfile.write('郵件尺寸:'+i[beginnum+12:endnum])
208                 inboxlistfile.write('\n')
209                 #寫入郵件編號,用於下載郵件
210                 beginnum=i.find('name="id"')
211                 endnum=i.find('</string>',beginnum)
212                 inboxlistfile.write('郵件編號:'+i[beginnum+10:endnum])
213                 inboxlistfile.write('\n\n')
214                 
215         inboxlistfile.close()
216                 
217         
218         
# Demo: log in, append the inbox listing to inboxlistfile.txt, then print
# every contact e-mail (or the failure message).
print("Requesting......\n\n")
login = Login163('AAAAA@163.com','AAAAA')
flag = login.login()
# login() returns True on success and a message string on failure.
if isinstance(flag, bool):
    login.minbox()
    print("Successful landing,Resolved contacts......\n\n")
    res = login.address_list()
    for x in res:
        print(x['email'])
else:
    print(flag)
相關文章
相關標籤/搜索