打開 http://crm.oldboyedu.com/crm/grade/single/ ,鼠標右鍵查看源代碼,能夠看到咱們須要 post 的 name。以下:
這裏須要在 post 時提交 token 和 search_str 的值,這裏的 token 是有時效性的。能夠一次獲取,短期內屢次使用。
在沒有使用 cookie 時提交數據報錯:
這裏須要加上 cookie 本地存儲。
具體代碼以下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Look up each student's daily grades on the CRM grade page and print a ranking.

The script first GETs the grade page so the server hands out a session
cookie and a CSRF token, then POSTs one search per QQ number, scrapes the
grade cells out of the returned HTML, converts the letter grades to points
and prints a table sorted by total score (highest first).
"""
__author__ = 'DSOWASP'

import collections
import http.cookiejar
import re
import urllib
import urllib.parse
import urllib.request

url = "http://crm.oldboyedu.com/crm/grade/single/"

# QQ numbers to query. The real numbers were removed for privacy; the literal
# `...` is the author's placeholder for the elided entries.
qq_list = [
    'xxxxx', 'yyyyy', ...
]

# Student numbers, matched to qq_list by position.
xuehao_list = [
    '1', '2', '3', '4', '5',
    '6', '7', '8', '9', '10', '11', '13', '14', '15',
    '16', '17', '18', '19', '20', '21', '22', '23',
    '24', '25', '26', '27', '28', '29', '30', '31',
    '32', '33', '34', '35', '36', '37', '38', '39'
]

# Points awarded for each letter grade; anything not listed scores 0.
GRADE_POINTS = {
    'A+': 100,
    'A': 90,
    'B+': 85,
    'B': 80,
    'B-': 70,
    'C+': 60,
    'C': 50,
    'C-': 40,
}

# Cell text used when a grade (or a student number) is not available.
MISSING = 'N/A'

# Matches the hidden CSRF input regardless of quote style, e.g.
# <input type='hidden' name='csrfmiddlewaretoken' value='QKNM...' />
_TOKEN_RE = re.compile(
    r"""csrfmiddlewaretoken['"][^>]*?value=['"]([^'"]+)['"]"""
)


def extract_csrf_token(html):
    """Return the value of the hidden ``csrfmiddlewaretoken`` field in *html*.

    Raises:
        ValueError: if the page contains no such field. Failing here gives a
            clear message instead of a NameError later in the script.
    """
    match = _TOKEN_RE.search(html)
    if match is None:
        raise ValueError("csrfmiddlewaretoken not found in page")
    return match.group(1)


def parse_grades(html):
    """Return the grade strings found in a search-result page.

    The page is scanned line by line: a line that is exactly ``<td>`` marks
    the next non-empty line as a grade. This mirrors the layout of the page
    the script was written against; adjust it if the markup changes.
    """
    grades = []
    take_next = False
    for raw_line in html.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if take_next:
            grades.append(line)
            take_next = False
        if line == '<td>':
            take_next = True
    return grades


def total_score(grades):
    """Return the sum of points for *grades*; unknown grades count as 0."""
    return sum(GRADE_POINTS.get(grade, 0) for grade in grades)


def build_rows(grades_by_qq, student_ids):
    """Build the table header and the rows, ranked by total score.

    Args:
        grades_by_qq: mapping of QQ number -> list of grade strings, in
            query order.
        student_ids: mapping of QQ number -> student number.

    Returns:
        ``(header, rows)``. Every row has exactly ``len(header)`` cells.
        The number of Day columns is the longest grade list of any student
        (not just the first one), and shorter lists are padded with 'N/A'
        so that no row is wider or narrower than the header.
    """
    days = max((len(grades) for grades in grades_by_qq.values()), default=0)
    header = ['學號', 'QQ']
    header.extend('Day%d' % day for day in range(1, days + 1))
    header.append('總分')

    totals = {qq: total_score(grades) for qq, grades in grades_by_qq.items()}
    # sorted() is stable, so students with equal totals keep query order.
    ranked = sorted(grades_by_qq, key=totals.get, reverse=True)

    rows = []
    for qq in ranked:
        grades = grades_by_qq[qq]
        row = [student_ids.get(qq, MISSING), qq]
        row.extend(grades)
        row.extend([MISSING] * (days - len(grades)))
        row.append(totals[qq])
        rows.append(row)
    return header, rows


def fetch_token():
    """GET the grade page (which also sets the session cookie) and return its CSRF token."""
    with urllib.request.urlopen(url) as response:
        return extract_csrf_token(response.read().decode('UTF-8'))


def fetch_grades(qq, token):
    """POST a search for *qq* and return the grades scraped from the response."""
    post = {'search_str': qq, 'csrfmiddlewaretoken': token}
    # POST data must be URL-encoded bytes.
    post_data = urllib.parse.urlencode(post).encode(encoding="utf-8")
    # NOTE: if the server rejects urllib's default User-Agent, pass
    # headers={'User-Agent': ...} to Request here.
    req = urllib.request.Request(url, data=post_data)
    with urllib.request.urlopen(req) as response:
        return parse_grades(response.read().decode('UTF-8'))


def main():
    """Query every QQ number and print the ranked grade table."""
    # Third-party dependency; imported here so the parsing and scoring
    # helpers above can be imported without it.
    import prettytable

    # Install a cookie-aware opener: the cookie set by the initial GET must
    # be sent back with each POST, otherwise the server rejects the request.
    cj = http.cookiejar.LWPCookieJar()
    cookies_support = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(cookies_support,
                                         urllib.request.HTTPHandler)
    urllib.request.install_opener(opener)

    # The token is fetched once and reused for every POST.
    token = fetch_token()

    chengji_list = collections.OrderedDict()
    for qq in qq_list:
        chengji_list[qq] = fetch_grades(qq, token)

    # Pair QQ numbers with student numbers by position; on duplicates the
    # first occurrence wins.
    student_ids = {}
    for qq, xuehao in zip(qq_list, xuehao_list):
        student_ids.setdefault(qq, xuehao)

    header, rows = build_rows(chengji_list, student_ids)
    table = prettytable.PrettyTable(header)
    for row in rows:
        table.add_row(row)
    print(table)


if __name__ == '__main__':
    main()
輸出結果: