通過Python程序模擬訪問北京預約掛號統一平臺,包括驗證碼識別、登錄,以及按醫院、時間、科室查詢可約號等。
本程序僅供學習使用,請勿用於其他用途。
def getCodePic(tmp_pic="c:\\tmp.gif"):
    """Download a fresh captcha image and save it to disk.

    Args:
        tmp_pic: Path the image is written to. Defaults to the location the
            original script used.

    Returns:
        The path the image was written to (``tmp_pic``).
    """
    # A random ``id`` is appended to every request -- presumably a
    # cache-buster so the server issues a new captcha; TODO confirm.
    url = "http://www.bjguahao.gov.cn/comm/code.php?id=" + str(random.random())
    resp = urllib2.urlopen(url)
    try:
        data = resp.read()
    finally:
        # Release the connection even if the read fails.
        resp.close()
    with open(tmp_pic, "wb") as out:
        out.write(data)
    return tmp_pic
#識別「http://www.bjguahao.gov.cn/comm/logon.php」驗證碼
#驗證碼特徵如下:
#1.驗證碼圖片大小爲38*15(寬*高);
#2.驗證碼包含4個1-9的數字;
#3.數字大小爲6*8(寬*高),數字區域距圖片(左上右下)的邊距分別爲(5,4,6,3),每個數字間隔1個像素;
#4.數字顏色爲紅色,有粉色干擾噪點。
#
#識別方法:
#1.因爲驗證碼比較簡單且固定,可先獲取1-9每個數字的樣本;
#2.提取驗證碼中的每個數字,與樣本進行比對,獲取具體的數字。
# Reference bitmaps for the digits 1-9: each is a 6x8 glyph flattened row by
# row into a 48-character string, "1" for a digit pixel and "0" otherwise.
yzm_keys = {
    1: "001100011100101100001100001100001100001100111111",
    2: "011110110011110011000011001110011000110000111111",
    3: "011110110011000011011110000011000011110011011110",
    4: "000011000111001111011011110011111111000011000011",
    5: "111111110000111110110011000011000011110011011110",
    6: "011110110011110000111110110011110011110011011110",
    7: "111111000011000110000110001100001100011000011000",
    8: "011110110011110011011110110011110011110011011110",
    9: "011110110011110011110011011111000011110011011110",
}


def reg_one(im):
    """Recognise a single digit from a 6x8 image.

    Args:
        im: Object exposing ``getpixel((x, y))`` for x in 0..5, y in 0..7.

    Returns:
        The matching digit (1-9), or None after printing "reg failed." when
        the bitmap matches no sample in ``yzm_keys``.
    """
    # Pixel value 3 is treated as "digit" -- presumably the palette index of
    # the red digit colour in the GIF; TODO confirm against a sample image.
    bits = "".join(
        "1" if im.getpixel((col, row)) == 3 else "0"
        for row in range(8)
        for col in range(6)
    )
    matches = [digit for digit in range(1, 10) if yzm_keys[digit] == bits]
    if matches:
        return matches[0]
    print("reg failed.")


def reg_yzm(f):
    """Recognise the 4-digit captcha stored in image file *f*.

    Args:
        f: Path (or file object) accepted by ``Image.open``.

    Returns:
        The four recognised digits as a string.
    """
    im = Image.open(f)
    # Glyphs are 6 px wide with a 1 px gap, starting 5 px from the left edge
    # and occupying rows 4..11.
    cells = [im.crop((left, 4, left + 6, 12)) for left in (5, 12, 19, 26)]
    return "%d%d%d%d" % tuple([reg_one(cell) for cell in cells])
需要事先註冊,然後通過身份證號、姓名和驗證碼登錄。這裏需要考慮Cookies問題,在後面說明。
def login(code, sfzhm="0000000000000000", truename="張三"):
    """Log in to the booking site with ID number, real name and captcha.

    Args:
        code: Captcha text for the current session.
        sfzhm: ID-card number of the registered user. The default is the
            placeholder from the original script and must be replaced with a
            real value.
        truename: Registered real name. The default is likewise a
            placeholder.

    Returns:
        True when the server answers with an empty body (which this script
        treats as success), False otherwise.
    """
    url = "http://www.bjguahao.gov.cn/comm/logon.php"
    form = urllib.urlencode({"sfzhm": sfzhm, "truename": truename, "yzm": code})
    # Supplying a data argument makes urllib2 send a POST request.
    resp = urllib2.urlopen(urllib2.Request(url, form))
    try:
        res = resp.read()
    finally:
        resp.close()
    if not res:
        print("login success.")
        return True
    print("error: %s" % res)
    return False
以下代碼查詢指定日期航空總醫院的產科門診和產科專家門診的預約情況,返回值爲HTML。
def _fetch_html(url):
    """Return the body of *url*, always closing the response."""
    resp = urllib2.urlopen(url)
    try:
        return resp.read()
    finally:
        resp.close()


def query(riqi, hpid='166', keid1='050142', keid2='050143'):
    """Fetch the appointment pages of two departments for one date.

    Args:
        riqi: Date to query, passed through as the ``date1`` parameter.
        hpid: Hospital id. '166' is the hospital the original script queried.
        keid1: First department id ('050142', obstetrics clinic in the
            original script).
        keid2: Second department id ('050143', obstetrics specialist clinic
            in the original script).

    Returns:
        A 2-tuple ``(html1, html2)`` with the raw response bodies for
        ``keid1`` and ``keid2`` respectively.
    """
    url = "http://www.bjguahao.gov.cn/comm/ghao.php"
    pages = [
        _fetch_html("%s?hpid=%s&keid=%s&date1=%s" % (url, hpid, keid, riqi))
        for keid in (keid1, keid2)
    ]
    return pages[0], pages[1]
通過解析查詢結果的HTML,輸出信息。
def _print_row(seq):
    """Print one parsed table row on a single line, space separated."""
    # Column order, per the header left commented out in the original script:
    # date, weekday, time, department, doctor, title, fee, specialty,
    # available, remaining, status. A full 11-column row drops index 7
    # (specialty). Delete by position: list.remove() would delete the first
    # cell whose text merely equals cell 7.
    if len(seq) == 11:
        del seq[7]
    print(" ".join(seq))


def parse_print(html1, html2):
    """Print the last two table rows of each of the two result pages.

    Args:
        html1: HTML of the first query result.
        html2: HTML of the second query result.
    """
    for html in (html1, html2):
        for seq in parse_html.parse(html):
            _print_row(seq)
parse_html.py如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Minimal extraction of the last two table rows from a result page."""

_TD_OPEN = "<td>"
_TD_CLOSE = "</td>"
_TR_OPEN = "<tr>"
_TR_CLOSE = "</tr>"


def parse_struct(s):
    """Return the text of every ``<td>...</td>`` cell in *s*, in order.

    Only the exact tags ``<td>`` and ``</td>`` are recognised. Scanning stops
    at the first unterminated cell.
    """
    cells = []
    pos = 0
    while True:
        start = s.find(_TD_OPEN, pos)
        if start < 0:
            break
        start += len(_TD_OPEN)
        end = s.find(_TD_CLOSE, start)
        if end < 0:
            break
        cells.append(s[start:end])
        # Continue after the closing tag instead of re-slicing the string.
        pos = end + len(_TD_CLOSE)
    return cells


def _row_before(ss, limit):
    """Return ``(row_body, row_start)`` of the last row ending before *limit*.

    ``("", -1)`` is returned when no complete ``<tr>...</tr>`` pair exists.
    """
    row_end = ss.rfind(_TR_CLOSE, 0, limit)
    if row_end < 0:
        return "", -1
    row_beg = ss.rfind(_TR_OPEN, 0, row_end)
    if row_beg < 0:
        return "", -1
    # Skip exactly the opening tag so a cell that directly follows "<tr>"
    # is not truncated.
    return ss[row_beg + len(_TR_OPEN):row_end], row_beg


def parse(ss):
    """Extract the cells of the last two ``<tr>`` rows of *ss*.

    Returns:
        A tuple ``(seq1, seq2)`` where ``seq2`` holds the cells of the last
        row and ``seq1`` those of the row before it. A missing row yields an
        empty list.
    """
    last_row, last_beg = _row_before(ss, len(ss))
    if last_beg < 0:
        return [], []
    prev_row, _ = _row_before(ss, last_beg)
    return parse_struct(prev_row), parse_struct(last_row)


if __name__ == "__main__":
    with open("c:\\2014-04-23.html", "rb") as sample:
        parse(sample.read())
這一部分還沒寫,原理基本差不多:生成預約人的URL並發起請求,系統會向手機發送一個驗證碼,輸入驗證碼後即預約成功。
Cookies是啥?爲啥需要Cookies?這裏就不細說了,感興趣的可以baidu之。這裏只說怎麼設置。
其實也很簡單,Python的urllib2模塊本身就支持設置Cookies的功能。
# NOTE: run this once at the very start of the program, before any request
# is made; install_opener() makes every later urllib2.urlopen() call go
# through this cookie-aware opener.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Only User-Agent and Referer are sent. The other browser headers (Host,
# Accept, Accept-Language, Accept-Encoding, Connection and a hand-copied
# Cookie line) were commented out in the original script and are not sent.
opener.addheaders = [
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:27.0) Gecko/20100101 Firefox/27.0'),
    ('Referer', 'http://www.bjguahao.gov.cn/comm/index.html'),
]
urllib2.install_opener(opener)