1、手動輸入驗證碼
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Manual captcha entry for gushiwen.org:
#   1. request the login page and download the captcha image;
#   2. read the hidden form fields the login form needs;
#   3. ask the user to type the captcha and POST the login form.
# Note: every request goes through one shared requests.Session.

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/73.0.3683.86 Safari/537.36'}

# The login page and the login form endpoint share the same URL.
LOGIN_URL = 'https://so.gushiwen.org/user/login.aspx?from='
# Where the downloaded captcha image is stored.
CAPTCHA_PATH = 'img/code.png'


def download_code(s):
    """Download the captcha image and return the hidden form tokens.

    Args:
        s: The ``requests.Session`` used for all requests.

    Returns:
        A ``(viewstate, viewstate_generator)`` tuple holding the values of
        the ``__VIEWSTATE`` and ``__VIEWSTATEGENERATOR`` inputs found on the
        login page.

    Raises:
        requests.HTTPError: If the login page or the captcha image request
            returns an error status.
    """
    # Request the login page.
    r = s.get(LOGIN_URL, headers=headers)
    r.raise_for_status()

    # Parse the page and resolve the captcha image link against the page
    # URL, so both absolute and relative ``src`` values work.
    soup = BeautifulSoup(r.text, 'lxml')
    img_src = soup.find('img', id='imgCode')['src']
    img_url = urljoin(LOGIN_URL, img_src)
    print(img_url)

    # Download the image; create the target directory first so that a fresh
    # checkout does not fail with FileNotFoundError.
    r_img = s.get(img_url, headers=headers)
    r_img.raise_for_status()
    os.makedirs(os.path.dirname(CAPTCHA_PATH), exist_ok=True)
    with open(CAPTCHA_PATH, 'wb') as fp:
        fp.write(r_img.content)

    # Collect the two hidden parameters required by the login form.
    viewstate = soup.find('input', id='__VIEWSTATE')['value']
    viewstate_generator = soup.find('input', id='__VIEWSTATEGENERATOR')['value']

    return viewstate, viewstate_generator


def login(s, VIEW, VIEWG):
    """Prompt for the captcha, submit the login form and save the response.

    Args:
        s: The ``requests.Session`` that was used to download the captcha.
        VIEW: Value for the ``__VIEWSTATE`` form field.
        VIEWG: Value for the ``__VIEWSTATEGENERATOR`` form field.

    The response body is written to ``gushi.html`` regardless of the HTTP
    status.
    """
    code = input('輸入驗證碼:')

    # NOTE(review): the account credentials are hard-coded here; consider
    # loading them from the environment or a config file instead.
    form_data = {'__VIEWSTATE': VIEW,
                 '__VIEWSTATEGENERATOR': VIEWG,
                 'code': code,
                 'denglu': '登陸',
                 'email': '18404904721',
                 'from': '',
                 'pwd': 'gjp625262'}

    r = s.post(url=LOGIN_URL, headers=headers, data=form_data)

    with open('gushi.html', 'w', encoding='utf8') as fp:
        fp.write(r.text)


def main():
    """Run the manual-captcha login flow end to end."""
    # Create the session shared by the captcha download and the login.
    s = requests.Session()

    # Download the captcha and fetch the form tokens.
    VIEW, VIEWG = download_code(s)

    # Log in.
    login(s, VIEW, VIEWG)


if __name__ == '__main__':
    main()
2、tesseract光學識別
from PIL import Image
import pytesseract

# Prerequisites: install tesseract, then `pip install pytesseract`.

# Open the captcha image.
img = Image.open(r'img/code.png')

# Convert to grayscale.
img = img.convert('L')

# Binarize: gray levels below the threshold map to 0, all others to 1.
threshold = 140
table = [0 if level < threshold else 1 for level in range(256)]
out = img.point(table, '1')
out.show()

# Recognize the *binarized* image. Running OCR on the un-thresholded image
# would make the binarization step above pointless.
img = out.convert('RGB')
print(pytesseract.image_to_string(img))
3、打碼平臺(雲打碼)
from YDMHTTPDemo3 import YDMHttp

# Demo for a captcha-solving platform (Yundama; Dama2 is a similar service).

# --- Account settings -------------------------------------------------------
# User name.
username = 'mianxiang_mei'
# Password.
password = 'gjp625262'
# Software ID, required for the developer revenue share; taken from the
# "My software" page of the developer console.
appid = 8212
# Software key, obtained from the same page as the software ID.
appkey = 'dbd2645a635701a0a9f19fd0072d82c3'

# --- Recognition settings ---------------------------------------------------
# Captcha image file.
filename = 'img/code.png'
# Captcha type, e.g. 1004 means 4 alphanumeric characters. Pricing differs
# per type and a wrong type hurts the recognition rate. All types are listed
# at http://www.yundama.com/price.html
codetype = 1004
# Timeout in seconds.
timeout = 60

# Only run when the placeholder user name has been replaced.
if username != 'username':
    # Initialise the client.
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama.
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the account balance.
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Start recognition: image path, captcha type ID, timeout in seconds.
    cid, result = yundama.decode(filename, codetype, timeout)
    print('cid: %s, result: %s' % (cid, result))
else:
    print('請設置好相關參數再測試')
附:雲打碼調用的類
import http.client
import mimetypes
import urllib
import json
import time
import requests


class YDMHttp:
    """Small HTTP client for the Yundama captcha-recognition API.

    Every API call is a POST to ``apiurl`` carrying the account credentials
    plus a ``method`` field; responses are parsed as JSON.

    Methods that return a status value use these conventions:
      * a negative ``ret`` from the server is passed through unchanged;
      * ``-9001`` is returned when the parsed response is empty;
      * ``-3003`` is returned by ``decode`` when no result arrives in time.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _credentials(self, method, **extra):
        """Build the form fields shared by every API call."""
        data = {'method': method,
                'username': self.username,
                'password': self.password,
                'appid': self.appid,
                'appkey': self.appkey}
        data.update(extra)
        return data

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, or the error code the response carries."""
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """POST ``fields`` (and optional ``files``) and return the parsed JSON.

        Args:
            fields: Mapping of form field names to values.
            files: Optional mapping of form field name to file path.
        """
        response = self.post_url(self.apiurl, fields, files)
        return json.loads(response)

    def balance(self):
        """Return the account balance, or a negative error code."""
        response = self.request(self._credentials('balance'))
        return self._value_or_error(response, 'balance')

    def login(self):
        """Log in and return the user id, or a negative error code."""
        response = self.request(self._credentials('login'))
        return self._value_or_error(response, 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload a captcha image and return its ``cid``, or an error code.

        Args:
            filename: Path of the image file to upload.
            codetype: Captcha type id.
            timeout: Timeout in seconds, forwarded to the API.
        """
        data = self._credentials('upload',
                                 codetype=str(codetype),
                                 timeout=str(timeout))
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the recognised text for ``cid``, or ``''`` if not available."""
        response = self.request(self._credentials('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload an image and poll for its recognition result.

        Polls once per second, at most ``timeout`` times.

        Returns:
            ``(cid, text)`` on success, ``(-3003, '')`` when no result arrived
            within ``timeout`` polls, or ``(error_code, '')`` when the upload
            itself failed.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''

        for _ in range(timeout):
            result = self.result(cid)
            if result != '':
                return cid, result
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Send a report for ``cid`` with ``flag`` set to ``'0'``.

        Returns:
            The ``ret`` value of the response, or ``-9001`` when the response
            is empty.
        """
        response = self.request(self._credentials('report', cid=str(cid), flag='0'))
        if not response:
            return -9001
        return response['ret']

    def post_url(self, url, fields, files=None):
        """POST form ``fields`` and optional file uploads; return the body text.

        Args:
            url: Target URL.
            fields: Mapping of form field names to values.
            files: Optional mapping of form field name to file path. The
                mapping is not modified, and every file opened here is closed
                before returning.
        """
        opened = {}
        try:
            for key, path in (files or {}).items():
                opened[key] = open(path, 'rb')
            res = requests.post(url, files=opened, data=fields)
        finally:
            # Close the handles even when opening a later file or the
            # request itself raises.
            for handle in opened.values():
                handle.close()
        return res.text