Python-圖片文字識別

時間 2020-04-18

原文鏈接

　　百度 AI 接口（手寫文字識別）：https://ai.baidu.com/docs#/OCR-API/9ef46660

　　實現效果：

　　步驟一：接入接口

　　進入上述網站申請帳號，然後運行相關代碼，獲取 access_token 即算完成（因爲百度的 access_token 每 30 天需要更新一次，故代碼中加入了按日期自動更新的處理；如何獲取 access_token 也可見代碼）。

　　步驟二：功能介紹：用戶輸入的圖片路徑可爲網絡上的 URL，也可爲本機上的地址；爲圖省事，圖片名稱固定爲 ValidateCode.jpg。因爲本人接入的是百度 AI 接口的手寫文字識別，因此一般的驗證碼應該都可以通過；如果想加入其它功能，那麼返回的 JSON 數據就會有所改變，具體可以見 API 接口文檔。本文是爲了簡化對百度文檔的理解而寫的介紹。

　　　　request.urlretrieve(imagepath, 'ValidateCode.jpg')  # 下載圖片

　　　　更新 access_token：由於百度 API 規定 access_token 每 30 天需更新一次，因此我把更新時間提前了。（別亂搞我的密鑰呀，我也是爲了分享呀 QAQ）

 2　　　def accesjson():  3     flag = 0  4     fromtime = 1546061002    #起始時間
 5     nowtime = int(time.time())  6 
 7     #2592000剛好爲30天，故提早
 8     if nowtime - fromtime > 2000000:  9         flag = 1
10         gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1) 11         # client_id 爲官網獲取的AK， client_secret 爲官網獲取的SK
12         host = 'https://aip.baidubce.com/oauth/2.0/token?grant_' \ 13                'type=client_credentials&client_id=Ooj730ZD0Rm7E1dmcPwoZX9s&client_secret=dr5T1icZGqK8ZFyTr4wi2AWbtNKMIsNd'
14         req = request.Request(host) 15         response = request.urlopen(req, context=gcontext).read().decode('UTF-8') 16         result = json.loads(response) 17     if flag == 1: 18         return result 19     else: 20         return None

　　　　圖片正式識別：注意，接入的功能不同，返回的 JSON 數據也不同，具體看返回的 JSON 就明白了。

 1 #返回圖片驗證碼
 2 def vercode():  3     f = open('ValidateCode.jpg', 'rb')  4     img = base64.b64encode(f.read())  5     #不一樣百度API接口不同，傳遞參數不同，返回json也不同
 6     host = 'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting'
 7     headers = {  8         'Content-Type': 'application/x-www-form-urlencoded'
 9  } 10     #更換json
11     if accesjson() == None: 12         access_token = '24.18591b2e4c97956e0f830db9f66e5373.2592000.1548646630.282335-15301065'
13     else: 14         access_token = accesjson() 15         print('已更換最新json，歡迎繼續使用!') 16     host = host + '?access_token=' + access_token 17 
18     data = {} 19     data['access_token'] = access_token 20     data['image'] = img 21     res = requests.post(url=host, headers=headers, data=data) 22     req = res.json() 23     return req['words_result'][0]['words']

　　　　完整代碼：目前可實現的功能就是網絡上的文字圖片識別，或本機圖片識別（和之前的抖音圖片加載類似）。

　　　　拓展：https://ai.qq.com/ （啥B騰訊的 API 接口，全是 PHP，用都不知道怎麼用，涼涼。）

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/12/29 10:48
# @Author : Empirefree
# @File : 17-2-驗證碼.py
# @Software: PyCharm Community Edition

import base64
import json
import os
import re
import shutil
import ssl
import time
from urllib import request

import requests

def IsHttp(imagepath):
    """Tell whether *imagepath* is an HTTP(S) URL rather than a local path.

    Args:
        imagepath: The image location entered by the user.

    Returns:
        1 if *imagepath* starts with ``http://`` or ``https://`` (the scheme
        is compared case-insensitively), otherwise 0.
    """
    # Check the scheme prefix only: searching for 'http' anywhere in the
    # string misclassified local paths such as 'pics/http_cat.jpg' as URLs.
    if imagepath.lower().startswith(('http://', 'https://')):
        return 1
    return 0

def downloadpic(imagepath):
    """Place the image to recognize at ``ValidateCode.jpg`` and print its path.

    Args:
        imagepath: Either a URL (for example
            ``http://210.42.38.26:84/jwc_glxt/ValidateCode.aspx``) or the
            path of an image on the local machine.
    """
    target = 'ValidateCode.jpg'
    if IsHttp(imagepath):
        request.urlretrieve(imagepath, target)  # download the image
    elif os.path.isfile(imagepath):
        # A local image used to be ignored entirely, so recognition only
        # worked when the file already was ./ValidateCode.jpg. Copy it into
        # place, unless it already is that file (copyfile rejects copying a
        # file onto itself).
        already_in_place = (os.path.exists(target)
                            and os.path.samefile(imagepath, target))
        if not already_in_place:
            shutil.copyfile(imagepath, target)

    print(os.path.abspath(target))

def accesjson():
    """Request a new Baidu OAuth token response once the bundled token is stale.

    Per the original author's note, Baidu requires the token to be replaced
    every 30 days.

    Returns:
        The decoded JSON body of the OAuth token endpoint (a dict) when more
        than 2,000,000 seconds have passed since ``fromtime``; otherwise
        ``None``, in which case no request is made.
    """
    fromtime = 1546061002  # start time (Unix timestamp)
    nowtime = int(time.time())

    # 2592000 s is exactly 30 days, so refresh early, after 2,000,000 s.
    if nowtime - fromtime <= 2000000:
        return None

    # Use the default client context: it negotiates the newest TLS version
    # both sides support and verifies the server certificate. The former
    # ssl.SSLContext(ssl.PROTOCOL_TLSv1) pinned deprecated TLS 1.0 and did
    # not verify certificates.
    gcontext = ssl.create_default_context()
    # client_id is the AK and client_secret the SK obtained from the Baidu
    # console.
    # NOTE(review): these credentials are hard-coded in source; consider
    # loading them from configuration instead.
    host = ('https://aip.baidubce.com/oauth/2.0/token?grant_'
            'type=client_credentials&client_id=Ooj730ZD0Rm7E1dmcPwoZX9s&client_secret=dr5T1icZGqK8ZFyTr4wi2AWbtNKMIsNd')
    req = request.Request(host)
    # The with-block closes the HTTP response as soon as it has been read.
    with request.urlopen(req, context=gcontext) as response:
        payload = response.read().decode('UTF-8')
    return json.loads(payload)

def vercode():
    """Recognize the text in ``ValidateCode.jpg`` via Baidu's handwriting OCR.

    Returns:
        The ``words`` value of the first entry of ``words_result`` in the
        JSON response.
    """
    # Read through a context manager so the file handle is always closed.
    with open('ValidateCode.jpg', 'rb') as f:
        img = base64.b64encode(f.read())

    # Different Baidu API endpoints take different parameters and return
    # different JSON.
    host = 'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting'
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    # Call accesjson() exactly once: past the refresh window every call
    # performs a network request.
    token_info = accesjson()
    if token_info is None:
        access_token = '24.18591b2e4c97956e0f830db9f66e5373.2592000.1548646630.282335-15301065'
    else:
        # accesjson() returns the whole decoded response; the token string
        # is its 'access_token' field. Concatenating the dict itself to the
        # URL raised TypeError.
        access_token = token_info['access_token']
        print('已更換最新json，歡迎繼續使用!')
    host = host + '?access_token=' + access_token

    data = {
        'access_token': access_token,
        'image': img,
    }
    res = requests.post(url=host, headers=headers, data=data)
    req = res.json()
    return req['words_result'][0]['words']

def checkcode():
    """Prompt for an image path, fetch that image and return the recognized text."""
    path = input('請輸入您的圖片路徑: ')
    downloadpic(path)
    return vercode()

if __name__ == '__main__':
    # Run one interactive recognition and print the result. The result is
    # printed directly so the builtin ``str`` is no longer rebound at module
    # scope.
    print(checkcode())