圖片驗證碼預處理

時間 2019-11-06
標籤：圖片驗證碼預處理　简体版
原文：原文鏈接
from copy import deepcopy
from PIL import Image
import numpy as np
from collections import Counter
import hashlib

# Offsets of the 8 pixels surrounding a pixel (4-connected + diagonals).
_NEIGHBOUR_OFFSETS = (
    (0, 1), (0, -1), (1, 0), (-1, 0),
    (-1, -1), (-1, 1), (1, -1), (1, 1),
)


def fileMd5(filePath):
    """Return the hex MD5 digest of the file at *filePath*.

    The file is read in fixed-size chunks so large files are not loaded
    into memory at once.
    """
    digest = hashlib.md5()
    with open(filePath, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _loadGray(filePath):
    """Open *filePath*, convert it to 8-bit grayscale ('L') and return a 2-D array.

    The image file handle is closed before returning.
    """
    with Image.open(filePath) as img:
        return np.asarray(img.convert('L'))


def _levelsByFrequency(gray):
    """Return the gray levels of *gray* as Python ints, most frequent first.

    Ties keep first-encountered order (``Counter.most_common`` behaviour).
    Plain ints are returned on purpose so later arithmetic on the levels
    cannot wrap around the way ``uint8`` arithmetic does.
    """
    counts = Counter(gray.ravel().tolist())
    return [level for level, _ in counts.most_common()]


# Binarisation for images that consist of a background colour and a text
# colour. The more uniform the colours, the better the result. Works for most
# colour captchas that have no interference lines.
def parseImg_1(filePath, savePath='parsed.jpg'):
    """Binarise the image and save the result.

    The threshold is the mean of the most frequent gray level and the next
    most frequent level that differs from it by more than 50. Pixels above
    the threshold become 255, all others 0.

    :param filePath: path of the input image
    :param savePath: path the binarised image is written to
    :return: None
    """
    gray = _loadGray(filePath)
    ranked = _levelsByFrequency(gray)

    anchors = ranked[:1]
    for level in ranked[1:]:
        # `level` and `anchors[0]` are Python ints, so the difference cannot
        # wrap around as it would with uint8 operands.
        if abs(level - anchors[0]) > 50:
            anchors.append(level)
            break

    threshold = np.mean(anchors)
    binary = np.where(gray > threshold, 255, 0)
    Image.fromarray(np.uint8(binary)).save(savePath)


# Turn the background colour(s) white.
def parseImg_2(filePath, transNum=35, savePath='parsed.jpg'):
    """Replace the most frequent gray levels with white and save the result.

    :param filePath: path of the input image
    :param transNum: how many of the most frequent gray levels count as
        background; use 1 if the background is a single colour. If the image
        has fewer distinct levels than this, all of them are replaced.
    :param savePath: path the processed image is written to
    :return: None
    """
    gray = _loadGray(filePath)
    # Slicing clamps to the number of levels actually present, so a small
    # palette no longer raises IndexError.
    background = _levelsByFrequency(gray)[:max(transNum, 0)]

    whitened = gray.copy()
    whitened[np.isin(gray, background)] = 255
    Image.fromarray(np.uint8(whitened)).save(savePath)


# Interference-line removal. Suited to thin lines; thick lines give poor
# results, and a single pass usually does not give the best result. Can also
# remove small noise dots, but not large ones.
def parseImg_3(filePath):
    """Remove thin lines / small dots from the image and overwrite it in place.

    An interior pixel is set to white when more than two of its four direct
    neighbours are brighter than 150. Border pixels keep their original
    value. The result is then binarised at 150 and saved back to *filePath*.

    :param filePath: path of the image; it is read and then overwritten
    :return: None
    """
    gray = _loadGray(filePath)
    # Cast to a small integer type first: adding boolean arrays would be a
    # logical OR rather than a count.
    bright = (gray > 150).astype(np.uint8)

    # Number of bright 4-neighbours for every interior pixel.
    # NumPy indexes [row, column], i.e. [height, width].
    brightNeighbours = (
        bright[1:-1, :-2] + bright[1:-1, 2:]
        + bright[:-2, 1:-1] + bright[2:, 1:-1]
    )

    # Start from a copy so the border pixels, which have no full neighbourhood,
    # are well defined instead of being left uninitialised.
    cleaned = gray.copy()
    interior = cleaned[1:-1, 1:-1]
    interior[brightNeighbours > 2] = 255

    cleaned = np.where(cleaned > 150, 255, 0)
    Image.fromarray(np.uint8(cleaned)).save(filePath)


# Interference-line removal, refined version: call parseImg_3 repeatedly
# until the file stops changing.
def parseImg_3_wanshanbanben(filepath):
    """Apply :func:`parseImg_3` to *filepath* until its MD5 no longer changes.

    :param filepath: path of the image; it is overwritten on every pass
    :return: None
    """
    previous = fileMd5(filepath)
    while True:
        parseImg_3(filepath)
        current = fileMd5(filepath)
        if current == previous:
            break
        previous = current


# Hollow-to-solid: turn outlined characters into filled ones.
def parseImg_4(filePath, savePath='parsed.jpg'):
    """Flood-fill the white background from the four corners and save the mask.

    Pixels brighter than 200 count as white. Starting from each corner pixel
    that is white, every white pixel reachable through 8-connected white
    neighbours is written as 255; every other pixel is written as 0.

    :param filePath: path of the input image
    :param savePath: path the resulting image is written to
    :return: None
    """
    white = _loadGray(filePath) > 200
    rows, cols = white.shape

    filled = np.zeros(white.shape, dtype=np.uint8)
    visited = np.zeros(white.shape, dtype=bool)
    pending = []

    def visit(r, c):
        # Mark the pixel as seen; queue it for expansion only if it is white.
        visited[r, c] = True
        if white[r, c]:
            filled[r, c] = 255
            pending.append((r, c))

    # Seed with the real corner pixels (the set removes duplicates for
    # one-pixel-wide or one-pixel-high images).
    for r, c in {(0, 0), (0, cols - 1), (rows - 1, 0), (rows - 1, cols - 1)}:
        visit(r, c)

    while pending:
        r, c = pending.pop()
        for dr, dc in _NEIGHBOUR_OFFSETS:
            nr, nc = r + dr, c + dc
            if 0 <= nr < rows and 0 <= nc < cols and not visited[nr, nc]:
                visit(nr, nc)

    Image.fromarray(filled).save(savePath)


if __name__ == '__main__':
    # parseImg_1('pin.png')
    # parseImg_1('q.jpg')
    # # parseImg_2('ws.jpg')
    # # parseImg_3('parsed.jpg')
    # parseImg_3_wanshanbanben('parsed.jpg')
    parseImg_4('pin.png')
    # parseImg_1('pin2.png')
    # parseImg_3_wanshanbanben('parsed.jpg')
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。