Python 識別身份證號碼

時間 2019-12-09

原文原文鏈接

#!/usr/bin/python
# -*-coding:utf-8-*-
import sys

import time

time1 = time.time()
from PIL import Image
import pytesseract


###########二值化算法
def binarizing(img, threshold):
    """Threshold an image in place to pure black/white.

    Every pixel whose value is strictly below ``threshold`` becomes 0;
    every other pixel becomes 255.

    Args:
        img: Object exposing ``load()`` (pixel access indexed by ``[x, y]``)
            and ``size`` as ``(width, height)``. Pixel values are compared
            directly with ``threshold``, so a single-channel image is
            assumed -- TODO confirm against callers.
        threshold: Cut-off value; pixels equal to it are set to 255.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(height):
        for col in range(width):
            pixels[col, row] = 0 if pixels[col, row] < threshold else 255
    return img


###########去除干擾線算法
def depoint(img):  # input: gray image
    """Whiten interior pixels that are mostly surrounded by near-white pixels.

    For each pixel not on the image border, the four direct neighbours
    (up, down, left, right) are inspected; if more than two of them have a
    value above 245, the pixel is set to 255.

    The image is modified in place while it is being scanned, so pixels
    whitened earlier in the scan are seen as white by later neighbours.

    Args:
        img: Object exposing ``load()`` (pixel access indexed by ``[x, y]``)
            and ``size`` as ``(width, height)``.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixels = img.load()
    width, height = img.size
    # Offsets of the 4-connected neighbourhood: up, down, left, right.
    neighbours = ((0, -1), (0, 1), (-1, 0), (1, 0))
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            bright = sum(
                1 for dx, dy in neighbours if pixels[col + dx, row + dy] > 245
            )
            if bright >= 3:
                pixels[col, row] = 255
    return img


########身份證號碼識別
def identity_OCR(pic_path):
    """Crop the ID-number strip out of an ID-card image and OCR it.

    The image is upscaled 3x, a fixed rectangle is cropped from it,
    converted to grayscale, binarized with threshold 100, passed through
    ``depoint`` and finally handed to ``pytesseract.image_to_string``.
    The recognised text is printed and also returned.

    Args:
        pic_path: Path of the ID-card image, passed to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``, unmodified.
    """
    scale = 3
    # Use a context manager so the file handle opened by Pillow is released.
    with Image.open(pic_path) as card:
        w, h = card.size
        # Enlarge the card 3x. Image.ANTIALIAS was removed in Pillow 10;
        # Image.LANCZOS is the same resampling filter under its current name.
        enlarged = card.resize((w * scale, h * scale), Image.LANCZOS)
    # Fixed crop box (left, upper, right, lower) in upscaled coordinates.
    # NOTE(review): presumably matches the ID-number position of one
    # specific card layout/resolution -- confirm before using other images.
    region = (125 * scale, 200 * scale, 370 * scale, 250 * scale)
    number_strip = enlarged.crop(region)
    # Convert to 8-bit grayscale so pixels compare as plain integers.
    gray = number_strip.convert('L')
    # Both helpers modify the image in place and return the same object.
    gray = binarizing(gray, 100)
    cleaned = depoint(gray)
    code = pytesseract.image_to_string(cleaned)
    print("識別該身份證號碼是:" + str(code))
    return code


if __name__ == '__main__':
    # Run OCR on the sample image next to the script, then report the
    # wall-clock time elapsed since ``time1`` was recorded at module top.
    sample_image = "./1.png"
    identity_OCR(sample_image)
    elapsed = time.time() - time1
    print(u'總共耗時：' + str(elapsed) + 's')

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。