OpenCV+TensorFlow圖片手寫數字識別(附源碼)

時間 2019-11-10

標籤 opencv+tensorflow opencv tensorflow 圖片手寫數字識別源碼简体版

原文原文鏈接

初次接觸TensorFlow，而手寫數字訓練識別是其最基本的入門教程。網上關於訓練的教程不少，可是模型的測試大多都是用官方提供的一些素材，能不能自己隨便寫一串數字讓機器識別出來呢？紙上得來終覺淺，帶着這個疑問昨晚研究了一下，利用這篇文章來記錄下自己的一些心得！

下面這張圖片是我隨手寫的一串數字，我的目標是利用訓練好的模型來識別出圖片裏面的手寫數字，開始實戰！

2層卷積神經網絡的訓練：

from tensorflow.examples.tutorials.mnist import input_data
# 保存模型須要的庫
from tensorflow.python.framework.graph_util import convert_variables_to_constants
from tensorflow.python.framework import graph_util
# 導入其餘庫
import tensorflow as tf
import cv2
import numpy as np
# Load the MNIST data set from ./MNIST_data with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# Create an interactive session; the training code later in this listing
# calls .eval()/.run() without passing a session explicitly.
sess = tf.InteractiveSession()
# Placeholders: flattened 28x28 input images and one-hot labels.
# The input placeholder is deliberately named "Mul"; code that restores the
# model looks the tensor up by that name ("Mul:0").
x = tf.placeholder("float", shape=[None, 784], name="Mul")
y_ = tf.placeholder("float", shape=[None, 10], name="y_")
# Variables. W and b are not referenced by the convolutional network built
# later in this listing.
W = tf.Variable(tf.zeros([784, 10]), name='x')
# The name must be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, so a positional string is not used as the name.
b = tf.Variable(tf.zeros([10]), name='b')
# Weights
def weight_variable(shape):
    """Return a tf.Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# Biases
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Convolution
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
# Max pooling
def max_pool_2x2(x):
    """Apply max pooling to `x` with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')

# ---- Network construction -------------------------------------------------
# First convolutional layer: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Reshape the flat 784-vector back into a 28x28 single-channel image.
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 5x5 kernels, 32 -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layers. Two stride-2 poolings shrink 28x28 to 7x7, so the
# flattened feature size is 7 * 7 * 64.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout is only applied on the training path; keep_prob is fed at run time.
keep_prob = tf.placeholder("float", name='rob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Training output (with dropout). The logits are kept separately so the loss
# can be computed from them directly.
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits, name='res')
# Inference output (no dropout), used after training; it does not depend on
# keep_prob, so only the input placeholder has to be fed.
y_conv2 = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2, name="final_result")

# Cross-entropy summed over the batch. Computing it from the logits avoids
# log(0) = -inf (and the resulting NaN loss) that -sum(y_ * log(softmax))
# produces when a predicted probability underflows to zero.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Optimizer that minimises the cross-entropy.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# Accuracy: fraction of correct predictions in the batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Initialise all variables. This must stay after the optimizer is built so
# that the variables the optimizer creates are initialised as well.
sess.run(tf.global_variables_initializer())

# Record the input/output tensors in graph collections for later use.
tf.add_to_collection('res', y_conv)
tf.add_to_collection('output', y_conv2)
tf.add_to_collection('x', x)

# ---- Training ---------------------------------------------------------------
for i in range(10000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Report accuracy on the current batch with dropout disabled.
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # run() feeds the placeholders and executes one optimisation step:
    # batch[0] goes to x, batch[1] goes to y_.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# ---- Export -----------------------------------------------------------------
# Take the GraphDef of the default graph.
graph_def = tf.get_default_graph().as_graph_def()
# Freeze variables into constants (needed when exporting a .pb model). The
# output node name must match the name given to y_conv2 above
# ('final_result'), otherwise the conversion fails. To keep other tensors,
# list their node names here as well.
# NOTE: the frozen graph is only built here; it is not written to disk.
output_graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['final_result'])
# Make sure the target directory exists so the checkpoint write cannot fail
# after training has already finished.
tf.gfile.MakeDirs("model_data")
# Save the model as a checkpoint with tf.train.Saver.
saver = tf.train.Saver()
saver.save(sess, "model_data/model")

網絡訓練成功後，在model_data文件夾裏會有以下四個文件：

網絡模型的驗證可大體從以下三個部分來進行：
1. 接下來就是要利用上面的圖片來測試我們的模型。實際上圖像的預處理部分很關鍵，也就是如何準確地提取出上面圖像中的數字區域，並且進行閾值分割。傳統的單一閾值分割很難達到要求，所以本次分割採用基於改進的Niblack的分割方法，大家有興趣可以查閱相關的資料。
2. 分割完了以後要標記連通區域，去除那些小點區域，並找到每個區域的外接矩形，可認爲這個矩形區域就是我們感興趣的區域。
3. 將感興趣區域降採樣爲28*28的大小來進行識別。
代碼部分如下所示：

"""
基於TensorFlow的手寫數字識別
Author_Zjh
2018/12/3
"""
import numpy as np
import cv2
import matplotlib.pyplot as plt
import imutils
import matplotlib.patches as mpatches
from skimage import data,segmentation,measure,morphology,color
import tensorflow as tf
class Number_recognition():
    """Locate handwritten digits in an image and classify them with the saved CNN.

    Constructing an instance restores the checkpoint ``model_data/model`` and
    immediately runs :meth:`Preprocessing` on ``img``, which displays the
    result windows and blocks in ``cv2.waitKey(0)``.
    """

    def __init__(self, img):
        """Restore the model and process ``img`` (a BGR image array)."""
        self.sess = tf.InteractiveSession()
        saver = tf.train.import_meta_graph('model_data/model.meta')
        saver.restore(self.sess, 'model_data/model')  # restore the weights
        # Look up the input and output tensors by name.
        self.input_x = self.sess.graph.get_tensor_by_name("Mul:0")
        self.y_conv2 = self.sess.graph.get_tensor_by_name("final_result:0")
        self.Preprocessing(img)  # image preprocessing + recognition

    def recognition(self, im):
        """Classify one binarised digit crop and return the predicted digit.

        ``im`` is resized to 28x28, flattened to a (1, 784) row and fed to the
        network; the index of the largest output is printed and returned.
        """
        im = cv2.resize(im, (28, 28), interpolation=cv2.INTER_CUBIC)
        # Scale 0..255 pixel values to [0, 1]: the MNIST loader used for
        # training yields float pixels in that range, so raw uint8 values
        # would not match what the network was trained on.
        x_img = np.reshape(im, [-1, 784]).astype(np.float32) / 255.0
        output = self.sess.run(self.y_conv2, feed_dict={self.input_x: x_img})
        digit = np.argmax(output)
        print('您輸入的數字是 %d' % (digit))
        return digit  # the recognised digit

    def Preprocessing(self, image):
        """Segment ``image``, recognise every digit region and show the result.

        Steps: optional downscale, grayscale + Gaussian blur, tile-wise local
        thresholding, connected-component labelling, per-region recognition,
        then display of the binary and annotated images.

        Raises:
            ValueError: if ``image`` is None.
        """
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; fail with a clear message rather than an AttributeError.
            raise ValueError("image is None; check that the input file exists and is readable")

        if image.shape[0] > 800:
            # Local thresholding is slow on large images, so downscale first.
            image = imutils.resize(image, height=800)

        img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to grayscale
        img = cv2.GaussianBlur(img, (3, 3), 0)  # Gaussian smoothing
        imm = img.copy()
        rows, cols = imm.shape
        tile = 19

        # Niblack-style local thresholding, which works well for text-like
        # images. Stepping with range(0, size, tile) never yields an empty
        # trailing tile, even when a dimension is an exact multiple of 19.
        for top in range(0, rows, tile):
            for left in range(0, cols, tile):
                s = imm[top:top + tile, left:left + tile]
                me = s.mean()  # tile mean
                sd = np.std(s)  # tile standard deviation
                t = me * (1 - 0.2 * ((125 - sd) / 125))
                _, binary = cv2.threshold(s, t, 255, cv2.THRESH_BINARY_INV)
                imm[top:top + tile, left:left + tile] = binary

        label_image = measure.label(imm)  # label connected regions
        for region in measure.regionprops(label_image):
            # Ignore small regions (noise specks).
            if region.area < 100:
                continue
            minr, minc, maxr, maxc = region.bbox  # bounding box
            cv2.rectangle(image, (minc, minr), (maxc, maxr), (0, 255, 0), 2)
            # Crop the digit with a 5-pixel margin. The lower bounds are
            # clamped to 0: a negative start index would wrap around and give
            # an empty or wrong crop. Upper bounds past the edge are already
            # truncated by NumPy slicing.
            im2 = imm[max(minr - 5, 0):maxr + 5, max(minc - 5, 0):maxc + 5]
            number = self.recognition(im2)
            # Write the recognised digit on the original image.
            cv2.putText(image, str(number), (minc, minr - 10), 0, 2, (0, 0, 255), 2)

        cv2.imshow("Nizi", imm)
        cv2.imshow("Annie", image)
        cv2.waitKey(0)
if __name__ == '__main__':
    # cv2.imread returns None (it does not raise) when the file is missing or
    # unreadable, so check before handing the image to the recogniser.
    img = cv2.imread("num.jpg")
    if img is None:
        raise SystemExit("Could not read image file: num.jpg")
    x = Number_recognition(img)