[TOC]
學習 TensorFlow 有一段時間了，想做點東西來練練手。爲了更有意思一點，下面將搭建一個簡單的驗證碼識別系統。
下面將生成一萬張四位英文字母的驗證碼，驗證碼是大小爲 100 * 30 的圖片，只包含大寫英文字母，並將目標值保存到 csv 文件。
import os
import random

import pandas as pd
from PIL import Image, ImageDraw, ImageFont


def generate_captcha(filename, format):
    """Draw a four-letter captcha, save it to ``filename`` and return its text.

    The image is a white 100x30 RGB canvas. Each of the four characters is a
    random uppercase ASCII letter (A-Z) drawn in a random colour.

    :param filename: path of the image file to write
    :param format: image format handed to ``Image.save`` (e.g. ``"png"``)
    :return: the four-character captcha string
    """
    # PIL sizes are (width, height): 100px wide, 30px high, white background.
    img = Image.new(mode="RGB", size=(100, 30), color=(255, 255, 255))
    draw = ImageDraw.Draw(img, mode="RGB")
    font = ImageFont.truetype("arial", 28)

    result = ""
    for i in range(4):
        # 65..90 are the ASCII codes of 'A'..'Z'.
        char = chr(random.randint(65, 90))
        result += char
        # A fresh random colour for every character.
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        # Four glyphs share the 100px width, so each one is shifted 24px right.
        draw.text([i * 24 + 3, 0], char, color, font=font)

    # Create the target directory on demand so saving does not fail when it
    # does not exist yet.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(filename, "wb") as f:
        img.save(f, format=format)
    return result


if __name__ == "__main__":
    data = []
    # Generate 10000 captcha images and collect their target values.
    for j in range(10000):
        val = generate_captcha("./pics/{}.png".format(j), "png")
        data.append([val])
    # Store the target values as "<index>,<label>" rows without a header.
    df = pd.DataFrame(data, columns=['label'])
    df.to_csv('./pics/data.csv', header=False)
生成的驗證碼圖片如下：
csv 文件內容：
0,EFGQ
1,ZDKO
2,UWLD
3,CPDH
....
上面生成的圖片和其目標值是分開的，在進行訓練時不太方便（訓練時每次都要分別讀取圖片和目標值）。保存爲 tfrecords 文件，在訓練時會方便很多，讀取出來的每條記錄既有圖片特徵值又有目標值。
import tensorflow as tf
import os
import numpy as np


class CaptchaInput(object):
    """Pack a directory of captcha PNGs and its ``data.csv`` into one TFRecords file."""

    def __init__(self, captcha_dir, letter, tfrecords_dir):
        """
        :param captcha_dir: directory holding the ``<n>.png`` images and ``data.csv``
        :param letter: string of every character a captcha may contain
        :param tfrecords_dir: path of the TFRecords file to write
        """
        self.captcha_dir = captcha_dir
        self.letter = letter
        self.tfrecords_dir = tfrecords_dir

        # os.listdir does not return names in numeric order (1.png, 10.png, ...),
        # so sort by the number in front of ".png" to line images up with the
        # rows of the label file.
        png_names = [name for name in os.listdir(self.captcha_dir) if name.endswith(".png")]
        png_names.sort(key=lambda name: int(name[0:-4]))
        self.file_list = [os.path.join(self.captcha_dir, name) for name in png_names]

        # Path of the label file.
        self.labels_path = os.path.join(self.captcha_dir, "data.csv")

    def read_captcha_image(self):
        """Build the ops that read every captcha image.

        :return: a tensor batching all images, each with static shape [30, 100, 3]
        """
        # shuffle=False keeps the images in file_list order.
        file_queue = tf.train.string_input_producer(self.file_list, shuffle=False)
        reader = tf.WholeFileReader()
        _, value = reader.read(file_queue)
        image = tf.image.decode_png(value)
        image.set_shape([30, 100, 3])
        # One batch holding the whole data set.
        image_batch = tf.train.batch([image], batch_size=len(self.file_list),
                                     num_threads=1, capacity=len(self.file_list))
        return image_batch

    def read_captcha_label(self):
        """Build the ops that read every captcha label from the csv file.

        :return: a string tensor batching one label per image file
        """
        file_queue = tf.train.string_input_producer([self.labels_path], shuffle=False)
        reader = tf.TextLineReader()
        _, value = reader.read(file_queue)
        # Each row is "<int index>,<label string>"; only the label is used.
        records = [[0], [""]]
        _, label = tf.decode_csv(value, record_defaults=records)
        label_batch = tf.train.batch([label], batch_size=len(self.file_list),
                                     num_threads=1, capacity=len(self.file_list))
        return label_batch

    def process_labels(self, labels):
        """Turn byte-string labels into an array of character indices.

        :param labels: iterable of UTF-8 encoded byte strings, e.g. ``b"EFGQ"``
        :return: ``numpy.ndarray`` of dtype uint8 with one row per label; every
            entry is the position of the character in ``self.letter``
        :raises KeyError: if a label contains a character not in ``self.letter``
        """
        letter_to_index = {ch: idx for idx, ch in enumerate(self.letter)}
        encoded = [[letter_to_index[ch] for ch in label.decode("utf-8")]
                   for label in labels]
        # uint8 makes tobytes() emit exactly one byte per character; with the
        # default integer dtype each character would take several bytes and a
        # reader that decodes the label as uint8 would see the wrong length.
        return np.array(encoded, dtype=np.uint8)

    def write_to_tfrecords(self, images, labels):
        """Write the images and labels into the TFRecords file.

        :param images: feature values, indexable sequence of ``numpy.ndarray``
        :param labels: target values, indexable sequence of ``numpy.ndarray``
        :return: None
        """
        # Make sure the output directory exists before opening the writer.
        out_dir = os.path.dirname(self.tfrecords_dir)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # The context manager closes the file even if serialisation fails.
        with tf.python_io.TFRecordWriter(self.tfrecords_dir) as fw:
            for i, (image, label) in enumerate(zip(images, labels)):
                image_bytes = image.tobytes()
                label_bytes = label.tobytes()
                example = tf.train.Example(features=tf.train.Features(feature={
                    "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
                    "label": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label_bytes]))
                }))
                print("保存第%d張圖片" % (i, ))
                fw.write(example.SerializeToString())

    def execute(self):
        """Read all images and labels, then write them to the TFRecords file."""
        image_batch = self.read_captcha_image()
        label_batch = self.read_captcha_label()

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord=coord)
            try:
                # sess.run yields byte strings such as b'EFGQ', b'ZDKO', ...
                labels = self.process_labels(sess.run(label_batch))
                images = sess.run(image_batch)
                self.write_to_tfrecords(images, labels)
            finally:
                # Always stop the queue-runner threads, even after a failure.
                coord.request_stop()
                coord.join(threads)


FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("captcha_dir", "./pics", "驗證碼圖片路徑")
tf.app.flags.DEFINE_string("letter", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "驗證碼字符種類")
tf.app.flags.DEFINE_string("tfrecords_dir", "./tfrecords/captcha.tfrecords", "驗證碼tfrecords文件")


if __name__ == "__main__":
    c = CaptchaInput(FLAGS.captcha_dir, FLAGS.letter, FLAGS.tfrecords_dir)
    c.execute()
需要注意：
`os.listdir` 返回的文件名稱是按照 ascii 表的順序排列的（1.png, 10.png...），需要對其進行排序。
`shuffle=False`
# Tail of the preceding prose note about the conversion script: its input
# queues use shuffle=False so the files are not reordered and every image
# stays matched to its target value.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("captcha_dir", "./tfrecords/captcha.tfrecords", "驗證碼數據文件")
tf.app.flags.DEFINE_integer("batch_size", 100, "每批次訓練樣本數")


def read_and_decode():
    """Read captcha examples from the TFRecords file and batch them.

    Both features are decoded as raw uint8 bytes; images are reshaped to
    [30, 100, 3] and labels to [4] before batching.

    :return: ``(image_batch, label_batch)``
    """
    # Queue of input files (a single TFRecords file here).
    file_queue = tf.train.string_input_producer([FLAGS.captcha_dir])
    reader = tf.TFRecordReader()
    _, value = reader.read(file_queue)

    # Parse one serialized Example.
    features = tf.parse_single_example(value, features={
        "image": tf.FixedLenFeature([], tf.string),
        "label": tf.FixedLenFeature([], tf.string)
    })

    # Decode the raw bytes, then restore the shapes.
    image = tf.decode_raw(features["image"], tf.uint8)
    label = tf.decode_raw(features["label"], tf.uint8)
    image_reshape = tf.reshape(image, [30, 100, 3])
    label_reshape = tf.reshape(label, [4])

    image_batch, label_batch = tf.train.batch([image_reshape, label_reshape],
                                              batch_size=FLAGS.batch_size,
                                              num_threads=1,
                                              capacity=FLAGS.batch_size)
    return image_batch, label_batch


def weight_variables(shape):
    """Create a weight variable drawn from a normal distribution (mean 0, stddev 1)."""
    w = tf.Variable(tf.random_normal(shape=shape, mean=0.0, stddev=1.0))
    return w


def bias_variables(shape):
    """Create a bias variable initialised to zeros."""
    b = tf.Variable(tf.constant(0.0, shape=shape))
    return b


def fc_model(image):
    """Single fully connected layer mapping an image batch to 4 * 26 logits.

    :param image: image batch that can be flattened to [-1, 30 * 100 * 3]
    :return: float32 tensor of shape [batch, 4 * 26]
    """
    with tf.variable_scope("fc_model"):
        image_reshape = tf.reshape(image, [-1, 30 * 100 * 3])
        weights = weight_variables([30 * 100 * 3, 4 * 26])
        bias = bias_variables([4 * 26])
        # The pixels are cast to float32 before the matrix multiplication.
        y_predict = tf.matmul(tf.cast(image_reshape, tf.float32), weights) + bias
    return y_predict


def label_to_onehot(label):
    """One-hot encode the labels with depth 26 along axis 2.

    :param label: batch of character indices
    :return: one-hot tensor with the 26 classes on axis 2
    """
    label_onehot = tf.one_hot(label, depth=26, on_value=1.0, axis=2)
    return label_onehot


def captcharec():
    """Train the fully connected captcha recogniser and print its accuracy."""
    image_batch, label_batch = read_and_decode()

    # Logits, [batch, 4 * 26].
    y_predict = fc_model(image_batch)
    y_true = label_to_onehot(label_batch)

    # Softmax and cross-entropy over the flattened 4 * 26 outputs.
    # NOTE(review): loss is a per-example vector, not a scalar; minimize()
    # then follows the gradient of its sum - confirm whether a mean was intended.
    with tf.variable_scope("soft_cross"):
        loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.reshape(y_true, [-1, 4 * 26]), logits=y_predict
        )

    # Gradient descent optimisation of the loss.
    with tf.variable_scope("optimizer"):
        train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

    # Accuracy: mean over the batch and the 4 positions of "predicted
    # character == true character".
    with tf.variable_scope("acc"):
        equal_list = tf.equal(tf.argmax(y_true, 2),
                              tf.argmax(tf.reshape(y_predict, [-1, 4, 26]), 2))
        accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))

    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            for i in range(3000):
                # Fetch both in one run so only one batch is dequeued per step
                # and the accuracy refers to the batch that was trained on.
                _, acc = sess.run([train_op, accuracy])
                print("第%d次訓練的準確率爲:%f" % (i, acc))
        finally:
            # Always stop the queue-runner threads, even after a failure.
            coord.request_stop()
            coord.join(threads)


if __name__ == '__main__':
    captcharec()