使用TensorFlow搭建自己的驗證碼識別系統

[TOC]

學習tensorflow有一段時間了,想做點東西來練一下手。爲了更有意思點,下面將搭建一個簡單的驗證碼識別系統。

準備驗證碼數據

下面將生成一萬張四位英文字母的驗證碼,每張驗證碼是大小爲100 * 30的圖片,只包含大寫的英文字母,並將目標值保存到csv文件。

import random
import pandas as pd
from PIL import Image, ImageDraw, ImageFont


def generate_captcha(filename, format):
    """
    Render a random four-letter captcha and save it as an image file.

    The image is 100 px wide and 30 px high with a white RGB background.
    Every character is an uppercase ASCII letter drawn in its own random
    colour.

    :param filename: path of the image file to write
    :param format: image format passed to ``Image.save`` (e.g. "png")
    :return: the four-character captcha text
    """
    # Blank white RGB canvas, 100 px wide by 30 px high.
    img = Image.new(mode="RGB", size=(100, 30), color=(255, 255, 255))
    draw = ImageDraw.Draw(img, mode="RGB")
    font = ImageFont.truetype("arial", 28)

    chars = []
    for i in range(4):
        # 65..90 are the ASCII codes of 'A'..'Z'; chr() turns the code into
        # the letter, so no further random selection step is needed.
        char = chr(random.randint(65, 90))
        chars.append(char)

        # A fresh random colour for every character.
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        # Four glyphs share the 100 px width: advance 24 px per character,
        # starting 3 px from the left edge.
        draw.text((i * 24 + 3, 0), char, color, font=font)

    with open(filename, "wb") as f:
        img.save(f, format=format)

    return "".join(chars)


if __name__ == "__main__":
    import os

    # open() inside generate_captcha does not create directories, so make
    # sure the output folder exists before the first image is written.
    os.makedirs("./pics", exist_ok=True)

    data = []

    # Generate 10000 captcha images, collecting each image's target text.
    for j in range(10000):
        val = generate_captcha("./pics/{}.png".format(j), "png")
        data.append([val])

    # Save the targets as "<row index>,<text>" lines without a header row.
    df = pd.DataFrame(data, columns=['label'])
    df.to_csv('./pics/data.csv', header=False)

生成的驗證碼圖片是這樣子的,如下:

csv文件內容:

0,EFGQ
1,ZDKO
2,UWLD
3,CPDH
....

保存爲tfrecords文件

上面生成的圖片和其目標值是分開的,在進行訓練時不太方便(訓練時每次都要單獨地讀取圖片和目標值)。保存爲tfrecords文件,在訓練時會方便很多,讀取出來的每條記錄既有圖片特徵值又有目標值。

import tensorflow as tf
import os
import numpy as np


class CaptchaInput(object):
    """Pack captcha images and their labels into a single TFRecords file.

    Images are the ``*.png`` files in ``captcha_dir`` whose base names are
    integers; labels are read from ``data.csv`` in the same directory.
    """

    def __init__(self, captcha_dir, letter, tfrecords_dir):
        """
        :param captcha_dir: directory holding the captcha images and data.csv
        :param letter: string of every character a captcha may contain; a
            character's position in this string is its numeric class
        :param tfrecords_dir: path of the TFRecords file to write
        """
        self.captcha_dir = captcha_dir
        self.letter = letter
        self.tfrecords_dir = tfrecords_dir

        # Keep only the PNG files and sort them by the integer in their
        # base name (0.png, 1.png, 2.png, ...) rather than as strings.
        png_names = [name for name in os.listdir(self.captcha_dir) if name.endswith(".png")]
        png_names.sort(key=lambda name: int(name[0:-4]))
        self.file_list = [os.path.join(self.captcha_dir, name) for name in png_names]

        # Path of the label file.
        self.labels_path = os.path.join(self.captcha_dir, "data.csv")

    def read_captcha_image(self):
        """Build the ops that read every captcha image as one batch.

        :return: tensor batching all decoded images, in ``file_list`` order
        """
        # Filename queue; shuffle=False keeps the sorted file order.
        file_queue = tf.train.string_input_producer(self.file_list, shuffle=False)

        # Reader that returns a whole file's content per read.
        reader = tf.WholeFileReader()

        key, value = reader.read(file_queue)
        # Decode the PNG bytes and pin the static shape (height, width, channels).
        image = tf.image.decode_png(value)
        image.set_shape([30, 100, 3])

        # A single batch holding every image.
        image_batch = tf.train.batch([image], batch_size=len(self.file_list),
                                     num_threads=1, capacity=len(self.file_list))
        return image_batch

    def read_captcha_label(self):
        """Build the ops that read every label from data.csv as one batch.

        :return: tensor batching the label strings, one per image file
        """
        # Filename queue over the single label file, order preserved.
        file_queue = tf.train.string_input_producer([self.labels_path], shuffle=False)

        # Line-by-line text reader.
        reader = tf.TextLineReader()

        key, value = reader.read(file_queue)

        # Each line is "<int index>,<label string>"; the defaults fix the column types.
        records = [[0], [""]]
        index, label = tf.decode_csv(value, record_defaults=records)

        # A single batch with as many labels as there are image files.
        label_batch = tf.train.batch([label], batch_size=len(self.file_list),
                                     num_threads=1, capacity=len(self.file_list))

        return label_batch

    def process_labels(self, labels):
        """Convert label byte strings into an array of character indices.

        :param labels: iterable of UTF-8 encoded byte strings
        :return: ``numpy.ndarray`` of dtype ``uint8``; each row holds, for
            one label, the position in ``self.letter`` of every character
        :raises KeyError: if a label contains a character not in ``self.letter``
        """
        # Map each character to its position in self.letter.
        letter_num_dict = {char: num for num, char in enumerate(self.letter)}

        ret = [[letter_num_dict[char] for char in label.decode("utf-8")]
               for label in labels]

        # One byte per character: the serialised label must be readable with
        # tf.decode_raw(..., tf.uint8).  The platform default integer dtype
        # would serialise each index as several bytes instead.
        # Assumes self.letter has at most 256 characters.
        return np.array(ret, dtype=np.uint8)

    def write_to_tfrecords(self, images, labels):
        """
        Write images and labels to the TFRecords file at ``self.tfrecords_dir``.

        Each record stores the raw bytes of one image under "image" and the
        raw uint8 bytes of its label under "label".

        :param images: feature values; images[i] is a numpy.ndarray
        :param labels: target values; labels[i] is a numpy.ndarray of
            character indices
        :return: None
        """
        # Create the output directory if the path has one and it is missing.
        out_dir = os.path.dirname(self.tfrecords_dir)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        fw = tf.python_io.TFRecordWriter(self.tfrecords_dir)
        try:
            for i, (image, label) in enumerate(zip(images, labels)):
                image_bytes = image.tobytes()
                # Force one byte per character index regardless of the dtype
                # the caller passed in.
                label_bytes = np.asarray(label, dtype=np.uint8).tobytes()

                example = tf.train.Example(features=tf.train.Features(feature={
                    "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
                    "label": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label_bytes]))
                }))

                print("保存第%d張圖片" % (i, ))

                fw.write(example.SerializeToString())
        finally:
            # Close the writer even if serialising a record fails.
            fw.close()

    def execute(self):
        """Read all images and labels, then write them to the TFRecords file."""
        image_batch = self.read_captcha_image()
        label_batch = self.read_captcha_label()

        with tf.Session() as sess:

            coord = tf.train.Coordinator()

            threads = tf.train.start_queue_runners(sess, coord=coord)

            try:
                # e.g. [b'EFGQ' b'ZDKO' b'UWLD' ... b'TKPD' b'ZZEU' b'ATYA']
                labels = sess.run(label_batch)

                # numpy.ndarray of character indices
                labels = self.process_labels(labels)
                # numpy.ndarray of decoded images
                images = sess.run(image_batch)

                self.write_to_tfrecords(images, labels)
            finally:
                # Stop and join the queue-runner threads even on failure.
                coord.request_stop()
                coord.join(threads)


# Command-line flags for the TFRecords conversion script.
_flags = tf.app.flags

_flags.DEFINE_string("captcha_dir", "./pics", "驗證碼圖片路徑")
_flags.DEFINE_string("letter", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "驗證碼字符種類")
_flags.DEFINE_string("tfrecords_dir", "./tfrecords/captcha.tfrecords", "驗證碼tfrecords文件")

FLAGS = _flags.FLAGS


if __name__ == "__main__":
    # Build the converter from the flag values and run it.
    c = CaptchaInput(
        captcha_dir=FLAGS.captcha_dir,
        letter=FLAGS.letter,
        tfrecords_dir=FLAGS.tfrecords_dir,
    )
    c.execute()

需要注意:

  • os.listdir返回的文件名稱的順序是不確定的(即使按字符順序排列也會是 0.png, 1.png, 10.png...),需要按文件名中的數字對其進行排序
  • 使用tensorflow讀取圖片和標籤文件時,需要加上shuffle=False,避免文件亂序了,圖片和目標值對應不上。

驗證碼訓練

import tensorflow as tf


# Command-line flags for the training script.
_flags = tf.app.flags

_flags.DEFINE_string("captcha_dir", "./tfrecords/captcha.tfrecords", "驗證碼數據文件")
_flags.DEFINE_integer("batch_size", 100, "每批次訓練樣本數")

FLAGS = _flags.FLAGS


def read_and_decode():
    """Build the input pipeline that reads captcha batches from TFRecords.

    Records are read from the file named by ``FLAGS.captcha_dir``; each one
    carries the raw bytes of an image and of its label.

    :return: image_batch, label_batch
    """
    # Queue holding the single TFRecords file name.
    filename_queue = tf.train.string_input_producer([FLAGS.captcha_dir])

    # Pull one serialised example at a time.
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)

    # Both features are stored as a single byte string.
    feature_spec = {
        "image": tf.FixedLenFeature([], tf.string),
        "label": tf.FixedLenFeature([], tf.string)
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # Raw bytes -> flat uint8 tensors.
    image_flat = tf.decode_raw(parsed["image"], tf.uint8)
    label_flat = tf.decode_raw(parsed["label"], tf.uint8)

    # Restore the image shape (height, width, channels) and the four label slots.
    image_shaped = tf.reshape(image_flat, [30, 100, 3])
    label_shaped = tf.reshape(label_flat, [4])

    # Group single examples into batches of FLAGS.batch_size.
    image_batch, label_batch = tf.train.batch(
        [image_shaped, label_shaped],
        batch_size=FLAGS.batch_size,
        num_threads=1,
        capacity=FLAGS.batch_size,
    )
    return image_batch, label_batch


def weight_variables(shape):
    """Create a weight variable initialised from a normal distribution (mean 0, stddev 1)."""
    initial = tf.random_normal(shape=shape, mean=0.0, stddev=1.0)
    return tf.Variable(initial)


def bias_variables(shape):
    """Create a bias variable initialised to zeros."""
    initial = tf.constant(0.0, shape=shape)
    return tf.Variable(initial)


def fc_model(image):
    """Single fully connected layer mapping flattened images to 4 * 26 logits.

    :param image: image batch; it is reshaped to [-1, 30 * 100 * 3]
    :return: logits tensor with 4 * 26 columns
    """
    n_inputs = 30 * 100 * 3
    n_outputs = 4 * 26

    with tf.variable_scope("fc_model"):
        flat = tf.reshape(image, [-1, n_inputs])

        # Randomly initialised weights, zero-initialised bias.
        weights = weight_variables([n_inputs, n_outputs])
        bias = bias_variables([n_outputs])

        # matmul needs float input, so cast the pixels first.
        flat_float = tf.cast(flat, tf.float32)
        y_predict = tf.matmul(flat_float, weights) + bias

    return y_predict


def label_to_onehot(label):
    """One-hot encode the target indices with depth 26 along axis 2."""
    return tf.one_hot(label, depth=26, on_value=1.0, axis=2)


def captcharec():
    """Train the fully connected captcha model and print the accuracy per step.

    Builds the input pipeline, the model, a per-character softmax
    cross-entropy loss, a gradient-descent optimiser and a per-character
    accuracy metric, then runs 3000 training steps.
    """
    image_batch, label_batch = read_and_decode()
    # Logits with 4 * 26 columns per example.
    y_predict = fc_model(image_batch)

    # One-hot targets with one 26-way vector per character position.
    y_true = label_to_onehot(label_batch)

    # Softmax and cross-entropy loss.
    with tf.variable_scope("soft_cross"):
        # View the logits as one 26-way classification per character so the
        # softmax normalises over each position separately.  Flattening the
        # targets to 104 columns would feed a 4-hot vector, which is not a
        # probability distribution, into a single softmax.
        logits_per_char = tf.reshape(y_predict, [-1, 4, 26])
        per_char_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true,
            logits=logits_per_char
        )
        # Reduce to a scalar so the step size does not scale with batch size.
        loss = tf.reduce_mean(per_char_loss)

    # Gradient-descent optimisation of the loss.
    with tf.variable_scope("optimizer"):
        train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

    # Accuracy: fraction of character positions predicted correctly.
    with tf.variable_scope("acc"):
        equal_list = tf.equal(tf.argmax(y_true, 2), tf.argmax(logits_per_char, 2))
        accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))

    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:

        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            for i in range(3000):
                # Fetch both in one run so the accuracy is reported for the
                # batch that was trained on; a separate accuracy.eval() would
                # dequeue another batch on every step.
                _, acc = sess.run([train_op, accuracy])

                print("第%d次訓練的準確率爲:%f" % (i, acc))
        finally:
            # Stop and join the queue-runner threads even if training fails.
            coord.request_stop()
            coord.join(threads)


# Start training only when this file is run as a script.
if __name__ == '__main__':
    captcharec()
相關文章
相關標籤/搜索