【TensorFlow系列】【一】利用TFRecordDataset讀取圖片數據

時間 2019-11-15

標籤 TensorFlow系列利用 tfrecorddataset 讀取圖片數據简体版

原文原文鏈接

本文轉自我的微信公衆號，主要講述以下三個問題：

如何將原始圖片數據與label轉化爲TFRecords格式的數據？
如何利用TFRecordDataset讀取TFRecords格式的數據？
如何從TFRecordDataset中獲取數據進行NN訓練？

總體思路爲：

jpg ----> train.tfrecords ----> dataset ----> NN

【一】TFRecord綜述

【二】生成TFRecords文件

def image2tfrecord(image_list,label_list):
    """Serialize images and their labels into the file ``train.tfrecords``.

    Each image is opened with PIL, converted to 8-bit grayscale, resized to
    28x28 and stored as raw bytes under the ``image`` feature. Its label is
    stored as an int64 under the ``label`` feature.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels, index-aligned with ``image_list``;
            each entry is passed through ``int()``.

    Raises:
        IndexError: If ``label_list`` is shorter than ``image_list``.
    """
    print("len=", len(image_list))
    # The context manager closes the writer even if an image fails to load.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for i, image_path in enumerate(image_list):
            # Open and decode the image; the file handle is released on exit.
            with Image.open(image_path) as source_image:
                # Force a single channel so every record is exactly 28*28
                # bytes, which is the size pares_tf reshapes to.
                image = source_image.convert("L").resize((28, 28))
            # Raw pixel bytes
            image_bytes = image.tobytes()
            features = {
                # The image is stored as a bytes feature
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])),
                # The label is stored as an int64 feature
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label_list[i])])),
            }
            # Wrap the feature dict into an Example and serialize it
            tf_example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())

【三】解析TFRecord文件函數

定義解析數據函數
#入參example_proto也就是tf_serialized
def pares_tf(example_proto):
    """Decode one serialized example into an (image, label) tensor pair.

    Args:
        example_proto: A serialized ``tf.train.Example`` holding an int64
            ``label`` feature and a bytes ``image`` feature.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor of 784
        values scaled to [-0.5, 0.5]; ``label`` is a one-hot int32 tensor
        of depth 10.
    """
    # Feature spec: names and dtypes must match what the writer stored.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    parsed = tf.parse_single_example(serialized=example_proto, features=feature_spec)

    # Raw bytes -> uint8 vector -> flat 28*28 image.
    pixels = tf.decode_raw(parsed['image'], out_type=tf.uint8)
    pixels = tf.reshape(pixels, shape=[28*28])
    # Normalization is essential here: per the original author, without it
    # training does not converge (accuracy stays around 0.1).
    pixels = tf.cast(pixels, tf.float32)*(1./255)-0.5

    class_index = tf.cast(parsed['label'], tf.int32)
    one_hot_label = tf.one_hot(class_index, depth=10, on_value=1)
    return pixels, one_hot_label

【四】利用TFRecordDataset讀取數據並進行NN訓練

此處，依舊以LeNet爲例。

import tensorflow as tf
from PIL import Image

def paths2list(path_file_name):
    """Read a text file and return its lines as a list of strings.

    Only the line terminator is removed from each line, so a final line
    without a trailing newline is returned intact.

    Args:
        path_file_name: Path of a text file with one entry per line.

    Returns:
        A list with one string per line of the file, in file order.
    """
    with open(path_file_name) as path_file:
        return [line.rstrip("\r\n") for line in path_file]
def pathslabel2list(path_file_name):
    """Read a text file of labels and return them as a list of ints.

    Args:
        path_file_name: Path of a text file with one integer label per line.

    Returns:
        A list with one ``int`` per line of the file, in file order.

    Raises:
        ValueError: If a line (including a blank line) is not an integer.
    """
    with open(path_file_name) as label_file:
        # Labels are stored as text; int() ignores surrounding whitespace,
        # so the line terminator needs no manual slicing.
        return [int(line) for line in label_file]
def one_hot_2_int(one_hot):
    """Return the index of the first entry equal to 1 among the first ten.

    Args:
        one_hot: Indexable sequence; positions 0..9 are inspected in order.

    Returns:
        The first index ``i`` in ``range(10)`` with ``one_hot[i] == 1``,
        or 0 when none of the ten positions equals 1.
    """
    return next((idx for idx in range(10) if one_hot[idx] == 1), 0)
# Load the training image paths and their integer labels from two text
# files (one entry per line). The locations are hard-coded Windows paths;
# both lists are passed to image2tfrecord() further down.
train_image_list = paths2list(r"E:\mnist_jpg\jpg\train\train_image_list.txt")
train_image_label_list =  pathslabel2list(r"E:\mnist_jpg\jpg\train\train_label_list.txt")

#定義建立TFRcord的文件

def image2tfrecord(image_list,label_list):
    """Serialize images and their labels into the file ``train.tfrecords``.

    Each image is opened with PIL, converted to 8-bit grayscale, resized to
    28x28 and stored as raw bytes under the ``image`` feature. Its label is
    stored as an int64 under the ``label`` feature.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels, index-aligned with ``image_list``;
            each entry is passed through ``int()``.

    Raises:
        IndexError: If ``label_list`` is shorter than ``image_list``.
    """
    print("len=", len(image_list))
    # The context manager closes the writer even if an image fails to load.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for i, image_path in enumerate(image_list):
            # Open and decode the image; the file handle is released on exit.
            with Image.open(image_path) as source_image:
                # Force a single channel so every record is exactly 28*28
                # bytes, which is the size pares_tf reshapes to.
                image = source_image.convert("L").resize((28, 28))
            # Raw pixel bytes
            image_bytes = image.tobytes()
            features = {
                # The image is stored as a bytes feature
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])),
                # The label is stored as an int64 feature
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label_list[i])])),
            }
            # Wrap the feature dict into an Example and serialize it
            tf_example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())
# Call the function above to convert the image and label data into
# TFRecord format; this runs at import time and writes train.tfrecords.
image2tfrecord(train_image_list,train_image_label_list)

#定義解析數據函數
#入參example_proto也就是tf_serialized
def pares_tf(example_proto):
    """Decode one serialized example into an (image, label) tensor pair.

    Args:
        example_proto: A serialized ``tf.train.Example`` holding an int64
            ``label`` feature and a bytes ``image`` feature.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor of 784
        values scaled to [-0.5, 0.5]; ``label`` is a one-hot int32 tensor
        of depth 10.
    """
    # Feature spec: names and dtypes must match what the writer stored.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    parsed = tf.parse_single_example(serialized=example_proto, features=feature_spec)

    # Raw bytes -> uint8 vector -> flat 28*28 image.
    pixels = tf.decode_raw(parsed['image'], out_type=tf.uint8)
    pixels = tf.reshape(pixels, shape=[28*28])
    # Normalization is essential here: per the original author, without it
    # training does not converge (accuracy stays around 0.1).
    pixels = tf.cast(pixels, tf.float32)*(1./255)-0.5

    class_index = tf.cast(parsed['label'], tf.int32)
    one_hot_label = tf.one_hot(class_index, depth=10, on_value=1)
    return pixels, one_hot_label

# Input pipeline: read the serialized examples from train.tfrecords, decode
# each one with pares_tf, group them into batches of 32 and make one pass.
dataset = (
    tf.data.TFRecordDataset(filenames=['train.tfrecords'])
    .map(pares_tf)
    .batch(32)
    .repeat(1)
)

# One-shot iterator over the pipeline; next_element is the (image, label)
# tensor pair that yields a new batch each time it is evaluated.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Input placeholder: flattened MNIST images, 28*28*1 = 784 values each; None is the batch size
x = tf.placeholder(dtype=tf.float32,shape=[None,28*28],name="x")
# Label placeholder: MNIST has 10 classes
y_ = tf.placeholder(dtype=tf.float32,shape=[None,10],name="y_")
# Reshape each flat row back into a 28x28 single-channel image: [N, 28, 28, 1]; -1 stands for N images
image = tf.reshape(x,shape=[-1,28,28,1])

# Layer 1: 5x5 convolution (1 -> 32 channels), ReLU, then 2x2 max-pooling with stride 2.
# The name= argument belongs to tf.Variable (not to the random_normal initializer op),
# so the variable itself is called "w1" in the graph.
w1 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,1,32],stddev=0.1,dtype=tf.float32),name="w1")
b1 = tf.Variable(initial_value=tf.zeros(shape=[32]))
conv1 = tf.nn.conv2d(input=image,filter=w1,strides=[1,1,1,1],padding="SAME",name="conv1")
relu1 = tf.nn.relu(tf.nn.bias_add(conv1,b1),name="relu1")
pool1 = tf.nn.max_pool(value=relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")
# shape = [None, 14, 14, 32]
# Layer 2: 5x5 convolution (32 -> 64 channels), ReLU, then 2x2 max-pooling with stride 2.
w2 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,32,64],stddev=0.1,dtype=tf.float32),name="w2")
b2 = tf.Variable(initial_value=tf.zeros(shape=[64]))
conv2 = tf.nn.conv2d(input=pool1,filter=w2,strides=[1,1,1,1],padding="SAME")
relu2 = tf.nn.relu(tf.nn.bias_add(conv2,b2),name="relu2")
pool2 = tf.nn.max_pool(value=relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME",name="pool2")
# shape = [None, 7, 7, 64]
# FC1: fully connected, 7*7*64 -> 1024, ReLU.
w3 = tf.Variable(initial_value=tf.random_normal(shape=[7*7*64,1024],stddev=0.1,dtype=tf.float32),name="w3")
b3 = tf.Variable(initial_value=tf.zeros(shape=[1024]))
# Important: flatten the pooled feature maps before the matmul.
input3 = tf.reshape(pool2,shape=[-1,7*7*64],name="input3")
fc1 = tf.nn.relu(tf.nn.bias_add(value=tf.matmul(input3,w3),bias=b3))
# shape = [None, 1024]
# FC2: fully connected, 1024 -> 10; the outputs are unnormalized class scores (logits).
w4 = tf.Variable(initial_value=tf.random_normal(shape=[1024,10],stddev=0.1,dtype=tf.float32),name="w4")
b4 = tf.Variable(initial_value=tf.zeros(shape=[10]))
fc2 = tf.nn.bias_add(value=tf.matmul(fc1,w4),bias=b4)
# shape = [None, 10]
# Softmax turns the network outputs into class probabilities; y is only
# used for the accuracy computation below.
y = tf.nn.softmax(fc2)

# Cross-entropy loss, computed from the logits with the fused op instead of
# tf.log(tf.nn.softmax(...)): the manual form yields NaN as soon as a
# probability underflows to 0. The loss is averaged per example; the original
# reduce_sum had no axis, so it summed over the whole batch and the outer
# reduce_mean was a no-op on the resulting scalar.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=fc2))
# Optimizer (training op)
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss=cross_entropy)

# A prediction is correct when the arg-max index of y equals that of y_
correct_predict = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
# Accuracy is the mean of the correct/incorrect indicators
accuracy = tf.reduce_mean(tf.cast(correct_predict,dtype=tf.float32),name="accuracy")
# Variable initialization op
init = tf.global_variables_initializer()

# Training loop: pull batches from the dataset iterator until it is
# exhausted, run one optimizer step per batch, and report the accuracy on
# the current batch every 100 steps.
with tf.Session() as sess:
    print("start")
    sess.run(fetches=init)
    i = 0
    try:
        while True:
            # Fetch the next batch through the session. The results get their
            # own names so the graph tensor `image` is not overwritten by a
            # NumPy array.
            batch_images, batch_labels = sess.run(fetches=next_element)
            feed = {x: batch_images, y_: batch_labels}
            sess.run(fetches=train, feed_dict=feed)
            if i % 100 == 0:
                train_accuracy = sess.run(fetches=accuracy, feed_dict=feed)
                print(i, "accuracy=", train_accuracy)
            i = i + 1
    except tf.errors.OutOfRangeError:
        # Raised by the iterator once every batch has been consumed.
        print("end!")
數據來源如下圖：

訓練結果如下圖：

【五】總結
1.圖片數據進行預處理時，一定要歸一化，即將0-255的像素值縮放到固定的小範圍（本文代碼中爲[-0.5, 0.5]）。
2.TFRecord文件的生成與讀取，原理就是序列化與反序列化的過程，名字、類型對上即可。
3.TFRecord可以保存多個feature，讀取時可以只解析其中感興趣的部分。
4.使用TFRecordDataset讀取數據，其底層封裝了多線程、隊列等操作，簡單、便捷。
5.使用dataset讀取數據，不用修改網絡，只需要將數據送入網絡中用placeholder代替的輸入與標籤數據即可。
6.儘可能將原始數據轉化爲TFRecord格式的數據，並用dataset進行讀取。速度快、方便、簡單。

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。