本文轉自我的微信公衆號,主要講述以下三個問題:
如何將原始圖片數據與label轉化爲TFRecords格式的數據?
如何利用TFRecordDataset讀取TFRecords格式的數據?
如何從TFRecordDataset中獲取數據進行NN訓練?
總體思路爲:
jpg---->train.tfrecords----->dataset------>NN
【一】TFRecord綜述
【二】生成TFRecords文件
def image2tfrecord(image_list, label_list, output_path="train.tfrecords"):
    """Serialize images and their labels into a single TFRecords file.

    Each image is opened with PIL, converted to 8-bit grayscale, resized to
    28x28 and stored as raw bytes under the ``image`` feature; the matching
    label is stored as an int64 under the ``label`` feature.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels (anything ``int()`` accepts), aligned
            index-by-index with ``image_list``.
        output_path: Destination TFRecords file. Defaults to
            ``"train.tfrecords"``, the path the original code hard-coded.

    Raises:
        ValueError: If the two lists do not have the same length. Checked up
            front so a mismatch cannot leave a half-written file behind.
    """
    len2 = len(image_list)
    if len(label_list) != len2:
        raise ValueError(
            "image_list has %d entries but label_list has %d"
            % (len2, len(label_list))
        )
    print("len=", len2)

    # The context manager closes (and flushes) the writer even when an image
    # fails to load part-way through the loop.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image_path, label in zip(image_list, label_list):
            # Read and decode the picture; `with` releases the file handle.
            with Image.open(image_path) as source_image:
                # Force single-channel data so every record holds exactly
                # 28*28 bytes; an RGB JPEG would otherwise yield 28*28*3
                # bytes, which a reader reshaping to 28*28 cannot handle.
                image = source_image.convert("L").resize((28, 28))
                # Raw pixel bytes.
                image_bytes = image.tobytes()

            features = {
                # The image is stored as a bytes feature.
                "image": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])
                ),
                # The label is stored as an int64 feature.
                "label": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label)])
                ),
            }
            # Combine the features, wrap them in an Example, serialize, write.
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            writer.write(tf_example.SerializeToString())
【三】解析TFRecord文件
# Definition of the record-parsing function.
# The argument `example_proto` is one serialized example, i.e. the
# `tf_serialized` value that was written to the TFRecords file.
def pares_tf(example_proto):
    """Decode one serialized example into an (image, one-hot label) pair.

    Args:
        example_proto: Scalar string tensor holding a serialized
            ``tf.train.Example`` with an ``image`` bytes feature and a
            ``label`` int64 feature.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor of 28*28
        values computed as ``pixel / 255 - 0.5``; ``label`` is the result of
        ``tf.one_hot`` with ``depth=10`` and ``on_value=1``.
    """
    # Feature specification used to parse the record.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    # Parse a single serialized sample.
    parsed = tf.parse_single_example(
        serialized=example_proto, features=feature_spec
    )

    pixels = tf.decode_raw(parsed['image'], out_type=tf.uint8)
    pixels = tf.reshape(pixels, shape=[28 * 28])
    # Author's note: this normalization is essential. Without it training
    # does not converge (accuracy stays around 0.1); with it the accuracy
    # matches what is obtained from the original data.
    pixels = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5

    class_index = tf.cast(parsed['label'], tf.int32)
    one_hot_label = tf.one_hot(class_index, depth=10, on_value=1)
    return pixels, one_hot_label
【四】利用TFRecordDataset讀取數據並進行NN訓練
此處,依舊以LeNet爲例。
import tensorflow as tf
from PIL import Image


def paths2list(path_file_name):
    """Return the lines of a text file as a list of strings.

    Only the line terminator is removed, so a final line without a trailing
    newline keeps its last character.
    """
    with open(path_file_name) as path_file:
        return [line.rstrip("\n") for line in path_file]


def pathslabel2list(path_file_name):
    """Return the lines of a text file as a list of ints.

    Labels are stored as text, so each line is converted with ``int``.

    Raises:
        ValueError: If a line is not a valid integer (including a blank line).
    """
    with open(path_file_name) as label_file:
        return [int(line.rstrip("\n")) for line in label_file]


def one_hot_2_int(one_hot):
    """Return the first index in 0..9 whose entry equals 1, or 0 if none does."""
    for i in range(10):
        if one_hot[i] == 1:
            return i
    return 0


train_image_list = paths2list(r"E:\mnist_jpg\jpg\train\train_image_list.txt")
train_image_label_list = pathslabel2list(r"E:\mnist_jpg\jpg\train\train_label_list.txt")


# Creates the TFRecords file.
def image2tfrecord(image_list, label_list, output_path="train.tfrecords"):
    """Serialize images and their labels into a single TFRecords file.

    Each image is converted to 8-bit grayscale, resized to 28x28 and stored
    as raw bytes under ``image``; the label is stored as int64 under
    ``label``.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels aligned with ``image_list``.
        output_path: Destination file; defaults to ``"train.tfrecords"``.

    Raises:
        ValueError: If the two lists differ in length.
    """
    len2 = len(image_list)
    if len(label_list) != len2:
        raise ValueError(
            "image_list has %d entries but label_list has %d"
            % (len2, len(label_list))
        )
    print("len=", len2)

    # The context manager closes the writer even if an image fails to load.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image_path, label in zip(image_list, label_list):
            # Read and decode the picture.
            with Image.open(image_path) as source_image:
                # Force one channel so each record is exactly 28*28 bytes,
                # which is what pares_tf reshapes to.
                image = source_image.convert("L").resize((28, 28))
                # Raw pixel bytes.
                image_bytes = image.tobytes()

            features = {
                # The image is stored as bytes.
                "image": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])
                ),
                # The label is stored as int64.
                "label": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label)])
                ),
            }
            # Combine the features, wrap in an Example, serialize and write.
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            writer.write(tf_example.SerializeToString())


# Convert the image and label data to TFRecords format.
image2tfrecord(train_image_list, train_image_label_list)


# Record-parsing function; `example_proto` is one serialized example
# (the `tf_serialized` value written above).
def pares_tf(example_proto):
    """Decode one serialized example into an (image, one-hot label) pair."""
    # Feature specification used for parsing.
    dics = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    # Parse a single sample.
    parsed_example = tf.parse_single_example(serialized=example_proto, features=dics)
    image = tf.decode_raw(parsed_example['image'], out_type=tf.uint8)
    image = tf.reshape(image, shape=[28 * 28])
    # Author's note: this normalization is essential. Without it training
    # does not converge (accuracy stays around 0.1); with it the accuracy
    # matches what is obtained from the original data.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    label = parsed_example['label']
    label = tf.cast(label, tf.int32)
    label = tf.one_hot(label, depth=10, on_value=1)
    return image, label


dataset = tf.data.TFRecordDataset(filenames=['train.tfrecords'])
dataset = dataset.map(pares_tf)
dataset = dataset.batch(32).repeat(1)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Input placeholder: flattened 28*28*1 = 784 pixels; None is the batch size.
x = tf.placeholder(dtype=tf.float32, shape=[None, 28 * 28], name="x")
# Label placeholder: 10 classes.
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="y_")
# Reshape the flat input to N x 28 x 28 x 1 (-1 stands for N images).
image = tf.reshape(x, shape=[-1, 28, 28, 1])

# Layer 1: 5x5x1x32 convolution, then 2x2 max-pooling with stride 2.
# `name` is given to tf.Variable so that the variable itself carries it
# (it was previously passed to the initializer op instead).
w1 = tf.Variable(
    initial_value=tf.random_normal(shape=[5, 5, 1, 32], stddev=0.1, dtype=tf.float32),
    name="w1",
)
b1 = tf.Variable(initial_value=tf.zeros(shape=[32]))
conv1 = tf.nn.conv2d(input=image, filter=w1, strides=[1, 1, 1, 1], padding="SAME", name="conv1")
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, b1), name="relu1")
pool1 = tf.nn.max_pool(value=relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# shape = {None, 14, 14, 32}

# Layer 2: 5x5x32x64 convolution, then 2x2 max-pooling with stride 2.
w2 = tf.Variable(
    initial_value=tf.random_normal(shape=[5, 5, 32, 64], stddev=0.1, dtype=tf.float32),
    name="w2",
)
b2 = tf.Variable(initial_value=tf.zeros(shape=[64]))
conv2 = tf.nn.conv2d(input=pool1, filter=w2, strides=[1, 1, 1, 1], padding="SAME")
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, b2), name="relu2")
pool2 = tf.nn.max_pool(value=relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool2")
# shape = {None, 7, 7, 64}

# FC1
w3 = tf.Variable(
    initial_value=tf.random_normal(shape=[7 * 7 * 64, 1024], stddev=0.1, dtype=tf.float32),
    name="w3",
)
b3 = tf.Variable(initial_value=tf.zeros(shape=[1024]))
# Key step: flatten the pooled feature maps before the dense layer.
input3 = tf.reshape(pool2, shape=[-1, 7 * 7 * 64], name="input3")
fc1 = tf.nn.relu(tf.nn.bias_add(value=tf.matmul(input3, w3), bias=b3))
# shape = {None, 1024}

# FC2
w4 = tf.Variable(
    initial_value=tf.random_normal(shape=[1024, 10], stddev=0.1, dtype=tf.float32),
    name="w4",
)
b4 = tf.Variable(initial_value=tf.zeros(shape=[10]))
fc2 = tf.nn.bias_add(value=tf.matmul(fc1, w4), bias=b4)
# shape = {None, 10}

# Softmax turns the network output into class probabilities (used for the
# accuracy computation below).
y = tf.nn.softmax(fc2)
# Cross-entropy loss. log_softmax on the logits avoids log(0) -> NaN, which
# tf.log(tf.nn.softmax(...)) produces once a probability underflows. Summing
# over the class axis first makes the outer reduce_mean a real per-batch mean.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.nn.log_softmax(fc2), axis=1))
# Solver.
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss=cross_entropy)

# A prediction is correct when the arg-max indices of output and label agree.
correct_predict = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Accuracy is the mean of the correctness indicator.
accuracy = tf.reduce_mean(tf.cast(correct_predict, dtype=tf.float32), name="accuracy")

# Variable initialization op.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    print("start")
    sess.run(fetches=init)

    i = 0
    try:
        while True:
            # Pull the next batch from the dataset through the session.
            # Distinct names keep the graph tensor `image` from being shadowed.
            batch_images, batch_labels = sess.run(fetches=next_element)
            sess.run(fetches=train, feed_dict={x: batch_images, y_: batch_labels})
            if i % 100 == 0:
                train_accuracy = sess.run(
                    fetches=accuracy, feed_dict={x: batch_images, y_: batch_labels}
                )
                print(i, "accuracy=", train_accuracy)
            i = i + 1
    except tf.errors.OutOfRangeError:
        # Raised by the one-shot iterator once the dataset is exhausted.
        print("end!")

# The data source is shown in the figure below:
訓練結果如下圖:
【五】總結
1.圖片數據進行預處理時,一定要歸一化,即將0-255的像素值處理到0-1附近的小範圍(本文代碼實際爲 x/255-0.5,即 -0.5~0.5)。
2.TFRecord文件的生成與讀取,原理就是序列化與反序列化的過程,名字、類型對上即可。
3.TFRecord可以保存多個feature,只解析其中感興趣的部分。
4.使用TFRecordDataset讀取數據,其底層封裝了多線程、隊列等操作,簡單、便捷。
5.使用dataset讀取數據,不用修改網絡,只需要將數據送入網絡中用placeholder代替的輸入與標籤數據即可。
6.盡量將原始數據轉化爲TFRecord格式的數據,並用dataset進行讀取。速度快、方便、簡單。