This post mainly explains how to train a model with TensorFlow, export the trained model as a pb file, and finally how to reload that pb file.
First of all, here is the OP's GitHub repo:
https://github.com/ppplinday/tensorflow-vgg16-train-and-test
The pitcute folder contains the images: 15 dogs and 15 cats, 30 in total (don't judge, this is just practice, haha). The train file is the training script and the test file is the testing script.
Below, the OP will walk through the steps one by one.
!!!ps: Since the OP is still a beginner, a few (okay, many) small mistakes are inevitable; if any experts spot them, please point them out so the OP can learn, haha.
def read_img(path):
    cate = [path + x for x in os.listdir(path) if os.path.isdir(path + x)]
    imgs = []
    labels = []
    for idx, folder in enumerate(cate):
        for im in glob.glob(folder + '/*.jpg'):
            print('reading the image: %s' % (im))
            img = io.imread(im)
            img = transform.resize(img, (w, h, c))
            imgs.append(img)
            labels.append(idx)
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)

data, label = read_img(path)
Each image is read with io.imread and then resized to VGG's input size (224, 224, 3); the images and their class indices are collected into data and label respectively.
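As a quick sanity check (just a sketch; the file path below is only an example), you can preprocess a single image and confirm it comes out as a 224x224x3 float array. Note that skimage's transform.resize also rescales the pixel values into the [0, 1] range by default:

from skimage import io, transform

# Example path; use any .jpg from the picture folder.
img = io.imread('picture/dog/dog1.jpg')
img = transform.resize(img, (224, 224, 3))
print(img.shape)              # (224, 224, 3)
print(img.min(), img.max())   # values rescaled into [0, 1]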
num_example = data.shape[0]
arr = np.arange(num_example)
np.random.shuffle(arr)
data = data[arr]
label = label[arr]
This shuffles the order of the images: first generate the index array 0..num_example-1 with np.arange, shuffle it, and then use it to reindex data and label.
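An equivalent and slightly shorter way to do the same thing (just an alternative sketch, not what the repo uses) is np.random.permutation, which returns the shuffled index array directly:

perm = np.random.permutation(num_example)   # shuffled indices 0..num_example-1
data = data[perm]
label = label[perm]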
ratio = 0.8
s = np.int(num_example * ratio)
x_train = data[:s]
y_train = label[:s]
x_val = data[s:]
y_val = label[s:]
80% of the data is used for training and the remaining 20% for testing (even though there are only 30 images in total...).
def build_network(height, width, channel):
    x = tf.placeholder(tf.float32, shape=[None, height, width, channel], name='input')
    y = tf.placeholder(tf.int64, shape=[None, 2], name='labels_placeholder')
Now build the VGG model itself. This step is not hard, but it is best to give every layer a proper name. The x and y above are the input placeholder and the label placeholder.
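For reference, every conv layer in the network follows the same pattern: a named scope wrapping a 3x3 convolution, a bias add and a ReLU. Here is the first block as it appears in the full script below (weight_variable, bias_variable and conv2d are small helpers defined there):

with tf.name_scope('conv1_1') as scope:
    kernel = weight_variable([3, 3, 3, 64])   # 3x3 kernel, 3 input channels, 64 output channels
    biases = bias_variable([64])
    output_conv1_1 = tf.nn.relu(conv2d(x, kernel) + biases, name=scope)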
    finaloutput = tf.nn.softmax(output_fc8, name="softmax")

    # Cross entropy is computed on the raw logits (output_fc8);
    # softmax_cross_entropy_with_logits applies the softmax internally.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output_fc8, labels=y))
    optimize = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

    prediction_labels = tf.argmax(finaloutput, axis=1, name="output")
    # Convert the one-hot labels back to class indices before comparing.
    read_labels = tf.argmax(y, axis=1)

    correct_prediction = tf.equal(prediction_labels, read_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    correct_times_in_batch = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))

    return dict(
        x=x,
        y=y,
        optimize=optimize,
        correct_prediction=correct_prediction,
        correct_times_in_batch=correct_times_in_batch,
        cost=cost,
    )
At the end of build_network comes the loss computation. finaloutput is the softmax output, cost is the cross-entropy loss (computed on the raw logits), and optimize defines how the network is trained (Adam here). Also note what the function returns at the end.
Next comes the training process.
def train_network(graph, batch_size, num_epochs, pb_file_path):
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        epoch_delta = 2
        for epoch_index in range(num_epochs):
            for i in range(12):
                sess.run([graph['optimize']], feed_dict={
                    graph['x']: np.reshape(x_train[i], (1, 224, 224, 3)),
                    graph['y']: ([[1, 0]] if y_train[i] == 0 else [[0, 1]])
                })
That is really all the training code: set the batch size and the number of epochs and train. The code that follows in the full listing just reports the accuracy every few epochs.
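The feed_dict builds the one-hot label by hand ([[1, 0]] for class 0, [[0, 1]] for class 1). A small alternative sketch that produces the same array with np.eye, which also scales to more than two classes:

num_classes = 2
# Row i of the identity matrix is the one-hot vector for class i.
one_hot_label = np.eye(num_classes, dtype=np.float32)[y_train[i]].reshape(1, num_classes)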
constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ["output"])
with tf.gfile.FastGFile(pb_file_path, mode='wb') as f:
    f.write(constant_graph.SerializeToString())
These two lines are the important ones: they freeze the trained model and save it as a pb file. Once the run finishes you will find an extra pb file in the corresponding folder.
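To double-check that the export worked, you can reload the GraphDef and look for the named nodes. This is just a quick sketch assuming the file was written as vggs.pb; input, softmax and output should all show up:

import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.FastGFile('vggs.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())
print([n.name for n in graph_def.node if n.name in ('input', 'softmax', 'output')])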
def recognize(jpg_path, pb_file_path):
    with tf.Graph().as_default():
        output_graph_def = tf.GraphDef()
        with open(pb_file_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(output_graph_def, name="")
This opens the pb file and imports the saved graph.
img = io.imread(jpg_path)
img = transform.resize(img, (224, 224, 3))
img_out_softmax = sess.run(out_softmax, feed_dict={input_x: np.reshape(img, [-1, 224, 224, 3])})
Read the image file, resize it, and feed it into the model's input; img_out_softmax is then the corresponding output.
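img_out_softmax is the vector of class probabilities, and np.argmax turns it into a class index. Which index means cat and which means dog depends on the folder order returned by os.listdir in read_img, so the mapping below is only an assumption to verify against your own directory listing:

class_names = {0: 'cat', 1: 'dog'}   # assumed folder order; check os.listdir(path) on your machine
prediction_labels = np.argmax(img_out_softmax, axis=1)
print(class_names[int(prediction_labels[0])])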
That is roughly the whole workflow. The goal was just to get some practice, so the OP has probably made quite a few small mistakes; please point them out so the OP can keep learning, hahaha!!!
Finally, here is the complete train and test code:
train
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
import os
import glob
from skimage import io, transform
from tensorflow.python.framework import graph_util
import collections

path = '/home/zhoupeilin/vgg16/picture/'
w = 224
h = 224
c = 3

def read_img(path):
    cate = [path + x for x in os.listdir(path) if os.path.isdir(path + x)]
    imgs = []
    labels = []
    for idx, folder in enumerate(cate):
        for im in glob.glob(folder + '/*.jpg'):
            print('reading the image: %s' % (im))
            img = io.imread(im)
            img = transform.resize(img, (w, h, c))
            imgs.append(img)
            labels.append(idx)
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)

data, label = read_img(path)

num_example = data.shape[0]
arr = np.arange(num_example)
np.random.shuffle(arr)
data = data[arr]
label = label[arr]

ratio = 0.8
s = np.int(num_example * ratio)
x_train = data[:s]
y_train = label[:s]
x_val = data[s:]
y_val = label[s:]

def build_network(height, width, channel):
    x = tf.placeholder(tf.float32, shape=[None, height, width, channel], name='input')
    y = tf.placeholder(tf.int64, shape=[None, 2], name='labels_placeholder')

    def weight_variable(shape, name="weights"):
        initial = tf.truncated_normal(shape, dtype=tf.float32, stddev=0.1)
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name="biases"):
        initial = tf.constant(0.1, dtype=tf.float32, shape=shape)
        return tf.Variable(initial, name=name)

    def conv2d(input, w):
        return tf.nn.conv2d(input, w, [1, 1, 1, 1], padding='SAME')

    def pool_max(input):
        return tf.nn.max_pool(input,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name='pool1')

    def fc(input, w, b):
        return tf.matmul(input, w) + b

    # conv1
    with tf.name_scope('conv1_1') as scope:
        kernel = weight_variable([3, 3, 3, 64])
        biases = bias_variable([64])
        output_conv1_1 = tf.nn.relu(conv2d(x, kernel) + biases, name=scope)

    with tf.name_scope('conv1_2') as scope:
        kernel = weight_variable([3, 3, 64, 64])
        biases = bias_variable([64])
        output_conv1_2 = tf.nn.relu(conv2d(output_conv1_1, kernel) + biases, name=scope)

    pool1 = pool_max(output_conv1_2)

    # conv2
    with tf.name_scope('conv2_1') as scope:
        kernel = weight_variable([3, 3, 64, 128])
        biases = bias_variable([128])
        output_conv2_1 = tf.nn.relu(conv2d(pool1, kernel) + biases, name=scope)

    with tf.name_scope('conv2_2') as scope:
        kernel = weight_variable([3, 3, 128, 128])
        biases = bias_variable([128])
        output_conv2_2 = tf.nn.relu(conv2d(output_conv2_1, kernel) + biases, name=scope)

    pool2 = pool_max(output_conv2_2)

    # conv3
    with tf.name_scope('conv3_1') as scope:
        kernel = weight_variable([3, 3, 128, 256])
        biases = bias_variable([256])
        output_conv3_1 = tf.nn.relu(conv2d(pool2, kernel) + biases, name=scope)

    with tf.name_scope('conv3_2') as scope:
        kernel = weight_variable([3, 3, 256, 256])
        biases = bias_variable([256])
        output_conv3_2 = tf.nn.relu(conv2d(output_conv3_1, kernel) + biases, name=scope)

    with tf.name_scope('conv3_3') as scope:
        kernel = weight_variable([3, 3, 256, 256])
        biases = bias_variable([256])
        output_conv3_3 = tf.nn.relu(conv2d(output_conv3_2, kernel) + biases, name=scope)

    pool3 = pool_max(output_conv3_3)

    # conv4
    with tf.name_scope('conv4_1') as scope:
        kernel = weight_variable([3, 3, 256, 512])
        biases = bias_variable([512])
        output_conv4_1 = tf.nn.relu(conv2d(pool3, kernel) + biases, name=scope)

    with tf.name_scope('conv4_2') as scope:
        kernel = weight_variable([3, 3, 512, 512])
        biases = bias_variable([512])
        output_conv4_2 = tf.nn.relu(conv2d(output_conv4_1, kernel) + biases, name=scope)

    with tf.name_scope('conv4_3') as scope:
        kernel = weight_variable([3, 3, 512, 512])
        biases = bias_variable([512])
        output_conv4_3 = tf.nn.relu(conv2d(output_conv4_2, kernel) + biases, name=scope)

    pool4 = pool_max(output_conv4_3)

    # conv5
    with tf.name_scope('conv5_1') as scope:
        kernel = weight_variable([3, 3, 512, 512])
        biases = bias_variable([512])
        output_conv5_1 = tf.nn.relu(conv2d(pool4, kernel) + biases, name=scope)

    with tf.name_scope('conv5_2') as scope:
        kernel = weight_variable([3, 3, 512, 512])
        biases = bias_variable([512])
        output_conv5_2 = tf.nn.relu(conv2d(output_conv5_1, kernel) + biases, name=scope)

    with tf.name_scope('conv5_3') as scope:
        kernel = weight_variable([3, 3, 512, 512])
        biases = bias_variable([512])
        output_conv5_3 = tf.nn.relu(conv2d(output_conv5_2, kernel) + biases, name=scope)

    pool5 = pool_max(output_conv5_3)

    # fc6
    with tf.name_scope('fc6') as scope:
        shape = int(np.prod(pool5.get_shape()[1:]))
        kernel = weight_variable([shape, 4096])
        biases = bias_variable([4096])
        pool5_flat = tf.reshape(pool5, [-1, shape])
        output_fc6 = tf.nn.relu(fc(pool5_flat, kernel, biases), name=scope)

    # fc7
    with tf.name_scope('fc7') as scope:
        kernel = weight_variable([4096, 4096])
        biases = bias_variable([4096])
        output_fc7 = tf.nn.relu(fc(output_fc6, kernel, biases), name=scope)

    # fc8
    with tf.name_scope('fc8') as scope:
        kernel = weight_variable([4096, 2])
        biases = bias_variable([2])
        output_fc8 = tf.nn.relu(fc(output_fc7, kernel, biases), name=scope)

    finaloutput = tf.nn.softmax(output_fc8, name="softmax")

    # Cross entropy on the raw logits, not on the softmax output.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output_fc8, labels=y))
    optimize = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

    prediction_labels = tf.argmax(finaloutput, axis=1, name="output")
    # Convert the one-hot labels back to class indices before comparing.
    read_labels = tf.argmax(y, axis=1)

    correct_prediction = tf.equal(prediction_labels, read_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    correct_times_in_batch = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))

    return dict(
        x=x,
        y=y,
        optimize=optimize,
        correct_prediction=correct_prediction,
        correct_times_in_batch=correct_times_in_batch,
        cost=cost,
    )

def train_network(graph, batch_size, num_epochs, pb_file_path):
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        epoch_delta = 2
        for epoch_index in range(num_epochs):
            for i in range(12):
                sess.run([graph['optimize']], feed_dict={
                    graph['x']: np.reshape(x_train[i], (1, 224, 224, 3)),
                    graph['y']: ([[1, 0]] if y_train[i] == 0 else [[0, 1]])
                })
            if epoch_index % epoch_delta == 0:
                total_batches_in_train_set = 0
                total_correct_times_in_train_set = 0
                total_cost_in_train_set = 0.
                for i in range(12):
                    return_correct_times_in_batch = sess.run(graph['correct_times_in_batch'], feed_dict={
                        graph['x']: np.reshape(x_train[i], (1, 224, 224, 3)),
                        graph['y']: ([[1, 0]] if y_train[i] == 0 else [[0, 1]])
                    })
                    mean_cost_in_batch = sess.run(graph['cost'], feed_dict={
                        graph['x']: np.reshape(x_train[i], (1, 224, 224, 3)),
                        graph['y']: ([[1, 0]] if y_train[i] == 0 else [[0, 1]])
                    })
                    total_batches_in_train_set += 1
                    total_correct_times_in_train_set += return_correct_times_in_batch
                    total_cost_in_train_set += (mean_cost_in_batch * batch_size)

                total_batches_in_test_set = 0
                total_correct_times_in_test_set = 0
                total_cost_in_test_set = 0.
                for i in range(3):
                    return_correct_times_in_batch = sess.run(graph['correct_times_in_batch'], feed_dict={
                        graph['x']: np.reshape(x_val[i], (1, 224, 224, 3)),
                        graph['y']: ([[1, 0]] if y_val[i] == 0 else [[0, 1]])
                    })
                    mean_cost_in_batch = sess.run(graph['cost'], feed_dict={
                        graph['x']: np.reshape(x_val[i], (1, 224, 224, 3)),
                        graph['y']: ([[1, 0]] if y_val[i] == 0 else [[0, 1]])
                    })
                    total_batches_in_test_set += 1
                    total_correct_times_in_test_set += return_correct_times_in_batch
                    total_cost_in_test_set += (mean_cost_in_batch * batch_size)

                acy_on_test = total_correct_times_in_test_set / float(total_batches_in_test_set * batch_size)
                acy_on_train = total_correct_times_in_train_set / float(total_batches_in_train_set * batch_size)
                print('Epoch - {:2d}, acy_on_test:{:6.2f}%({}/{}),loss_on_test:{:6.2f}, acy_on_train:{:6.2f}%({}/{}),loss_on_train:{:6.2f}'.format(
                    epoch_index, acy_on_test * 100.0, total_correct_times_in_test_set,
                    total_batches_in_test_set * batch_size, total_cost_in_test_set,
                    acy_on_train * 100.0, total_correct_times_in_train_set,
                    total_batches_in_train_set * batch_size, total_cost_in_train_set))

        # Freeze the trained variables and write the graph to a pb file.
        constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ["output"])
        with tf.gfile.FastGFile(pb_file_path, mode='wb') as f:
            f.write(constant_graph.SerializeToString())

def main():
    batch_size = 12
    num_epochs = 50
    pb_file_path = "vggs.pb"

    g = build_network(height=224, width=224, channel=3)
    train_network(g, batch_size, num_epochs, pb_file_path)

main()
test
import tensorflow as tf
import numpy as np
import PIL.Image as Image
from skimage import io, transform

def recognize(jpg_path, pb_file_path):
    with tf.Graph().as_default():
        output_graph_def = tf.GraphDef()
        with open(pb_file_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(output_graph_def, name="")

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            input_x = sess.graph.get_tensor_by_name("input:0")
            print(input_x)
            out_softmax = sess.graph.get_tensor_by_name("softmax:0")
            print(out_softmax)
            out_label = sess.graph.get_tensor_by_name("output:0")
            print(out_label)

            img = io.imread(jpg_path)
            img = transform.resize(img, (224, 224, 3))
            img_out_softmax = sess.run(out_softmax, feed_dict={input_x: np.reshape(img, [-1, 224, 224, 3])})

            print("img_out_softmax:", img_out_softmax)
            prediction_labels = np.argmax(img_out_softmax, axis=1)
            print("label:", prediction_labels)

recognize("vgg16/picture/dog/dog3.jpg", "vgg16/vggs.pb")