筆者這幾天在跟着莫煩學習TensorFlow,正好學到遷移學習(至於什麼是遷移學習,看這篇),莫煩老師做的是預測貓和老虎尺寸大小的學習。作爲一個有爲的學生,筆者當然不能再預測貓啊狗啊的大小啦,正好之前做過貓狗大戰數據集的圖像分類,做好的數據都還在,二話不說,開擼。
既然是VGG16模型,當然首先上模型代碼了:
def conv_layers_simple_api(net_in):
    """Build the VGG16 convolutional stack (conv1_1 .. pool5) on top of ``net_in``.

    The input layer's outputs are first shifted by the per-channel mean so
    that the rest of the network sees mean-centred pixels.

    Args:
        net_in: TensorLayer input layer whose ``outputs`` hold RGB images with
            pixel values in the range 0-255.

    Returns:
        The ``pool5`` layer, i.e. the end of the convolutional stack.
    """
    with tf.name_scope('preprocess'):
        # Preprocessing layer: takes the RGB image with pixel values in the
        # range 0-255 and subtracts the mean image values (calculated over
        # the entire ImageNet training set).
        mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
        # NOTE: this overwrites the outputs of the caller's input layer in place.
        net_in.outputs = net_in.outputs - mean

    # (block number, filters per convolution, number of convolutions).
    # Layers are created in exactly this order with the names conv<block>_<i>
    # and pool<block>, so the parameter order in network.all_params is the
    # same as when every layer is written out by hand.
    vgg16_blocks = (
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 3),
        (4, 512, 3),
        (5, 512, 3),
    )

    network = net_in
    for block, n_filter, n_convs in vgg16_blocks:
        for idx in range(1, n_convs + 1):
            network = Conv2d(network, n_filter=n_filter, filter_size=(3, 3), strides=(1, 1),
                             act=tf.nn.relu, padding='SAME', name='conv%d_%d' % (block, idx))
        network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME',
                            name='pool%d' % block)
    return network
筆者偷懶直接用的是TensorLayer庫中的VGG16模型,至於什麼是TensorLayer請移步這裏。
按照莫煩老師的教程,改寫最後的全連接層做二分類學習:
def fc_layers(net):
    """Attach the two-class head to the convolutional features.

    Args:
        net: layer produced by the convolutional stack.

    Returns:
        The final dense layer; its outputs are the two per-class scores.
    """
    # Flatten the feature maps before the fully connected layers.
    flat = FlattenLayer(net, name='flatten')
    hidden = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    # Despite its name, this last layer uses an identity activation.
    scores = DenseLayer(hidden, n_units=2, act=tf.identity, name='fc3_relu')
    return scores
定義輸入輸出、損失函數以及學習步驟:
# Input: batches of 224x224 images with 3 channels.
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# Target: one integer class label per image.
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')

# Network: input layer -> VGG16 convolutional stack -> custom dense head.
net_in = InputLayer(x, name='input')
net_cnn = conv_layers_simple_api(net_in)  # simplified CNN APIs
network = fc_layers(net_cnn)
y = network.outputs

# Predicted class index, loss, and accuracy.
class_probs = tf.nn.softmax(y)
y_op = tf.argmax(class_probs, 1)
cost = tl.cost.cross_entropy(y, y_, name='cost')
predicted_as_float = tf.cast(tf.argmax(y, 1), tf.float32)
labels_as_float = tf.cast(y_, tf.float32)
correct_prediction = tf.equal(predicted_as_float, labels_as_float)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Optimizer: only the parameters from index 26 onwards are trained
# (presumably everything after the 13 conv layers' weights and biases).
train_params = network.all_params[26:]
global_step = tf.Variable(0)
# Optional exponentially decaying learning rate (staircase=True truncates
# global_step / decay_steps to an integer); currently disabled:
# learning_rate = tf.train.exponential_decay(1e-2, global_step, decay_steps=1000, decay_rate=0.98, staircase=True)
adam = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
                              epsilon=1e-08, use_locking=False)
train_op = adam.minimize(cost, var_list=train_params)
讀取訓練、驗證數據,加載模型參數:
# Queue-based readers for the training and validation TFRecord files.
img, label = read_and_decode("F:\\001-python\\train.tfrecords")
img_v, label_v = read_and_decode("F:\\001-python\\val.tfrecords")

# shuffle_batch yields randomly shuffled batches from the input queue.
X_train, y_train = tf.train.shuffle_batch([img, label],
                                          batch_size=30, capacity=400,
                                          min_after_dequeue=300)
X_Val, y_val = tf.train.shuffle_batch([img_v, label_v],
                                      batch_size=30, capacity=400,
                                      min_after_dequeue=300)
# NOTE(review): the queue runners must be started before X_train / X_Val can
# be evaluated; that call is not visible in this listing - confirm it exists.

tl.layers.initialize_global_variables(sess)
network.print_params()
network.print_layers()

# Load the pre-trained VGG16 weights for the convolutional stack only.
# The 13 conv layers own 26 arrays (one W and one b each), matching the
# network.all_params[26:] split used to pick the trainable parameters.
# Slicing [0:25] would silently leave the last conv bias un-loaded.
npz = np.load('vgg16_weights.npz')
params = [val[1] for val in sorted(npz.items())[0:26]]
# Assign the pre-trained arrays to the leading network parameters, in order.
tl.files.assign_params(sess, params, network)
加載好之後,開始訓練,200個epoch:
for epoch in range(n_epoch):
    start_time = time.time()

    # Dequeue one shuffled batch and run the training op over its minibatches.
    batch_images, batch_labels = sess.run([X_train, y_train])
    for X_train_a, y_train_a in tl.iterate.minibatches(batch_images, batch_labels, batch_size, shuffle=True):
        sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})

    # Report on the first epoch and then on every fifth one.
    epoch_number = epoch + 1
    if epoch_number != 1 and epoch_number % 5 != 0:
        continue

    print("Epoch %d of %d took %fs" % (epoch_number, n_epoch, time.time() - start_time))

    # Loss and accuracy are measured on the same batch that was just trained on.
    loss_sum, acc_sum, batches_seen = 0, 0, 0
    for X_train_a, y_train_a in tl.iterate.minibatches(batch_images, batch_labels, batch_size, shuffle=True):
        err, ac = sess.run([cost, acc], feed_dict={x: X_train_a, y_: y_train_a})
        loss_sum += err
        acc_sum += ac
        batches_seen += 1
    print(" train loss: %f" % (loss_sum / batches_seen))
    print(" train acc: %f" % (acc_sum / batches_seen))
保存訓練的參數:
# Save all network parameters to model.npz so a later run can resume from them.
tl.files.save_npz(
    network.all_params,
    name='model.npz',
    sess=sess,
)
下面就是開始訓練啦,筆者很高興地拿着自己的筆記本顯卡呼呼地跑了一遍:
~~~~~~~~~~~~~~~~~~~~~~~~下面是漫長的等待
....... [TL] Epoch 138 of 150 took 0.999402s [TL] val loss: 0.687194 [TL] val acc: 0.562500 [TL] Epoch 140 of 150 took 3.782207s [TL] val loss: 0.619966 [TL] val acc: 0.750000 [TL] Epoch 142 of 150 took 0.983802s [TL] val loss: 0.685686 [TL] val acc: 0.562500 [TL] Epoch 144 of 150 took 0.986604s [TL] val loss: 0.661224 [TL] val acc: 0.687500 [TL] Epoch 146 of 150 took 1.022403s [TL] val loss: 0.675885 [TL] val acc: 0.687500 [TL] Epoch 148 of 150 took 0.991802s [TL] val loss: 0.682124 [TL] val acc: 0.625000 [TL] Epoch 150 of 150 took 3.487811s [TL] val loss: 0.674932 [TL] val acc: 0.687500 [TL] Total training time: 319.859640s [TL] [*] model.npz saved
額~~~~~~~~~~~~~~~~~
0.68的正確率,羣裏一位朋友看了之後說:跟猜差不多了(一臉黑線)。問題出在哪兒呢?難道是筆者訓練的次數不夠多?莫煩老師可是100次就能出很好的結果啊。
無論怎麼樣,要試試,筆者於是加載剛剛保存的model.npz參數繼續跑100個epoch。
~~~~~~~~~~~~~~~~~~~~~~~~又是漫長的等待
[TL] Epoch 1 of 100 took 8.477617s [TL] val loss: 0.685957 [TL] val acc: 0.562500 [TL] Epoch 2 of 100 took 0.999402s [TL] val loss: 0.661529 [TL] val acc: 0.625000 ...... [TL] Epoch 94 of 100 took 0.992208s [TL] val loss: 0.708815 [TL] val acc: 0.562500 [TL] Epoch 96 of 100 took 0.998406s [TL] val loss: 0.710636 [TL] val acc: 0.562500 [TL] Epoch 98 of 100 took 0.992807s [TL] val loss: 0.621505 [TL] val acc: 0.687500 [TL] Epoch 100 of 100 took 0.986405s [TL] val loss: 0.670647 [TL] val acc: 0.625000 [TL] Total training time: 156.734633s [TL] [*] model.npz saved
坑爹啊這是,還不如以前的結果。
筆者陷入深深的沉思中,難道是改了全連接層導致的?於是筆者又把之前去掉的全連接層加上:
def fc_layers(net):
    """Attach the two-class head, with two hidden layers, to the conv features.

    Args:
        net: layer produced by the convolutional stack.

    Returns:
        The final dense layer; its outputs are the two per-class scores.
    """
    # Flatten the feature maps before the fully connected layers.
    flat = FlattenLayer(net, name='flatten')
    hidden_1 = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    hidden_2 = DenseLayer(hidden_1, n_units=256, act=tf.nn.relu, name='fc2_relu')
    # Despite its name, this last layer uses an identity activation.
    scores = DenseLayer(hidden_2, n_units=2, act=tf.identity, name='fc3_relu')
    return scores
接着訓練
~~~~~~~~~~~~~~~~~~~~~~~~下面又是漫長的等待
1 [TL] Epoch 1 of 100 took 8.477229s 2 [TL] val loss: 2.370650 3 [TL] val acc: 0.562500 4 ... 5 [TL] Epoch 100 of 100 took 1.016002s 6 [TL] val loss: 0.762171 7 [TL] val acc: 0.437500 8 [TL] Total training time: 156.836465s 9 [TL] [*] model.npz saved
還是一樣,筆者已經崩潰了,一定是哪兒不對啊啊啊....於是筆者去翻莫煩老師的代碼,一點點對下來,每一層的參數肯定不會有錯,那就是訓練時設置的參數有問題。
# Mofan's code: RMSProp with no var_list passed to minimize().
self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss)

# This article's code: Adam, restricted to the variables in train_params.
train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
                                  epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)
看到train_params,難道是這個train_params的問題?筆者只優化了最後的全連接層參數,而莫煩老師優化的是全部參數。
已經深夜了,筆者表示即使不睡覺也要跑一遍試試,於是改成:
# 定義 optimizer
train_params = network.all_params

~~~~~~~~~~~~~~~~~~~~~~~~於是又是漫長的等待

[TL] Epoch 1 of 100 took 20.286640s
[TL] val loss: 11.938850
[TL] val acc: 0.312500
[TL] Epoch 2 of 100 took 3.091806s
[TL] val loss: 2.890055
[TL] val acc: 0.625000
[TL] Epoch 4 of 100 took 3.074205s
[TL] val loss: 24.055895
[TL] val acc: 0.687500
[TL] ....
[TL] val loss: 0.699907
[TL] val acc: 0.500000
[TL] Epoch 98 of 100 took 3.089206s
[TL] val loss: 0.683627
[TL] val acc: 0.562500
[TL] Epoch 100 of 100 took 3.091806s
[TL] val loss: 0.708496
[TL] val acc: 0.562500
[TL] Total training time: 375.727307s
[TL] [*] model.npz saved
效果變得更差了....
排除參數的問題,已經深夜1點了,明天還要上班,不得不睡啦。
繼續崩潰第三天~~~
第四天~~~
第五天,今天供應商過來公司調試機器,來的正好是一個學圖像處理的小夥子,我提到這個問題說:我訓練了這麼多代,爲啥還是像猜一樣的概率....?小夥兒說:莫不是過擬合了吧?我說:不可能啊,現成的數據、現成的模型和參數,不應該的啊!
不過我還是得檢查一下數據處理的代碼:
# Side length in pixels shared by the TFRecord writer and reader. Both must
# agree, otherwise the reader's reshape does not match the stored byte count.
IMAGE_SIZE = 224


# Generate the TFRecord data file.
def create_record(filelist):
    """Write the images named in ``filelist`` to the TFRecord file ``recordpath``.

    Each image is read from ``file_dir``, converted to RGB, resized to
    IMAGE_SIZE x IMAGE_SIZE and stored as raw bytes together with its label:
    0 when the part of the file name before the first '.' is 'cat', else 1.

    Args:
        filelist: image file names relative to ``file_dir``. The names are
            written in random order; the caller's sequence is left untouched.
    """
    file_names = list(filelist)  # shuffle a copy, not the caller's list
    random.shuffle(file_names)
    writer = tf.python_io.TFRecordWriter(recordpath)
    try:
        for count, file_name in enumerate(file_names, start=1):
            name = file_name.split(sep='.')
            label_val = 0 if name[0] == 'cat' else 1
            img_path = file_dir + file_name
            # Force 3 channels so every record holds IMAGE_SIZE*IMAGE_SIZE*3 bytes.
            img = Image.open(img_path).convert('RGB')
            img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
            img_raw = img.tobytes()  # raw pixel bytes of the image
            # The Example wraps the label and the image bytes together.
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label_val])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
            }))
            writer.write(example.SerializeToString())
            print(name[1])
            print(label_val)
            print(count)
    finally:
        # Close the writer even if an image fails to load.
        writer.close()


# Read the file through an input queue.
def read_and_decode(filename):
    """Return ``(img, label)`` tensors that read one example from a TFRecord file.

    Args:
        filename: path of a TFRecord file written by ``create_record``.

    Returns:
        img: float32 tensor of shape [IMAGE_SIZE, IMAGE_SIZE, 3].
        label: int32 scalar tensor.
    """
    # Build a queue from the file name.
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    # read() returns a key and the serialized example; the key is not used.
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [IMAGE_SIZE, IMAGE_SIZE, 3])
    # NOTE(review): this rescales pixels to [-0.5, 0.5], while the preprocess
    # step in conv_layers_simple_api subtracts the ImageNet mean from 0-255
    # values. Confirm this scaling is wanted before feeding that network.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label
`img = tf.cast(img, tf.float32) * (1. / 255) - 0.5` 難道是這一步處理多餘?註釋掉之後,重新訓練模型:
1 Epoch 85 of 200 took 1.234071s 2 train loss: 14.689816 3 train acc: 0.900000 4 [TL] [*] model3.npz saved 5 Epoch 90 of 200 took 1.241071s 6 train loss: 17.104382 7 train acc: 0.800000 8 [TL] [*] model3.npz saved 9 Epoch 95 of 200 took 1.236071s 10 train loss: 11.190630 11 train acc: 0.850000 12 [TL] [*] model3.npz saved 13 Epoch 100 of 200 took 1.238071s 14 train loss: 0.000000 15 train acc: 1.000000 16 [TL] [*] model3.npz saved 17 Epoch 105 of 200 took 1.236071s 18 train loss: 7.622324 19 train acc: 0.900000 20 [TL] [*] model3.npz saved 21 Epoch 110 of 200 took 1.234071s 22 train loss: 2.164670 23 train acc: 0.950000 24 [TL] [*] model3.npz saved 25 Epoch 115 of 200 took 1.237071s 26 train loss: 0.000000 27 train acc: 1.000000 28 [TL] [*] model3.npz saved
準確度1,停停停...不用跑完了,Perfect!
原來如此,必須要用真實的像素值(0~255)……因爲VGG16的預處理層會先減去ImageNet的RGB均值,輸入如果先被縮放到[-0.5, 0.5],就和預訓練的權重對不上了。心好累……筆者已經不記得這一行是從哪兒抄來的了。
嗯,VGG16模型的遷移學習到此結束,代碼見github