This post tackles the following problem: how do you train a classification network on images that each carry multiple labels, i.e., where the annotation is image-level?
The challenges:
1. The number of labels per image is not fixed: some images have one label, others have two, but the total number of label classes is fixed, say 5.
To handle this, a label-padding approach is used: every missing label is encoded as all zeros, which means plain one-hot encoding is no longer used.
For example, given the label vector -1,1,1,-1,1 (where -1 means the class is absent and 1 means it is present), this image's label encoding becomes:
0 0 0 0 0
0 1 0 0 0
0 0 1 0 0
0 0 0 0 0
0 0 0 0 1
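A minimal sketch of this padded encoding (the helper name encode_labels is my own; the hard-coded version actually used in the preprocessing script below is int_2_one_hot):

def encode_labels(raw):
    # raw: 5 ints, -1 = class absent, 1 = class present
    # returns 5 rows: the one-hot row of class k if present, all zeros if not
    rows = []
    for k, v in enumerate(raw):
        row = [0] * 5
        if v == 1:
            row[k] = 1
        rows.append(row)
    return rows

print(encode_labels([-1, 1, 1, -1, 1]))
# [[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]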
2. How should the loss be measured?
The loss is computed for each label of an image, and the results are averaged.
3. How should accuracy be computed?
The accuracy is computed for each label of an image, and the results are averaged.
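To make both measures concrete, here is a small NumPy sketch (my own illustration with made-up softmax outputs, not the training code below): each of the 5 label heads yields a cross-entropy, and the image-level loss is their plain average; accuracy is averaged the same way.

import numpy as np

def head_cross_entropy(y_true, y_prob, eps=1e-8):
    # Cross-entropy of one label head; an all-zero row (absent label)
    # contributes 0, matching the padded encoding above
    return -np.sum(y_true * np.log(np.clip(y_prob, eps, None)))

# Made-up softmax outputs of the 5 heads for one image (uniform 0.2 each)
probs = np.full((5, 5), 0.2)
labels = np.array([[0, 0, 0, 0, 0],
                   [0, 1, 0, 0, 0],
                   [0, 0, 1, 0, 0],
                   [0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1]])
per_label = [head_cross_entropy(labels[k], probs[k]) for k in range(5)]
print(np.mean(per_label))  # (0 + ln5 + ln5 + 0 + ln5) / 5 ~= 0.966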
【Dataset】
This post uses the Nanjing University MIML image dataset; for details see:
https://blog.csdn.net/u012936765/article/details/76944727
【Data Preprocessing】
Since the dataset's labels come as a .mat file, they are first converted into a txt label file.
The images are then converted into TFRecord files. Note: the label encoding is done at this step. The code is as follows:
import tensorflow as tf
import scipy.io as sio
import numpy as np
import os
from PIL import Image

BASE_PATH = r"E:\miml-image-data\original"
BASE_LABEL_PATH = r"E:\miml-image-data\processed"

def mat2txt():
    # Read the .mat file with scipy; the label data is stored under 'targets'
    mat_data = sio.loadmat(os.path.join(BASE_LABEL_PATH, "miml data.mat"))
    label_data = mat_data['targets']
    with open(os.path.join(BASE_LABEL_PATH, "label.txt"), 'w') as f:
        # 'targets' is 5 x N: one row per class, one column per image
        labels = []
        for i in range(len(label_data)):
            labels.append(label_data[i].tolist())
        for j in range(len(labels[0])):
            # Collect the 5 class flags (-1/1) of image j
            line = [labels[k][j] for k in range(5)]
            line = ','.join(str(s) for s in line)
            jpg_name = str(j + 1) + ".jpg"
            f.write(jpg_name + ',' + line + '\n')

# mat2txt()  # run once to generate label.txt

# Split into train/test: every 5th image goes to the test set
train_list = []
test_list = []
with open(os.path.join(BASE_LABEL_PATH, "label.txt")) as f:
    i = 1
    for line in f.readlines():
        if i % 5 == 0:
            test_list.append(line)
        else:
            train_list.append(line)
        i += 1
np.random.shuffle(train_list)
np.random.shuffle(test_list)

def int_2_one_hot(labels):
    # For each of the 5 classes: -1 (absent) becomes an all-zero row,
    # 1 (present) becomes the one-hot row of that class
    r = []
    for k in range(5):
        row = [0, 0, 0, 0, 0]
        if int(labels[k]) == 1:  # labels arrive as strings, so compare as int
            row[k] = 1
        r.append(row)
    return r

def image_2_tfrecords(file_list, tf_record_path):
    tf_write = tf.python_io.TFRecordWriter(tf_record_path)
    for i in range(len(file_list)):
        item = file_list[i].strip('\n')
        items = item.split(',')
        image_name = items[0]
        image_path = os.path.join(BASE_PATH, image_name)
        if os.path.isfile(image_path):
            # Resize to the VGG16 input size and store the raw RGB bytes
            image = Image.open(image_path).convert("RGB")
            image = image.resize((224, 224))
            image = image.tobytes()
            features = {}
            features['raw_image'] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[image]))
            labels = int_2_one_hot(items[1:])
            for k in range(5):
                features['label_%d' % (k + 1)] = tf.train.Feature(
                    int64_list=tf.train.Int64List(value=labels[k]))
            tf_features = tf.train.Features(feature=features)
            example = tf.train.Example(features=tf_features)
            tf_write.write(example.SerializeToString())
        else:
            print("not found:", image_path)
    tf_write.close()

image_2_tfrecords(train_list, r"E:\miml-image-data\processed\train.tfrecords")
image_2_tfrecords(test_list, r"E:\miml-image-data\processed\test.tfrecords")
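As a quick sanity check (my own addition, not part of the original pipeline), one record can be read back from the freshly written TFRecord file to confirm the stored image size and label encoding:

import tensorflow as tf

# Read back the first record and inspect its features (TF 1.x API)
record_iter = tf.python_io.tf_record_iterator(r"E:\miml-image-data\processed\train.tfrecords")
example = tf.train.Example()
example.ParseFromString(next(record_iter))
feats = example.features.feature
print(len(feats['raw_image'].bytes_list.value[0]))  # 224*224*3 = 150528 bytes
print(list(feats['label_1'].int64_list.value))      # e.g. [1, 0, 0, 0, 0] or all zeros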
【Fine-tuning VGG16】
Here the publicly released VGG16 weights are loaded directly into this network for fine-tuning.
The model file is available at: http://www.cs.toronto.edu/~frossard/post/vgg16/
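Before wiring the weights in, it helps to list what the .npz file actually contains (a small sketch of my own); the loading loop below relies on this sorted key order lining up with the my_parameters list:

import numpy as np

weights = np.load(r"D:\vgg16_weight\vgg16_weights.npz")
for k in sorted(weights.keys()):
    print(k, weights[k].shape)
# Expected: conv1_1_W (3, 3, 3, 64), conv1_1_b (64,), ...,
# fc6_W (25088, 4096), fc7_W (4096, 4096), fc8_W (4096, 1000), fc8_b (1000,)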
The code is as follows:
import tensorflow as tf
import numpy as np

# Parse one serialized Example back into an image and its 5 label vectors
def parse_tf(example):
    dics = {'raw_image': tf.FixedLenFeature(shape=[], dtype=tf.string)}
    for k in range(1, 6):
        dics['label_%d' % k] = tf.FixedLenFeature(shape=[5], dtype=tf.int64)
    parsed = tf.parse_single_example(example, features=dics)
    image = tf.decode_raw(parsed['raw_image'], out_type=tf.uint8)
    image = tf.reshape(image, shape=[224, 224, 3])
    image = tf.image.per_image_standardization(image)
    labels = [tf.cast(parsed['label_%d' % k], tf.int32) for k in range(1, 6)]
    return image, labels[0], labels[1], labels[2], labels[3], labels[4]

def Conv(x, conv_shape, bias_shape, parameters, padding="SAME", strides=[1, 1, 1, 1]):
    # Conv layers are frozen (trainable=False): their values come from the
    # pretrained VGG16 weights and are not updated during fine-tuning
    w = tf.Variable(initial_value=tf.random_normal(shape=conv_shape, dtype=tf.float32), trainable=False)
    b = tf.Variable(initial_value=tf.zeros(shape=bias_shape), trainable=False)
    parameters += [w, b]
    conv = tf.nn.conv2d(x, w, strides=strides, padding=padding)
    out = tf.nn.bias_add(conv, b)
    return tf.nn.relu(out)

def Max_Pooling(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME"):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding=padding)

def FC(x, w_shape, b_shape, parameters):
    # Fully connected layers stay trainable: this is the part being fine-tuned
    w = tf.Variable(initial_value=tf.random_normal(shape=w_shape, dtype=tf.float32))
    b = tf.Variable(initial_value=tf.zeros(shape=b_shape))
    parameters += [w, b]
    fc = tf.matmul(x, w)
    fc = tf.nn.bias_add(fc, b)
    return tf.nn.relu(fc)

def Last_FC(x, w_shape, b_shape):
    # Final layer: no ReLU, and not registered in `parameters`
    # (it replaces VGG16's fc8, so no pretrained weights are loaded into it)
    w = tf.Variable(initial_value=tf.random_normal(shape=w_shape, dtype=tf.float32))
    b = tf.Variable(initial_value=tf.zeros(shape=b_shape))
    fc = tf.matmul(x, w)
    fc = tf.nn.bias_add(fc, b)
    return fc

my_parameters = []
x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
y1_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])
y2_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])
y3_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])
y4_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])
y5_ = tf.placeholder(dtype=tf.float32, shape=[None, 5])

# VGG16 convolutional backbone
conv1_1 = Conv(x, conv_shape=[3, 3, 3, 64], bias_shape=[64], parameters=my_parameters)
conv1_2 = Conv(conv1_1, conv_shape=[3, 3, 64, 64], bias_shape=[64], parameters=my_parameters)
pool1 = Max_Pooling(conv1_2)
conv2_1 = Conv(pool1, conv_shape=[3, 3, 64, 128], bias_shape=[128], parameters=my_parameters)
conv2_2 = Conv(conv2_1, conv_shape=[3, 3, 128, 128], bias_shape=[128], parameters=my_parameters)
pool2 = Max_Pooling(conv2_2)
conv3_1 = Conv(pool2, conv_shape=[3, 3, 128, 256], bias_shape=[256], parameters=my_parameters)
conv3_2 = Conv(conv3_1, conv_shape=[3, 3, 256, 256], bias_shape=[256], parameters=my_parameters)
conv3_3 = Conv(conv3_2, conv_shape=[3, 3, 256, 256], bias_shape=[256], parameters=my_parameters)
pool3 = Max_Pooling(conv3_3)
conv4_1 = Conv(pool3, conv_shape=[3, 3, 256, 512], bias_shape=[512], parameters=my_parameters)
conv4_2 = Conv(conv4_1, conv_shape=[3, 3, 512, 512], bias_shape=[512], parameters=my_parameters)
conv4_3 = Conv(conv4_2, conv_shape=[3, 3, 512, 512], bias_shape=[512], parameters=my_parameters)
pool4 = Max_Pooling(conv4_3)
conv5_1 = Conv(pool4, conv_shape=[3, 3, 512, 512], bias_shape=[512], parameters=my_parameters)
conv5_2 = Conv(conv5_1, conv_shape=[3, 3, 512, 512], bias_shape=[512], parameters=my_parameters)
conv5_3 = Conv(conv5_2, conv_shape=[3, 3, 512, 512], bias_shape=[512], parameters=my_parameters)
pool5 = Max_Pooling(conv5_3)
pool5 = tf.reshape(pool5, shape=[-1, 7 * 7 * 512])

fc1 = FC(pool5, w_shape=[7 * 7 * 512, 4096], b_shape=[4096], parameters=my_parameters)
fc2 = FC(fc1, w_shape=[4096, 4096], b_shape=[4096], parameters=my_parameters)
fc3 = Last_FC(fc2, w_shape=[4096, 5], b_shape=[5])

# An image can carry up to 5 classes, so 5 classifier heads are used
# (note that as written, all five heads share the same fc3 output)
y1 = tf.nn.softmax(fc3)
y2 = tf.nn.softmax(fc3)
y3 = tf.nn.softmax(fc3)
y4 = tf.nn.softmax(fc3)
y5 = tf.nn.softmax(fc3)

# Per-label cross-entropy (clipped to avoid log(0)), averaged over the 5 labels
y_trues = [y1_, y2_, y3_, y4_, y5_]
y_preds = [y1, y2, y3, y4, y5]
losses = []
for y_t, y_p in zip(y_trues, y_preds):
    y_clip = tf.clip_by_value(y_p, 1e-8, tf.reduce_max(y_p))
    losses.append(tf.reduce_mean(-tf.reduce_sum(y_t * tf.log(y_clip), axis=1)))
loss = tf.add_n(losses) / 5
train = tf.train.AdamOptimizer(learning_rate=1e-6).minimize(loss)

# Per-label accuracy, averaged the same way
accs = []
for y_t, y_p in zip(y_trues, y_preds):
    correct = tf.equal(tf.argmax(y_t, 1), tf.argmax(y_p, 1))
    accs.append(tf.reduce_mean(tf.cast(correct, dtype=tf.float32)))
acc = tf.add_n(accs) / 5

train_dataset = tf.data.TFRecordDataset(r"E:\miml-image-data\processed\train.tfrecords")
train_dataset = train_dataset.map(parse_tf).batch(16).repeat(1)
train_next_element = train_dataset.make_one_shot_iterator().get_next()

test_dataset = tf.data.TFRecordDataset(r"E:\miml-image-data\processed\test.tfrecords")
test_dataset = test_dataset.map(parse_tf).batch(16).repeat(1)
test_next_element = test_dataset.make_one_shot_iterator().get_next()

init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)
    # Load the public VGG16 weights; skip fc8, which is replaced by the 5-way head
    weights = np.load(r"D:\vgg16_weight\vgg16_weights.npz")
    keys = sorted(weights.keys())
    for i, k in enumerate(keys):
        if k == 'fc8_W' or k == 'fc8_b':
            continue
        session.run(my_parameters[i].assign(weights[k]))
    count = 0
    try:
        while True:
            image, label1, label2, label3, label4, label5 = session.run(train_next_element)
            _, train_loss = session.run(fetches=[train, loss], feed_dict={
                x: image, y1_: label1, y2_: label2, y3_: label3, y4_: label4, y5_: label5
            })
            print("loss=", train_loss)
            if count % 10 == 0:
                # Evaluate on one test batch every 10 training steps
                image, label1, label2, label3, label4, label5 = session.run(test_next_element)
                test_acc = session.run(fetches=acc, feed_dict={
                    x: image, y1_: label1, y2_: label2, y3_: label3, y4_: label4, y5_: label5
                })
                print("acc=", test_acc)
            count += 1
    except tf.errors.OutOfRangeError:
        print("end!")
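One thing the script above does not do is persist the fine-tuned model: training runs for a single pass and the weights are discarded when the session closes. A minimal sketch of saving them with tf.train.Saver (the checkpoint path is my own placeholder):

# Inside the `with tf.Session() as session:` block, after the training loop:
saver = tf.train.Saver()
save_path = saver.save(session, r"E:\miml-image-data\processed\vgg16_miml.ckpt")
print("model saved to", save_path)

# In a later run, restore instead of re-initializing and re-loading the npz:
# saver.restore(session, r"E:\miml-image-data\processed\vgg16_miml.ckpt")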