http://www.javashuo.com/article/p-hsbdkdyj-bv.html (使用 tfrecords 建立自己的數據集)
本節主要是採用矩陣讀取方式
先來看一下我們的目錄:
![](http://static.javashuo.com/static/loading.gif)
Dataset1 和 creat_dataset.py 屬於同一目錄;mergeImg1 和 mergeImg2 爲 Dataset1 的兩個子目錄(以兩類爲例),目錄中存儲圖像等文件。
核心文件
creat_dataset.py 文件如下:
# Build the image matrix and the label matrix used for training.
import os

import numpy as np

# Only these sub-folders are read as classes, so that unrelated folders in
# the dataset directory cannot interfere. A folder's position in this list
# is its class id (0, 1, ...).
dataset_path = ["mergeImg1", "mergeImg2"]


def gain_data(path, image_size=(160, 160)):
    """Load every readable image from the class folders under *path*.

    Args:
        path: Dataset directory that contains the folders named in
            ``dataset_path``.
        image_size: ``(width, height)`` each image is resized to.

    Returns:
        ``(data, label)`` where ``data`` is ``np.array`` of the resized
        images and ``label`` is a list with one integer class id per image.

    Raises:
        OSError: If *path* cannot be listed (raised by ``os.listdir``).
    """
    # Imported here so the NumPy-only helpers in this file stay importable
    # on machines without OpenCV.
    import cv2 as cv

    # Listing the directory first keeps the original failure mode for a
    # missing or invalid *path*.
    present = set(os.listdir(path))

    images = []
    labels = []
    # Iterate dataset_path (not the directory listing) so class ids do not
    # depend on the arbitrary order os.listdir returns.
    for category, folder in enumerate(dataset_path):
        folder_path = os.path.join(path, folder)
        if folder not in present or not os.path.isdir(folder_path):
            continue
        for file_name in sorted(os.listdir(folder_path)):
            img = cv.imread(os.path.join(folder_path, file_name))
            if img is None:
                # cv.imread signals an unreadable / non-image file with
                # None; skip it instead of crashing inside cv.resize.
                continue
            images.append(cv.resize(img, image_size))
            labels.append(category)

    # No cv.destroyAllWindows() here: no window is ever opened, and the
    # call fails on headless OpenCV builds.
    return np.array(images), labels


def label_cov(train_label):
    """Convert integer class labels to one-hot rows.

    Example: ``(0, 0, 1, 1) -> ([1, 0], [1, 0], [0, 1], [0, 1])``.

    Distinct label values are sorted and mapped to columns in that order,
    so 0-based, 1-based and non-contiguous labels all work.

    Args:
        train_label: Sequence of hashable, sortable class labels.

    Returns:
        Integer ``np.ndarray`` of shape ``(len(train_label), n_classes)``.
    """
    classes = sorted(set(train_label))
    column_of = {value: col for col, value in enumerate(classes)}
    result = np.zeros((len(train_label), len(classes)), dtype=int)
    for row, value in enumerate(train_label):
        result[row, column_of[value]] = 1
    return result


def shuffle_data(x, y):
    """Shuffle data *x* and labels *y* with the same random permutation.

    Args:
        x: ``np.ndarray`` of samples, indexed along the first axis.
        y: ``np.ndarray`` of labels, indexed along the first axis.

    Returns:
        ``(data_train, label_train)``, the reordered copies of *x* and *y*.

    Raises:
        ValueError: If *x* and *y* hold a different number of samples.
    """
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            "x and y must have the same number of samples, got "
            f"{x.shape[0]} and {y.shape[0]}"
        )
    order = np.random.permutation(x.shape[0])
    return x[order], y[order]


def _load_shuffled(path, dataset_name):
    """Load ``path/dataset_name``, one-hot encode its labels and shuffle."""
    images, labels = gain_data(os.path.join(path, dataset_name))
    return shuffle_data(images, label_cov(labels))


def gain_data1(path):
    """Return shuffled ``(images, one_hot_labels)`` for ``Dataset1``."""
    return _load_shuffled(path, "Dataset1")


def gain_data2(path):
    """Return shuffled ``(images, one_hot_labels)`` for ``Dataset2``.

    Original author's note: the one-hot conversion is needed for
    TensorFlow but not for Keras.
    """
    return _load_shuffled(path, "Dataset2")


if __name__ == "__main__":
    # a, b are the data and labels, ready to be fed to a neural network.
    a, b = gain_data1("目錄")
如果大家有什麼不明白的,可以與我交流。讀取數據所使用的函數都十分簡單。