百度AI Studio平臺手勢識別數據集:https://aistudio.baidu.com/aistudio/datasetdetail/2182
解壓數據集中Dataset.zip文件,刪除數據集文件夾中名爲.DS_Store的文件,獲得如圖目錄結構:
網絡
import os
from multiprocessing import cpu_count

import numpy as np
import paddle
from PIL import Image

# ---------------------------------------------------------------------------
# Step 1: generate the train/test list files.
#
# Each line of a list file is "<image path>\t<class folder name>\n".
# Within every class folder, every 10th image goes to the test list and the
# rest go to the training list.
# ---------------------------------------------------------------------------
data_path = './Dataset'

# os.listdir() returns entries in arbitrary order; sorting makes the
# train/test split reproducible from run to run.
character_folders = sorted(os.listdir(data_path))

# Mode 'w' truncates lists left over from a previous run, and both files are
# opened once instead of being re-opened in append mode for every folder.
with open('./train_data.txt', 'w', encoding='utf-8') as f_train, \
        open('./test_data.txt', 'w', encoding='utf-8') as f_test:
    for character_folder in character_folders:
        folder_path = os.path.join(data_path, character_folder)
        # Skips macOS '.DS_Store' and any other stray non-directory entry,
        # which would otherwise make os.listdir() below raise.
        if character_folder == '.DS_Store' or not os.path.isdir(folder_path):
            continue

        count = 0
        for img in sorted(os.listdir(folder_path)):
            if img == '.DS_Store':
                continue
            entry = os.path.join(folder_path, img) + '\t' + character_folder + '\n'
            if count % 10 == 0:
                f_test.write(entry)
            else:
                f_train.write(entry)
            count += 1
print('數據列表生成完成')


# ---------------------------------------------------------------------------
# Step 2: define the training and test set readers.
# ---------------------------------------------------------------------------
def data_mapper(sample):
    """Load one image and return it, normalised, together with its label.

    Args:
        sample: ``(image_path, label)`` pair as produced by ``data_reader``.

    Returns:
        ``(img, label)`` where ``img`` is a float32 array of shape
        ``(3, 100, 100)`` scaled to ``[0, 1]`` and ``label`` is passed through
        unchanged.
    """
    img_path, label = sample
    with Image.open(img_path) as raw:
        # convert('RGB') guarantees three channels so the transpose below
        # cannot fail on grayscale / RGBA / CMYK files.
        # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        img = raw.convert('RGB').resize((100, 100), Image.LANCZOS)
    img = np.array(img).astype('float32')
    # HWC (r,g,b interleaved per pixel) -> CHW (one plane per channel).
    img = img.transpose((2, 0, 1))
    img = img / 255.0
    return img, label


def data_reader(data_list_path):
    """Build a reader that yields mapped samples listed in ``data_list_path``.

    Args:
        data_list_path: path to a list file whose lines are
            ``"<image path>\\t<integer label>"``.

    Returns:
        A paddle reader created by ``paddle.reader.xmap_readers`` that applies
        ``data_mapper`` to every ``(path, label)`` pair using ``cpu_count()``
        worker threads and a buffer of 512 samples.
    """

    def reader():
        with open(data_list_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.rstrip('\n')
                if not line:
                    continue  # tolerate blank lines in the list file
                # Split on the LAST tab so a tab inside the path is harmless.
                img, label = line.rsplit('\t', 1)
                yield img, int(label)

    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)


train_reader = paddle.batch(
    # shuffle() randomises sample order within a 256-sample buffer so the
    # model does not see the classes in folder order.
    reader=paddle.reader.shuffle(reader=data_reader('./train_data.txt'),
                                 buf_size=256),
    batch_size=32)
test_reader = paddle.batch(reader=data_reader('./test_data.txt'),
                           batch_size=32)
說明:當前目錄下生成的 train_data.txt 和 test_data.txt 文件主要用於存儲數據路徑和分類標籤,內容如下。test_data.txt ——>
train_data.txt ——>
app
說明:網絡模型爲簡單的DNN模型,代碼如下:
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear


# Network model definition
class MyDNN(fluid.dygraph.Layer):
    """Small fully-connected classifier with 10 softmax outputs.

    Three ``Linear(100, 100)`` layers with ReLU are applied first, then the
    activations are reshaped to ``3 * 100 * 100`` values per sample and passed
    to a final ``Linear`` layer with softmax over 10 classes.

    The attribute names ``hidden1`` .. ``hidden4`` are part of the saved
    ``state_dict`` and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        flat_features = 3 * 100 * 100
        self.hidden1 = Linear(100, 100, act='relu')
        self.hidden2 = Linear(100, 100, act='relu')
        self.hidden3 = Linear(100, 100, act='relu')
        self.hidden4 = Linear(flat_features, 10, act='softmax')

    def forward(self, input):
        """Run the network on ``input`` and return the softmax output.

        NOTE(review): the training script feeds batches shaped
        ``(N, 3, 100, 100)``; other shapes are not exercised in this file.
        """
        features = self.hidden3(self.hidden2(self.hidden1(input)))
        # Flatten everything except the batch dimension for the output layer.
        flattened = fluid.layers.reshape(features, shape=[-1, 3 * 100 * 100])
        return self.hidden4(flattened)
import paddle
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from PIL import Image
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Linear

# Training
#
# Guarded by __main__ so that importing this module (the evaluation script
# imports it for ``MyDNN`` and ``test_reader``) does not silently retrain the
# network and overwrite the saved parameters. Running the file directly
# behaves exactly as before.
if __name__ == '__main__':
    with fluid.dygraph.guard():
        l_rate = 0.001
        model = MyDNN()
        model.train()
        # Plain stochastic gradient descent over all model parameters.
        opt = fluid.optimizer.SGDOptimizer(learning_rate=l_rate,
                                           parameter_list=model.parameters())
        epochs_num = 10  # number of passes over the training set

        for pass_num in range(epochs_num):
            for batch_id, data in enumerate(train_reader()):
                # Each element of ``data`` is an (image, label) pair.
                images = np.array([x[0].reshape(3, 100, 100) for x in data],
                                  np.float32)
                labels = np.array([x[1] for x in data]).astype('int64')
                # cross_entropy / accuracy are given labels shaped (N, 1).
                labels = labels[:, np.newaxis]

                # to_variable() creates a dygraph variable from a
                # numpy.ndarray so it can be fed to the model.
                image = fluid.dygraph.to_variable(images)
                label = fluid.dygraph.to_variable(labels)

                predict = model(image)
                loss = layers.cross_entropy(predict, label)  # cross entropy
                avg_loss = layers.mean(loss)
                acc = layers.accuracy(predict, label)  # batch accuracy

                if batch_id != 0 and batch_id % 15 == 0:
                    print('train_pass:{}, batch_id:{}, train_loss:{}, acc:{}'.format(
                        pass_num, batch_id, avg_loss.numpy(), acc.numpy()[0]))

                avg_loss.backward()      # back-propagate
                opt.minimize(avg_loss)   # apply the parameter update
                # Reset gradients so they do not accumulate into the next step.
                model.clear_gradients()

        # 'MyDNN' is a path prefix; the parameters are written to
        # 'MyDNN.pdparams' in the current directory.
        fluid.save_dygraph(model.state_dict(), 'MyDNN')
說明:模型訓練時簡單迭代10次,訓練出的模型效果並不好,這裏主要提供一種代碼思路,並未做任何優化。保存的模型文件名爲"MyDNN.pdparams",保存在當前目錄下。
訓練過程:
優化
項目目錄結構如下:
代碼如下:
import matplotlib.pyplot as plt
import paddle.fluid.layers as layers
import Gesture_Recognition as GR
import paddle.fluid as fluid
import numpy as np
from PIL import Image
from Gesture_Recognition import test_reader

# ---------------------------------------------------------------------------
# Evaluate the saved model on the test set.
# ---------------------------------------------------------------------------
with fluid.dygraph.guard():
    accs = []
    batch_sizes = []
    # load_dygraph() expects the same path PREFIX that was passed to
    # save_dygraph() ('MyDNN'), not the '.pdparams' file name; older 1.x
    # releases append the suffix themselves and fail on 'MyDNN.pdparams'.
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model = GR.MyDNN()
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # switch to evaluation mode

    for batch_id, data in enumerate(test_reader()):
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = layers.accuracy(predict, label)  # batch accuracy
        accs.append(acc.numpy()[0])
        batch_sizes.append(len(data))

    # Weight each batch by its size: the last batch is usually smaller, so a
    # plain mean of per-batch accuracies would over-weight its samples.
    if accs:
        avg_acc = np.average(accs, weights=batch_sizes)
    else:
        avg_acc = float('nan')  # empty test set
    print('平均acc:', avg_acc)


# ---------------------------------------------------------------------------
# Predict a single image.
# ---------------------------------------------------------------------------
def load_image(path):
    """Read the image at ``path`` and preprocess it like the training data.

    Args:
        path: path of the image file to load.

    Returns:
        float32 array of shape ``(3, 100, 100)`` with values in ``[0, 1]``.
    """
    with Image.open(path) as raw:
        # convert('RGB') guarantees three channels for arbitrary user images.
        # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        img = raw.convert('RGB').resize((100, 100), Image.LANCZOS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0
    return img


# Build the dygraph context for inference.
with fluid.dygraph.guard():
    infer_path = '手勢.JPG'
    model = GR.MyDNN()
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # switch to evaluation mode

    infer_img = load_image(infer_path)
    # Add the batch dimension: (3, 100, 100) -> (1, 3, 100, 100).
    infer_img = infer_img[np.newaxis, :, :, :]
    infer_img = fluid.dygraph.to_variable(infer_img)

    result = model(infer_img)
    print('預測值:', np.argmax(result.numpy()))

# Show the image that was classified (same file as infer_path).
plt.imshow(Image.open(infer_path))
plt.show()
運行結果:
spa