http://www.javashuo.com/article/p-ejklmcnx-hp.html
A face recognition project based on a convolutional neural network, using TensorFlow-GPU + dlib + sklearn
It's the end of the term... we finally finished our face recognition project. There are plenty of shortcomings: a lot of the pieces I only know how to use without understanding the logic behind them. I feel like when I learn, anything my brain quietly ranks as low priority just gets dropped... emmm.
This post is mainly a rough overview of what our face recognition project ended up looking like. The whole project is on my GitHub and can be downloaded and run directly, but it is only one possible approach. For this kind of thing it's better to read other people's project code and then write your own: try to write it yourself from your own requirements first, and only fall back on other people's approach when you're really stuck; that's how you actually sharpen your own thinking.
Setting aside the fancy stuff, face recognition here boils down to three steps: collecting the face data, building and training a convolutional neural network, and recognition.
Before that you need to set up the environment. I used Anaconda to configure everything; see my earlier blog post on that (you only need to get as far as verifying that the environment works).
The actual run sequence is:
python init.py
python getFaceByCamera.py
python tensorflow_face.py
python tensorflow_face.py
...and yes, that last one is the same script again: the second run invokes the recognition module. The code for this part is in the GitHub project mentioned above.
Our rough approach for this part: record a short video with OpenCV first, then read it back frame by frame, detect the face in each frame with dlib, randomly tweak the brightness and contrast for a bit of variety, and save about 200 face crops of 64×64 into a folder named after the person being enrolled.
The code for this part:
getFaceByCamera.py
import cv2
import os
import dlib
import sys
import random
import shutil

def make_video(name):  # record a video
    #shutil.rmtree(name)
    """Record video with opencv."""
    cap = cv2.VideoCapture(0)  # the default camera
    # specify the video codec
    fourcc = cv2.VideoWriter_fourcc(*"DIVX")
    out = cv2.VideoWriter('233.avi', fourcc, 20.0, (640, 480))  # format of the recorded video
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            out.write(frame)
            # cv2.imshow('frame', frame)
            # press q to stop recording
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    cap.release()            # release the camera
    out.release()            # release the video file
    cv2.destroyAllWindows()  # close the recording window

# change the brightness and contrast of an image
def relight(img, light=1, bias=0):
    w = img.shape[1]
    h = img.shape[0]
    #image = []
    for i in range(0, w):
        for j in range(0, h):
            for c in range(3):
                tmp = int(img[j, i, c]*light + bias)
                if tmp > 255:
                    tmp = 255
                elif tmp < 0:
                    tmp = 0
                img[j, i, c] = tmp
    return img

def getDataByDlib(name):  # dlib-based implementation
    output_dir = name  # use the entered name as the folder name
    size = 64          # the photos are 64*64
    if not os.path.exists(output_dir):  # create the folder if it does not exist yet
        os.makedirs(output_dir)
    # use dlib's built-in frontal_face_detector as our feature extractor
    detector = dlib.get_frontal_face_detector()
    # open the input stream; it can be a camera or a video file
    #camera = cv2.VideoCapture(0)
    camera = cv2.VideoCapture("233.avi")  # the photos come from the video recorded in the previous step
    index = 1
    while True:
        if index <= 200:  # only take 200 photos per video
            print('Being processed picture %s' % index)  # show progress
            # read a frame from the video stream
            success, img = camera.read()
            if success == False:  # checked before using img, otherwise cvtColor would fail on None
                break
            # convert to grayscale
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # run the face detector
            dets = detector(gray_img, 1)
            for i, d in enumerate(dets):
                x1 = d.top() if d.top() > 0 else 0
                y1 = d.bottom() if d.bottom() > 0 else 0
                x2 = d.left() if d.left() > 0 else 0
                y2 = d.right() if d.right() > 0 else 0
                face = img[x1:y1, x2:y2]
                # adjust contrast and brightness with random values to add some sample diversity
                face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                # crop the face photo down to 64*64
                face = cv2.resize(face, (size, size))
                # show the cropped face
                cv2.imshow('image', face)
                # save it to the folder; file names run from 1.jpg to 200.jpg
                cv2.imwrite(output_dir+'/'+str(index)+'.jpg', face)
                index += 1
            key = cv2.waitKey(30) & 0xff
            if key == 27:
                break
        else:
            print('Finished!')
            break
    # delete the temporary video (os.remove, since 233.avi is a file rather than a directory)
    os.remove('./233.avi')

def getDataByOpencv2():  # opencv-based implementation
    output_dir = './my_faces'
    size = 64
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # load the Haar cascade classifier
    haar = cv2.CascadeClassifier(r'G:\DIP\Anaconda3\envs\test1\Library\etc\haarcascades\haarcascade_frontalface_default.xml')
    # open the input stream; it can be a camera or a video file
    camera = cv2.VideoCapture("233.avi")
    n = 1
    while 1:
        if n <= 10000:
            print('It`s processing %s image.' % n)
            # read a frame
            success, img = camera.read()
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = haar.detectMultiScale(gray_img, 1.3, 5)
            for f_x, f_y, f_w, f_h in faces:
                face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                face = cv2.resize(face, (64, 64))
                '''
                if n % 3 == 1:
                    face = relight(face, 1, 50)
                elif n % 3 == 2:
                    face = relight(face, 0.5, 0)
                '''
                face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                cv2.imshow('img', face)
                cv2.imwrite(output_dir+'/'+str(n)+'.jpg', face)
                n += 1
            key = cv2.waitKey(30) & 0xff
            if key == 27:
                break
        else:
            break

if __name__ == '__main__':
    name = input('please input your name: ')         # get the name of the person being enrolled
    name = os.path.join('./image/trainfaces', name)  # build the folder path the photos are saved to
    make_video(name)                                 # record the video
    getDataByDlib(name)                              # crop the face photos out of the raw frames with dlib
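If everything goes well, each person you enroll ends up with a folder of 64×64 face crops named after them, roughly laid out like this (the folder name here is only a placeholder for whatever was typed at the prompt):

./image/trainfaces/
    &lt;name typed at the prompt&gt;/
        1.jpg
        2.jpg
        ...
        200.jpg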
This part is really the whole point of doing face recognition...
The overall skeleton of this part is more or less the same in every example you find online, so the main thing is to understand what each layer means and how to use it. Of course you can dig deeper: why each layer is built this way, what the alternatives are, and so on.
This is also the part we put the most effort into, and where most of the problems showed up. The two that left the deepest impression were stacking more layers, and the mysterious loss = 0.69...
On stacking layers: most of the sample code out there builds a three-layer network, and very few examples go deeper. At the time we dug through a lot of material before we could roughly work out how to compute the size of the next layer.
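As a small illustration of that bookkeeping, here is a minimal sketch (assuming, like the network below, 64×64 inputs, 'SAME' stride-1 convolutions and 2×2 stride-2 max-pooling): only the pooling changes the spatial size, so each layer halves it, which is where the 8*8*64 flatten size in the fully connected layer comes from.

# Minimal sketch: with 'SAME' stride-1 convolutions, only the 2x2 stride-2 max-pool
# changes the spatial size, so each conv+pool layer halves it.
def feature_map_size(input_size, num_pool_layers):
    size = input_size
    for _ in range(num_pool_layers):
        size = (size + 1) // 2  # 'SAME' pooling rounds up on odd sizes
    return size

print(feature_map_size(64, 3))            # 8 -> the 8 x 8 x 64 tensor after the third layer
print(feature_map_size(64, 3) ** 2 * 64)  # 4096 = 8*8*64, the input size of the fully connected layer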
About that mysterious loss = 0.69: it may not show up with other networks or tasks, but if your loss stays around 0.69 no matter how you tune the parameters, it is worth looking into. Our fix in the end was to add a batch normalization layer; there are plenty of explanations online of what it does, and adding it made the loss converge quickly.
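The number itself is not magic, by the way: 0.69 is just ln 2, the cross-entropy a classifier ends up with when it has stopped learning and predicts the two classes with equal probability (ln n for n classes). A quick sanity check:

# Cross-entropy of a classifier that guesses uniformly over n classes: -ln(1/n) = ln(n).
import math

for n in (2, 3, 4):
    print(n, 'classes, loss when purely guessing:', -math.log(1.0 / n))
# 2 classes gives 0.6931..., which is exactly the plateau we kept seeing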
The skeleton of this part is fairly standard; what varies is mostly the hyperparameters, the choice of optimizer, and so on.
The code (tensorflow_face_conv.py, imported as myconv by the recognition script):
#!/usr/bin/python
#coding=utf-8
''' face detect convolution'''
# pylint: disable=invalid-name
import os
import sys
import logging as log
import matplotlib.pyplot as plt
import common
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import cv2
from sklearn.model_selection import train_test_split
import random

SIZE = 64

x = tf.placeholder(tf.float32, [None, SIZE, SIZE, 3])
y_ = tf.placeholder(tf.float32, [None, None])

keep_prob_5 = tf.placeholder(tf.float32)
keep_prob_75 = tf.placeholder(tf.float32)

def weightVariable(shape):
    ''' build weight variable'''
    init = tf.random_normal(shape, stddev=0.01)
    #init = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(init)

def biasVariable(shape):
    ''' build bias variable'''
    init = tf.random_normal(shape)
    #init = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(init)

def conv2d(x, W):
    ''' conv2d by 1, 1, 1, 1'''
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxPool(x):
    ''' max pooling'''
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def dropout(x, keep):
    ''' drop out'''
    return tf.nn.dropout(x, keep)

def batch_norm_layer(value, is_training=False, name='batch_norm'):
    '''
    Batch normalization; returns the normalized result.
    args:
        value: the input; the first dimension is batch_size
        is_training: True during training, so the running mean and variance of the
            training set keep being updated. Set it to False at test time, so the
            statistics collected during training are used directly. Defaults to test mode.
        name: layer name.
    '''
    if is_training is True:
        # training mode: keep updating the mean and variance with an exponential moving average
        return tf.contrib.layers.batch_norm(inputs=value, decay=0.9, updates_collections=None, is_training=True)
    else:
        # test mode: use the stored mean and variance without updating them
        return tf.contrib.layers.batch_norm(inputs=value, decay=0.9, updates_collections=None, is_training=False)

def cnnLayer(classnum, isTrue):
    ''' create cnn layer'''
    # layer 1
    W1 = weightVariable([3, 3, 3, 32])  # kernel size (3,3), 3 input channels, 32 output channels
    b1 = biasVariable([32])
    conv1 = tf.nn.relu(batch_norm_layer(conv2d(x, W1) + b1, isTrue))
    pool1 = maxPool(conv1)
    # dropout: randomly drop units to reduce overfitting
    drop1 = dropout(pool1, keep_prob_5)  # 32 * 32 * 32; the input channels are contracted away by the filter
    # layer 2
    W2 = weightVariable([3, 3, 32, 64])
    b2 = biasVariable([64])
    conv2 = tf.nn.relu(batch_norm_layer(conv2d(drop1, W2) + b2, isTrue))
    pool2 = maxPool(conv2)
    drop2 = dropout(pool2, keep_prob_5)  # 64 * 16 * 16
    # layer 3
    W3 = weightVariable([3, 3, 64, 64])
    b3 = biasVariable([64])
    conv3 = tf.nn.relu(conv2d(drop2, W3) + b3)
    pool3 = maxPool(conv3)
    drop3 = dropout(pool3, keep_prob_5)  # 64 * 8 * 8
    # fully connected layer
    Wf = weightVariable([8*8*64, 512])
    bf = biasVariable([512])
    drop3_flat = tf.reshape(drop3, [-1, 8*8*64])
    dense = tf.nn.relu(tf.matmul(drop3_flat, Wf) + bf)
    dropf = dropout(dense, keep_prob_75)
    # output layer
    Wout = weightVariable([512, classnum])
    bout = weightVariable([classnum])
    #out = tf.matmul(dropf, Wout) + bout
    out = tf.add(tf.matmul(dropf, Wout), bout)
    return out

# (an earlier, deeper six-layer variant we experimented with, kept commented out)
# # layer 3
# W3 = weightVariable([3, 3, 64, 128])
# b3 = biasVariable([128])
# conv3 = tf.nn.relu(batch_norm_layer(conv2d(drop2, W3) + b3, True))
# pool3 = maxPool(conv3)
# drop3 = dropout(pool3, keep_prob_5)  # 128 * 8 * 8
# # layer 4
# W4 = weightVariable([3, 3, 128, 512])
# b4 = biasVariable([512])
# conv4 = tf.nn.relu(batch_norm_layer(conv2d(drop3, W4) + b4, True))
# pool4 = maxPool(conv4)
# drop4 = dropout(pool4, keep_prob_5)  # 512 * 4 * 4
# # layer 5
# W5 = weightVariable([3, 3, 512, 1024])
# b5 = biasVariable([1024])
# conv5 = tf.nn.relu(batch_norm_layer(conv2d(drop4, W5) + b5, True))
# pool5 = maxPool(conv5)
# drop5 = dropout(pool5, keep_prob_5)  # 1024 * 2 * 2
# # layer 6
# W6 = weightVariable([3, 3, 1024, 1024])
# b6 = biasVariable([1024])
# conv6 = tf.nn.relu(conv2d(drop5, W6) + b6)
# pool6 = maxPool(conv6)
# drop6 = dropout(pool6, keep_prob_5)  # 2048 * 1 * 1
# # fully connected layer
# Wf = weightVariable([1*1*1024, 2048])
# bf = biasVariable([2048])
# drop3_flat = tf.reshape(drop6, [-1, 1*1*1024])
# dense = tf.nn.relu(tf.matmul(drop3_flat, Wf) + bf)
# # dense = tf.nn.relu(tf.matmul(max_pool22_flat, Wf) + bf)
# dropf = dropout(dense, keep_prob_75)
# # output layer
# Wout = weightVariable([2048, classnum])
# bout = weightVariable([classnum])
# #out = tf.matmul(dropf, Wout) + bout
# out = tf.add(tf.matmul(dropf, Wout), bout)
# #return out

def train(train_x, train_y, tfsavepath):
    ''' train'''
    #####
    log.debug('train')
    # randomly split into training and test sets
    train_x, test_x, train_y, test_y = train_test_split(train_x, train_y,
                                                        test_size=0.05,
                                                        random_state=random.randint(0, 100))
    # build the network output (logits)
    out = cnnLayer(train_y.shape[1], True)
    # reshape to: number of images, height, width, channels
    train_x = train_x.reshape(train_x.shape[0], SIZE, SIZE, 3)
    test_x = test_x.reshape(test_x.shape[0], SIZE, SIZE, 3)

    print('train size:%s, test size:%s' % (len(train_x), len(test_x)))
    sys.stdout.flush()

    # mini-batches of 32 images
    batch_size = 32
    num_batch = len(train_x) // batch_size

    # cross entropy
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_))
    # Adam optimizer, learning rate 0.001
    train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
    # train_step = tf.train.AdadeltaOptimizer(0.001).minimize(cross_entropy)

    # compare predicted and true labels, then take the mean; tf.cast converts the type
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1)), tf.float32))
    # save loss and accuracy for tensorboard
    tf.summary.scalar('loss', cross_entropy)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    # initialize the saver
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # tensorflow initialization
        sess.run(tf.global_variables_initializer())
        # tensorboard writer
        summary_writer = tf.summary.FileWriter('./tmp', graph=tf.get_default_graph())
        # 80 passes over the data
        for n in range(80):
            # take 32 (batch_size) images each time
            for i in range(num_batch):
                # slice the training set into batches
                batch_x = train_x[i*batch_size : (i+1)*batch_size]
                batch_y = train_y[i*batch_size : (i+1)*batch_size]
                # run the training step; also fetch the loss and the merged summaries
                _, loss, summary = sess.run([train_step, cross_entropy, merged_summary_op],
                                            feed_dict={x: batch_x, y_: batch_y,
                                                       keep_prob_5: 0.5, keep_prob_75: 0.75})
                # write tensorboard data
                summary_writer.add_summary(summary, n*num_batch+i)
                # print the loss
                print(n*num_batch+i, loss)
                sys.stdout.flush()

                # if (n*num_batch+i) % batch_size == 0:
                #     # accuracy on the test data
                #     acc = accuracy.eval({x:test_x, y_:test_y, keep_prob_5:1.0, keep_prob_75:1.0})
                #     print(n*num_batch+i, acc, '--', n)
                #     accc = acc
                #     # save and exit once the accuracy is high enough
                #     if acc > 0.95 and n > 2:
                #         # saver.save(sess, './train_faces.model', global_step=n*num_batch+i)
                #         saver.save(sess, tfsavepath)
                #         # saver.save(sess, tfsavepath)
                #         sys.exit(0)
                # # saver.save(sess, './train_faces.model', global_step=n*num_batch+i)
                # # saver.save(sess, tfsavepath)
                # print('accuracy less 0.98, exited!')

        # evaluate the accuracy on the held-out test set
        acc = accuracy.eval({x: test_x, y_: test_y, keep_prob_5: 1.0, keep_prob_75: 1.0})
        print('after 80 times run: accuracy is ', acc)
        sys.stdout.flush()
        # save the model
        saver.save(sess, tfsavepath)

if __name__ == '__main__':
    pass
This part loads the trained model, then grabs frames in real time and processes each one, and so on.
tensorflow_face.py: the first time this file runs it creates a label for every person to build the label set, then trains the model on the training set together with those labels; the second run does the actual recognition.
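To make the "creates a label for every person" part concrete: every sub-folder under ./image/trainfaces becomes one class, and each class index is turned into a one-hot row. This is the same onehot() helper as in the file below, just run on a toy example:

import numpy as np

def onehot(numlist):
    ''' same helper as in tensorflow_face.py below '''
    b = np.zeros([len(numlist), max(numlist) + 1])
    b[np.arange(len(numlist)), numlist] = 1
    return b.tolist()

print(onehot([0, 1, 2]))
# [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
# getfileandlabel() additionally returns a dict mapping 0, 1, 2 back to the folder (person) names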
import os
import logging as log
import matplotlib.pyplot as plt
import common
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import cv2
# import convert as myconv
import tensorflow_face_conv as myconv
import dlib
import random
from sklearn.model_selection import train_test_split
import time
import sys
# rm
import shutil

def createdir(*args):
    ''' create dir'''
    # create a folder for each argument if it does not exist yet
    for item in args:
        if not os.path.exists(item):
            os.makedirs(item)

IMGSIZE = 64
SIZE = 64

rootpath = 'G:\\DIP\\mine'

def getpaddingSize(shape):
    ''' get size to make image to be a square rect '''
    # padding needed on each side to make the photo square
    h, w = shape
    longest = max(h, w)
    result = (np.array([longest]*4, int) - np.array([h, h, w, w], int)) // 2
    return result.tolist()

def dealwithimage(img, h=64, w=64):
    ''' dealwithimage '''
    # pad the face image to a square and resize it to the target size
    #img = cv2.imread(imgpath)
    top, bottom, left, right = getpaddingSize(img.shape[0:2])
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    img = cv2.resize(img, (h, w))
    return img

def relight(imgsrc, alpha=1, bias=0):
    '''relight'''
    # adjust the brightness/contrast of the image
    imgsrc = imgsrc.astype(float)
    imgsrc = imgsrc * alpha + bias
    imgsrc[imgsrc < 0] = 0
    imgsrc[imgsrc > 255] = 255
    imgsrc = imgsrc.astype(np.uint8)
    return imgsrc

def getfilesinpath(filedir):
    ''' get all file from file directory'''
    # yield every .jpg under the directory
    for (path, dirnames, filenames) in os.walk(filedir):
        for filename in filenames:
            if filename.endswith('.jpg'):
                yield os.path.join(path, filename)
        for diritem in dirnames:
            getfilesinpath(os.path.join(path, diritem))  # recurse into subdirectories (os.walk already does this)

def readimage(pairpathlabel):
    '''read image to list'''
    # read the photos in each folder together with their labels, return two arrays
    imgs = []
    labels = []
    for filepath, label in pairpathlabel:
        for fileitem in getfilesinpath(filepath):
            img = cv2.imread(fileitem)
            imgs.append(img)
            labels.append(label)
    return np.array(imgs), np.array(labels)

def onehot(numlist):
    ''' get one hot return host matrix is len * max+1 demensions'''
    # build a one-hot label for each person
    b = np.zeros([len(numlist), max(numlist)+1])
    b[np.arange(len(numlist)), numlist] = 1
    return b.tolist()

def getfileandlabel(filedir):
    ''' get path and host paire and class index to name'''
    # map each person's name to their photo folder and a one-hot label
    dictdir = dict([[name, os.path.join(filedir, name)] \
                    for name in os.listdir(filedir) if os.path.isdir(os.path.join(filedir, name))])
                    #for (path, dirnames, _) in os.walk(filedir) for dirname in dirnames])
    dirnamelist, dirpathlist = dictdir.keys(), dictdir.values()
    indexlist = list(range(len(dirnamelist)))
    return list(zip(dirpathlist, onehot(indexlist))), dict(zip(indexlist, dirnamelist))

def main(_):
    ''' main '''
    '''
    Main entry of the face recognition project.
    + On the first run, the photos saved for every person are processed and the
      convolutional network is trained to get a usable model.
    + On the second run, the camera is opened and each captured frame is classified
      with that model, i.e. the face in the frame is recognized as one of the enrolled people.
    + Faces of people who were never enrolled cannot be rejected as "others" yet.
    '''
    #shutil.rmtree('./checkpoint')
    savepath = './checkpoint/face.ckpt'  # path of the model checkpoint
    isneedtrain = False  # when the checkpoint does not exist, treat it as the first run and train
    if os.path.exists(savepath+'.meta') is False:
        isneedtrain = True
    # build the label list and the index-to-name dictionary from the saved photos
    pathlabelpair, indextoname = getfileandlabel(rootpath + '\\image\\trainfaces')
    print(indextoname)
    sys.stdout.flush()
    print(pathlabelpair)
    sys.stdout.flush()
    # read the photos and their labels
    train_x, train_y = readimage(pathlabelpair)
    # normalize the data set ???
    train_x = train_x.astype(np.float32) / 255.0
    ####
    log.debug('len of train_x : %s', train_x.shape)
    if isneedtrain:
        # first generate all face
        # train the convolutional model defined in the other file
        myconv.train(train_x, train_y, savepath)
        ####
        # log.debug('training is over, please run again')
    else:
        # second recognize faces
        # run real-time recognition with the saved model
        testfromcamera(train_x, train_y, savepath)
        #print(np.column_stack((out, argmax)))

def testfromcamera(train_x, train_y, chkpoint):
    # open the default camera
    camera = cv2.VideoCapture(0)
    #haar = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    pathlabelpair, indextoname = getfileandlabel(rootpath + '\\image\\trainfaces')
    # build the prediction op
    output = myconv.cnnLayer(len(pathlabelpair), False)
    predict = output
    # dlib face detector
    detector = dlib.get_frontal_face_detector()
    # load the model
    saver = tf.train.Saver()
    with tf.Session() as sess:
        #sess.run(tf.global_variables_initializer())
        saver.restore(sess, chkpoint)
        n = 1
        while 1:
            if n <= 20000:
                print('It`s processing %s image.' % n)
                sys.stdout.flush()
                # wait 0.2s between frames
                time.sleep(0.2)
                # read a frame
                success, img = camera.read()
                # convert to grayscale
                gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                # detect faces with dlib
                faces = detector(gray_img, 1)
                #faces = haar.detectMultiScale(gray_img, 1.3, 5)
                for i, d in enumerate(faces):
                    x1 = d.top() if d.top() > 0 else 0
                    y1 = d.bottom() if d.bottom() > 0 else 0
                    x2 = d.left() if d.left() > 0 else 0
                    y2 = d.right() if d.right() > 0 else 0
                    face = img[x1:y1, x2:y2]
                    face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                    #could deal with face to train
                    test_x = np.array([face])
                    test_x = test_x.astype(np.float32) / 255.0
                    res = sess.run([predict, tf.argmax(output, 1)],
                                   feed_dict={myconv.x: test_x,
                                              myconv.keep_prob_5: 1.0, myconv.keep_prob_75: 1.0})
                    print(res, indextoname[res[1][0]], res[0][0][res[1][0]])
                    sys.stdout.flush()
                    # pick a random color
                    r = random.randint(0, 255)
                    g = random.randint(0, 255)
                    b = random.randint(0, 255)
                    # draw a box around the detected face
                    cv2.rectangle(img, (x2, x1), (y2, y1), (r, g, b), 3)
                    # if res[0][0][res[1][0]] >= 500:
                    #     cv2.putText(img, 'others', (x1, y1 + 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (col, col, col), 2)  # show the name
                    # else:
                    #     cv2.putText(img, indextoname[res[1][0]], (x1, y1 - 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (col, col, col), 2)  # show the name
                    cv2.putText(img, indextoname[res[1][0]], (x2 + 25, y1 + 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (r, g, b), 2)  # show the name
                n += 1
                cv2.imshow('img', img)
                key = cv2.waitKey(30) & 0xff
                if key == 27:
                    break
            else:
                break
        camera.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # first generate all face
    main(0)
    #onehot([1, 3, 9])
    #print(getfileandlabel('./image/trainimages'))
    #generateface([['./image/trainimages', './image/trainfaces']])
The accuracy of the final model is decent, but it cannot handle people who were never enrolled: it simply labels them as whichever enrolled person they look most like.
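One direction we never got around to (the commented-out 'others' branch in testfromcamera hints at it) would be to turn the logits into softmax probabilities and refuse to name a face when even the best class is not confident enough. This is only a sketch of that idea, not something the project actually does; the 0.6 threshold and the names are made-up examples:

import numpy as np

def classify_with_reject(logits, indextoname, threshold=0.6):
    ''' sketch only: softmax the logits and reject low-confidence faces as "others" '''
    probs = np.exp(logits - np.max(logits))  # numerically stable softmax
    probs = probs / probs.sum()
    best = int(np.argmax(probs))
    return indextoname[best] if probs[best] >= threshold else 'others'

print(classify_with_reject(np.array([2.0, 0.1, -1.0]), {0: 'alice', 1: 'bob', 2: 'carol'}))
# 'alice' (top probability is about 0.83); a flatter logit vector would come back as 'others'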
acc: (TensorBoard accuracy curve, image omitted)
loss: (TensorBoard loss curve, image omitted)
These plots were generated with TensorBoard.
The project also includes a simple GUI, but we ran out of time, so it never got polished properly.
(end... sleepy....)