OpenCV圖像處理以及人臉識別

OpenCV基礎

OpenCV是一個開源的計算機視覺庫,提供了許多圖像處理常用的工具。

批註:本文全部圖片數據都在個人GitHub倉庫。

讀取圖片並顯示

import numpy as np
import cv2 as cv

# Load the sample image from disk and display it in a window titled 'Original'.
original = cv.imread('../machine_learning_date/forest.jpg')
cv.imshow('Original', original)

顯示圖片某個顏色通道的圖像

# Isolate each colour plane: start from all-zero images of the same shape and
# copy in a single channel (index 0 = blue, 1 = green, 2 = red).
blue, green, red = (np.zeros_like(original) for _ in range(3))
for channel, (title, plane) in enumerate(
        (('Blue', blue), ('Green', green), ('Red', red))):
    plane[:, :, channel] = original[:, :, channel]
    cv.imshow(title, plane)

  

圖像剪裁

# Crop the central region: keep the middle half of the image along each axis.
h, w = original.shape[:2]       # e.g. (397, 600) for the sample image
l, t = w // 4, h // 4                   # top-left corner (x, y)
r, b = w * 3 // 4, h * 3 // 4           # bottom-right corner (x, y)
cropped = original[t:b, l:r]            # rows are indexed first, then columns
cv.imshow('Cropped', cropped)

圖像縮放

cv2.resize(src, dsize, dst=None, fx=None, fy=None, interpolation=None)

參數

  • src:原圖
  • dsize:輸出圖像尺寸
  • fx:沿水平軸的比例因子
  • fy:沿垂直軸的比例因子
  • interpolation:插值方法
# Output size = input size / 4, given as an explicit (width, height) dsize
scaled1 = cv.resize(original, (int(w / 4), int(h / 4)), interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled1', scaled1)

# No explicit dsize (None): the output size comes from the x / y scale factors fx and fy
scaled2 = cv.resize(scaled1, None, fx=4, fy=4, interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled2', scaled2)
cv.waitKey()        # wait until the user presses a key (e.g. Esc) before continuing

圖像文件保存

# Save the blue-channel image to disk.
# NOTE(review): this writes under '../ml_data/' while the inputs are read from
# '../machine_learning_date/' - confirm the intended output directory.
cv.imwrite('../ml_data/blue.jpg', blue)

邊緣檢測

物體的邊緣檢測是物體識別常用的手段。邊緣檢測常使用亮度梯度方法:通過識別亮度梯度變化最大的像素點,從而檢測出物體的邊緣。

import cv2 as cv
# Read the image with the IMREAD_GRAYSCALE flag and display it
original = cv.imread('../machine_learning_date/chair.jpg', cv.IMREAD_GRAYSCALE)
cv.imshow('Original', original)

索貝爾邊緣識別

cv.Sobel(original, cv.CV_64F, 1, 0, ksize=5)

參數

  • src:源圖像
  • ddepth:cv.CV_64F:卷積運算使用數據類型爲64位浮點型(保證微分的精度)
  • dx:1表示取水平方向索貝爾偏微分
  • dy:0表示不取垂直方向索貝爾偏微分
  • ksize:卷積核爲5*5的方陣

水平方向索貝爾偏微分

# Sobel derivative with dx=1, dy=0 (horizontal direction), 5x5 kernel, 64-bit float output
hsobel = cv.Sobel(original, cv.CV_64F, 1, 0, ksize=5)
cv.imshow('H-Sobel', hsobel)

垂直方向索貝爾偏微分

# Sobel derivative with dx=0, dy=1 (vertical direction), 5x5 kernel, 64-bit float output
vsobel = cv.Sobel(original, cv.CV_64F, 0, 1, ksize=5)
cv.imshow('V-Sobel', vsobel)

水平和垂直方向索貝爾偏微分

# Sobel with dx=1 and dy=1 in a single call, 5x5 kernel, 64-bit float output.
# NOTE(review): dx / dy are derivative orders, so this presumably yields the mixed
# x-y derivative rather than a combination of the two gradients - confirm intent.
sobel = cv.Sobel(original, cv.CV_64F, 1, 1, ksize=5)
cv.imshow('Sobel', sobel)

拉普拉斯邊緣識別

cv.Laplacian(original, cv.CV_64F)

# Laplacian edge response of the grayscale image, computed in 64-bit float
laplacian = cv.Laplacian(original, cv.CV_64F)
cv.imshow('Laplacian', laplacian)

Canny邊緣識別

cv.Canny(original, 50, 240)

  • image:輸入圖像
  • threshold1:50,滯後閾值的下限(梯度低於此值的像素不會被視爲邊緣)

  • threshold2:240,滯後閾值的上限(梯度高於此值的像素必定被視爲邊緣)
# Canny edge detection; 50 and 80 are passed as threshold1 and threshold2
canny = cv.Canny(original, 50, 80)
cv.imshow('Canny', canny)
cv.waitKey()  # keep the windows open until a key is pressed

亮度提高

OpenCV提供了直方圖均衡化的方式實現亮度提高,更有利於邊緣識別與物體識別模型的訓練。

彩色圖轉爲灰度圖

gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)

直方圖均衡化

equalized_gray = cv.equalizeHist(gray)

案例:

讀取圖像

import cv2 as cv
# Read the image
original = cv.imread('../machine_learning_date/sunrise.jpg')
cv.imshow('Original', original)     # display the image

彩色圖轉爲灰度圖

# Convert the BGR colour image to a single-channel grayscale image and show it
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

灰度圖直方圖均衡化

# Histogram-equalise the grayscale image and show the result
equalized_gray = cv.equalizeHist(gray)
cv.imshow('Equalized Gray', equalized_gray)

YUV:亮度(Y)與兩個色度分量(U、V)

# Brightness enhancement of the colour image: convert BGR -> YUV, equalise
# only channel 0 (Y, the luma), then convert back to BGR.  Channels 1 and 2
# (U, V) carry the colour information; equalising them as well would distort
# the colours instead of just improving brightness / contrast.
yuv = cv.cvtColor(original, cv.COLOR_BGR2YUV)
yuv[..., 0] = cv.equalizeHist(yuv[..., 0])  # luma: histogram equalisation
equalized_color = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('Equalized Color', equalized_color)
cv.waitKey()  # keep the windows open until a key is pressed

角點檢測

平直棱線的交匯點(顏色梯度方向改變的像素點的位置)

Harris角點檢測器

gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)

corners = cv.cornerHarris(gray, 7, 5, 0.04)

  • src:輸入單通道8位或浮點圖像。
  • blockSize:角點檢測區域大小
  • ksize:Sobel求導中使用的窗口大小
  • k:Harris角點響應函數 R = det(M) − k·trace(M)² 中的經驗係數(自由參數),通常取[0.04, 0.06]

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/box.png')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)     # convert to grayscale to reduce computation
cv.imshow('Gray', gray)
corners = cv.cornerHarris(gray, 7, 5, 0.04)         # Harris corner detector (blockSize=7, ksize=5, k=0.04)
# Overlay the corners on a copy of the original image
mixture = original.copy()
mixture[corners > corners.max() * 0.01] = [0, 0, 255]   # responses above 1% of the maximum are painted red (BGR [0, 0, 255])
cv.imshow('Corner', mixture)
cv.waitKey()

圖像識別

特徵點檢測

經常使用特徵點檢測有:STAR特徵點檢測 / SIFT特徵點檢測

特徵點檢測結合了 邊緣檢測 與 角點檢測 從而識別出圖形的特徵點

STAR特徵點檢測相關API以下:

star = cv.xfeatures2d.StarDetector_create()  # 建立STAR特徵點檢測器

keypoints = star.detect(gray)     # 檢測出gray圖像全部的特徵點

把全部的特徵點繪製在mixture圖像中

cv.drawKeypoints(original, keypoints, mixture, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

參數:

  • image:原圖片
  • keypoints:源圖像中的關鍵點
  • outImage:輸出圖片
  • flags:標誌設置圖形特徵

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)  # convert to grayscale to reduce computation
cv.imshow('Gray', gray)
star = cv.xfeatures2d.StarDetector_create()  # create the STAR keypoint detector
keypoints = star.detect(gray)  # detect all keypoints in the grayscale image
mixture = original.copy()
# drawKeypoints draws every keypoint; mixture is passed as the output image
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()

 

SIFT特徵點檢測相關API:

sift = cv.xfeatures2d.SIFT_create()   # 建立SIFT特徵點檢測器
keypoints = sift.detect(gray)        # 檢測出gray圖像全部的特徵點

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)  # keypoints are detected on the grayscale image
cv.imshow('Gray', gray)
# Create the SIFT keypoint detector.  Since OpenCV 4.4 SIFT lives in the main
# module (cv.SIFT_create); older builds only expose it through the contrib
# module cv.xfeatures2d, so fall back to that when the new name is missing.
sift = cv.SIFT_create() if hasattr(cv, 'SIFT_create') else cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)  # detect all keypoints in the grayscale image
mixture = original.copy()
# Draw every keypoint; mixture is passed as the output image
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()

  

特徵值矩陣

圖像特徵值矩陣(描述)記錄了圖像的特徵點以及每一個特徵點的梯度信息,類似圖像的特徵值矩陣也類似。這樣只要有足夠多的樣本,就能夠基於隱馬爾科夫模型進行圖像內容的識別。

特徵值矩陣相關API:

sift = cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)
_, desc = sift.compute(gray, keypoints)

案例:

import cv2 as cv
import matplotlib.pyplot as plt

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Create the SIFT detector: cv.SIFT_create on OpenCV >= 4.4, otherwise the
# contrib-module factory cv.xfeatures2d.SIFT_create.
sift = cv.SIFT_create() if hasattr(cv, 'SIFT_create') else cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)           # detect all keypoints in the grayscale image
_, desc = sift.compute(gray, keypoints)  # one descriptor row per keypoint
print(desc.shape)       # (454, 128)
# desc.T is plotted, so rows (y axis) are the descriptor components and
# columns (x axis) are the keypoints / samples - label the axes accordingly.
plt.matshow(desc.T, cmap='jet', fignum='Description')
plt.title('Description')
plt.xlabel('Sample')
plt.ylabel('Feature')
plt.tick_params(which='both', top=False, labeltop=False, labelbottom=True, labelsize=10)
plt.show()

物體識別

一、讀取training文件夾中的訓練圖片樣本,每一個圖片對應一個desc矩陣,每一個desc都有一個類別(car)

二、把全部類別爲car的desc合併在一塊兒,造成訓練集

| desc |       |
| desc | car |
| desc | |
.....

  由上述訓練集樣本能夠訓練一個用於匹配car的HMM。

三、訓練3個HMM分別對應每一個物體類別。 保存在列表中。

四、讀取testing文件夾中的測試樣本,整理測試樣本

| desc | car   |
| desc | moto |

五、針對每個測試樣本:

  1. 分別使用3個HMM模型,對測試樣本計算score得分。
  2. 取3個模型中得分最高的模型所屬類別做爲預測類別。
import os
import numpy as np
import cv2 as cv
import hmmlearn.hmm as hl


def search_files(directory):
    """Collect the ``.jpg`` files below *directory*, grouped by folder name.

    The tree is walked recursively and every image is filed under the name
    of the directory that directly contains it (used as the class label).

    Args:
        directory: Root directory to scan.

    Returns:
        dict mapping each folder name to the list of image paths found in it.
    """
    root = os.path.normpath(directory)

    grouped = {}
    for folder, _subdirs, names in os.walk(root):
        images = [os.path.join(folder, name)
                  for name in names if name.endswith('.jpg')]
        if images:
            # The last path component of the containing folder is the label
            label = folder.split(os.path.sep)[-1]
            grouped.setdefault(label, []).extend(images)
    return grouped


# Load the training samples, train one HMM per class and keep the models
train_objects = search_files('../machine_learning_date/objects/training')
# One detector serves every image, so build it once instead of per file.
# cv.SIFT_create exists on OpenCV >= 4.4; older builds only provide the
# contrib-module factory cv.xfeatures2d.SIFT_create.
sift = cv.SIFT_create() if hasattr(cv, 'SIFT_create') else cv.xfeatures2d.SIFT_create()
train_x, train_y = [], []
for label, filenames in train_objects.items():
    label_descs = []    # descriptor matrices of every image of this class
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        # Rescale so that the shorter side is 200 px, which keeps the number
        # of descriptors per image comparable
        h, w = gray.shape[:2]
        f = 200 / min(h, w)
        gray = cv.resize(gray, None, fx=f, fy=f)
        keypoints = sift.detect(gray)       # detect all keypoints in the grayscale image
        _, desc = sift.compute(gray, keypoints)     # descriptor matrix, one row per keypoint
        # compute() gives no descriptors for an image without keypoints;
        # skip it rather than crash while stacking
        if desc is not None and len(desc) > 0:
            label_descs.append(desc)
    if not label_descs:
        continue    # no usable descriptors: nothing to train on for this class
    # Stack once per class instead of re-copying the array for every image
    train_x.append(np.vstack(label_descs))
    train_y.append(label)
# Fit one Gaussian HMM per class on that class's stacked descriptors
models = {}
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag', n_iter=100)
    models[label] = model.fit(descs)

# Evaluate the models: build one stacked descriptor matrix per test class
test_objects = search_files('../machine_learning_date/objects/testing')
# Build the detector once; cv.SIFT_create exists on OpenCV >= 4.4, older
# builds only provide the contrib-module factory cv.xfeatures2d.SIFT_create.
sift = cv.SIFT_create() if hasattr(cv, 'SIFT_create') else cv.xfeatures2d.SIFT_create()
test_x, test_y = [], []
for label, filenames in test_objects.items():
    label_descs = []    # descriptor matrices of every image of this class
    for filename in filenames:
        image = cv.imread(filename)
        # NOTE(review): the training images are rescaled (shorter side 200 px)
        # before SIFT, the test images are not - confirm whether that is intended.
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        keypoints = sift.detect(gray)
        _, desc = sift.compute(gray, keypoints)
        # compute() gives no descriptors for an image without keypoints;
        # skip it rather than crash while stacking
        if desc is not None and len(desc) > 0:
            label_descs.append(desc)
    if not label_descs:
        continue    # no usable descriptors for this class: nothing to score
    # Stack once per class instead of re-copying the array for every image
    test_x.append(np.vstack(label_descs))
    test_y.append(label)

# For every test sample, score it with each class model and predict the class
# whose model gives the highest score
pred_y = []
for descs, test_label in zip(test_x, test_y):
    best_score, best_label = None, None
    for pred_label, model in models.items():
        score = model.score(descs)
        # `is None` (identity) is the correct check for "no score seen yet"
        if best_score is None or best_score < score:
            best_score, best_label = score, pred_label
        print(test_label, '->', pred_label, score)
        # Sample output (true label -> model label, score):
        # airplane -> airplane -373374.23370679974
        # airplane -> car -374022.20182585815
        # airplane -> motorbike -374127.46289302857
        # car -> airplane -163638.3153800373
        # car -> car -163691.52001099114
        # car -> motorbike -164410.0557508754
        # motorbike -> airplane -467472.6294620241
        # motorbike -> car -470149.6143097087
        # motorbike -> motorbike -464606.0040570249
    pred_y.append(best_label)

print(test_y)       # ['airplane', 'car', 'motorbike']
print(pred_y)       # ['airplane', 'airplane', 'motorbike']

人臉識別

人臉識別與圖像識別的區別在於人臉識別需要識別出兩個人的不同點。

視頻捕捉

經過OpenCV訪問視頻捕捉設備(視頻頭),從而獲取圖像幀。

視頻捕捉相關API:

import cv2 as cv
# Open the video capture device (index 0)
video_capture = cv.VideoCapture(0)
# Read one frame: element [1] of the value returned by read() is used as the frame
# NOTE(review): element [0] of that return value is ignored here - presumably a success flag; confirm.
frame = video_capture.read()[1]
cv.imshow('VideoCapture', frame)
# Release the video capture device
video_capture.release()
# Destroy all OpenCV windows
cv.destroyAllWindows()

案例:

import cv2 as cv

# Open the video capture device (index 0)
video_capture = cv.VideoCapture(0)

try:
    # Show frames until Esc is pressed or the device stops delivering frames
    while True:
        ok, frame = video_capture.read()
        if not ok:
            # No frame was grabbed (camera missing / disconnected / stream
            # ended); stop instead of passing an invalid frame to imshow
            break
        cv.imshow('frame', frame)
        # Refresh about every 33 ms; key code 27 is Esc
        if cv.waitKey(33) == 27:
            break
finally:
    # Always free the camera and close the windows, even after an error
    video_capture.release()
    cv.destroyAllWindows()

人臉定位

哈爾級聯人臉定位

import cv2 as cv
# Build the Haar cascade face detector from its feature description file
fd = cv.CascadeClassifier('../data/haar/face.xml')
# Detect every face region in the image.
#   1.3: scaleFactor - how much the image is shrunk at each step of the
#        multi-scale search
#   5:   minNeighbors - how many neighbouring detections a candidate
#        rectangle needs in order to be kept
# Returns:
#   faces: detected face rectangles [(l, t, w, h), ...]
faces = fd.detectMultiScale(frame, 1.3, 5)
if len(faces) > 0:      # nothing to draw when no face was found
    face = faces[0]     # first face
    l, t, w, h = face   # left, top, width, height of the rectangle
    a, b = int(w / 2), int(h / 2)   # semi-axes of the inscribed ellipse
    # Draw the ellipse
    cv.ellipse(
        frame,              # image
        (l + a, t + b),     # ellipse centre
        (a, b),             # semi-axes
        0,                  # rotation angle of the ellipse
        0, 360,             # start angle, end angle
        (255, 0, 255),      # colour
        2                   # line width
    )

案例:

import cv2 as cv
# Haar cascade detectors for faces, eyes and noses
fd = cv.CascadeClassifier('../../data/haar/face.xml')
ed = cv.CascadeClassifier('../../data/haar/eye.xml')
nd = cv.CascadeClassifier('../../data/haar/nose.xml')
vc = cv.VideoCapture(0)
try:
    while True:
        ok, frame = vc.read()
        if not ok:
            # No frame was grabbed; stop instead of processing an invalid frame
            break
        faces = fd.detectMultiScale(frame, 1.3, 5)
        for l, t, w, h in faces:
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(frame, (l + a, t + b), (a, b), 0, 0, 360, (255, 0, 255), 2)
            # Search eyes, then noses, inside the face region only; the slice
            # is a view, so drawing on it also draws on the frame
            face = frame[t:t + h, l:l + w]
            for detector, color in ((ed, (0, 255, 0)), (nd, (0, 255, 255))):
                # Separate names keep the face rectangle (l, t, w, h) intact;
                # these coordinates are relative to the face region
                for fl, ft, fw, fh in detector.detectMultiScale(face, 1.3, 5):
                    fa, fb = int(fw / 2), int(fh / 2)
                    cv.ellipse(face, (fl + fa, ft + fb), (fa, fb), 0, 0, 360, color, 2)
        cv.imshow('VideoCapture', frame)
        if cv.waitKey(33) == 27:    # 27 is the Esc key
            break
finally:
    # Always free the camera and close the windows, even after an error
    vc.release()
    cv.destroyAllWindows()

人臉識別

簡單人臉識別:OpenCV的LBPH(局部二值模式直方圖)

  1. 讀取樣本圖片數據,整理圖片的路徑列表
  2. 讀取每張圖片,基於haar裁剪每張人臉,把人臉數據放入train_x,做爲訓練數據。在整理train_y時,因爲Bob、Sala、Roy是字符串,須要把字符串作一個標籤編碼 LabelEncoder
  3. 遍歷訓練集,把訓練集交給LBPH人臉識別模型進行訓練。
  4. 讀取測試集數據,整理圖片的路徑列表
  5. 遍歷每張圖片,把圖片中的人臉使用相同的方式裁剪,把人臉數據交給LBPH模型進行類別預測,獲得預測結果。
  6. 以圖像的方式輸出結果。
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2 as cv
import sklearn.preprocessing as sp

# Cascade classifier loaded from the Haar face description file; used to locate faces in the images
fd = cv.CascadeClassifier('../machine_learning_date/haar/face.xml')


def search_faces(directory):
    """Collect the ``.jpg`` files below *directory*, grouped by parent folder.

    The tree is walked recursively; the label of an image is the
    second-to-last component of its path, i.e. the folder containing it.

    Args:
        directory: Root directory to scan.

    Returns:
        dict mapping each label to the list of image paths found for it.
    """
    root = os.path.normpath(directory)

    by_label = {}
    for folder, _subdirs, names in os.walk(root):
        for name in names:
            if not name.endswith('.jpg'):
                continue    # only JPEG images are samples
            path = os.path.join(folder, name)
            label = path.split(os.path.sep)[-2]
            by_label.setdefault(label, []).append(path)
    return by_label


# Gather the training images and fit the label encoder on the person names
train_faces = search_faces('../machine_learning_date/faces/training')
codec = sp.LabelEncoder()
codec.fit(list(train_faces))

# Crop every detected face and pair it with the integer code of its label
train_x, train_y = [], []
for label, filenames in train_faces.items():
    code = codec.transform([label])[0]      # same code for every face of this label
    for filename in filenames:
        gray = cv.cvtColor(cv.imread(filename), cv.COLOR_BGR2GRAY)
        for l, t, w, h in fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100)):
            train_x.append(gray[t:t + h, l:l + w])
            train_y.append(code)
train_y = np.array(train_y)
'''
訓練集結構:
   train_x  train_y
 -------------------
 | face    | 0     |
 -------------------
 | face    | 1     |
 -------------------
 | face    | 2     |
 -------------------
 | face    | 1     |
 -------------------
'''
# Local Binary Patterns Histograms (LBPH) face recogniser, trained on the crops
model = cv.face.LBPHFaceRecognizer_create()
model.train(train_x, train_y)

# Evaluate: crop the faces of the test images the same way as for training
# NOTE(review): the test set is read from '../ml_data/' while the training set
# comes from '../machine_learning_date/' - confirm the intended directory.
test_faces = search_faces(
    '../ml_data/faces/testing')
test_x, test_y, test_z = [], [], []
for label, filenames in test_faces.items():
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        for l, t, w, h in fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100)):
            test_x.append(gray[t:t + h, l:l + w])       # face crop to classify
            test_y.append(codec.transform([label])[0])  # true label code
            # Outline the face on the colour image kept for display
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(image, (l + a, t + b), (a, b), 0, 0, 360, (255, 0, 255), 2)
            test_z.append(image)
test_y = np.array(test_y)
# predict() returns a sequence; its first element is taken as the predicted label code
pred_test_y = [model.predict(face)[0] for face in test_x]

print(codec.inverse_transform(test_y))
print(codec.inverse_transform(pred_test_y))

# Cycle through the annotated test images, one per second, until Esc is pressed
escape = False
while not escape:
    for code, pred_code, image in zip(test_y, pred_test_y, test_z):
        names = codec.inverse_transform([code, pred_code])
        label, pred_label = names
        verdict = '==' if code == pred_code else '!='
        text = '{} {} {}'.format(label, verdict, pred_label)
        cv.putText(image, text, (10, 60), cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 6)
        cv.imshow('Recognizing...', image)
        escape = cv.waitKey(1000) == 27     # 27 is the Esc key
        if escape:
            break
相關文章
相關標籤/搜索