TensorFlow提供了用於檢測圖片或視頻中所包含物體的API,詳情可參考以下連結:
https://github.com/tensorflow/models/tree/master/research/object_detection
物體檢測和圖片分類不一樣:圖片分類只需判斷整張圖片屬於哪個類別,物體檢測則還要找出圖中每個物體的位置(邊界框)及其類別
透過多個例子,瞭解TensorFlow物體檢測API的使用方法
這裏使用預訓練好的ssd_mobilenet_v1_coco
模型(Single Shot MultiBox Detector),更多可用的物體檢測模型可以參考TensorFlow官方的detection model zoo
加載庫函數
# -*- coding: utf-8 -*- import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from PIL import Image from utils import label_map_util from utils import visualization_utils as vis_util
定義一些常量
# Frozen inference graph of the pre-trained ssd_mobilenet_v1_coco model.
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
# Label map file that is loaded to translate class ids into display names.
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
# Upper bound on class ids passed as max_num_classes when building categories.
NUM_CLASSES = 90
加載預訓練好的模型
# Build a dedicated graph and import the serialized (frozen) GraphDef into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # The .pb file is a binary protobuf, so it is read in 'rb' mode.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
    # name='' keeps the node names un-prefixed, so tensors can later be
    # fetched by names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')
加載分類標籤數據
# Load the label map and turn it into the category index that the
# visualization helper receives when drawing detection results.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)
一個將圖片轉爲數組的輔助函數,以及測試圖片路徑
def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a uint8 array of shape (height, width, 3).

    Args:
        image: An object exposing ``mode``, ``size`` (width, height),
            ``getdata()`` and ``convert(mode)``, e.g. a ``PIL.Image.Image``.

    Returns:
        A new ``numpy.ndarray`` with dtype ``uint8`` and shape
        ``(height, width, 3)``.
    """
    # The reshape below assumes exactly three channels per pixel; grayscale,
    # palette or RGBA inputs would otherwise fail, so normalise to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    pixels = np.array(image.getdata())
    return pixels.reshape((im_height, im_width, 3)).astype(np.uint8)


# Images that the detection loop iterates over.
TEST_IMAGE_PATHS = ['test_images/image1.jpg', 'test_images/image2.jpg']
使用模型進行物體檢測
# Run the detector on every test image and display the annotated result.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Input placeholder and output tensors, looked up by name once
        # before the loop.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        for image_path in TEST_IMAGE_PATHS:
            # Close the image file as soon as its pixels have been copied.
            with Image.open(image_path) as image:
                image_np = load_image_into_numpy_array(image)

            # Add a leading axis so a single image is fed as a batch of one.
            image_np_expanded = np.expand_dims(image_np, axis=0)
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            # The results are squeezed to drop the batch axis before being
            # handed to the visualization helper together with image_np.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)

            plt.figure(figsize=[12, 8])
            plt.imshow(image_np)
            plt.show()
檢測結果如下,第一張圖片檢測出了兩隻狗狗
第二張圖片檢測出了一些人和風箏
安裝OpenCV,用於實現和計算機視覺相關的功能,版本爲3.3.0.10
pip install opencv-python opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple
查看是否安裝成功,沒有報錯即可
import cv2

# Smoke test for the installation: this call should complete without error.
# NOTE(review): TrackerMedianFlow presumably comes from the contrib package
# (opencv-contrib-python) — confirm for the installed version.
tracker = cv2.TrackerMedianFlow_create()
在以上代碼的基礎上進行修改:導入cv2並獲取攝像頭,對攝像頭的每一幀畫面進行物體檢測,完整代碼如下
# -*- coding: utf-8 -*-
# Real-time object detection on webcam frames, shown in an OpenCV window.
# Press 'q' in the window to quit.

import numpy as np
import tensorflow as tf
from utils import label_map_util
from utils import visualization_utils as vis_util
import cv2

# Device index 0 is passed to VideoCapture to open the camera.
cap = cv2.VideoCapture(0)

PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
NUM_CLASSES = 90

# Import the frozen inference graph into its own tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
    tf.import_graph_def(od_graph_def, name='')

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# try/finally guarantees the camera and the window are released even when
# the loop ends because of an error rather than the 'q' key.
try:
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            while True:
                ret, image_np = cap.read()
                # A failed read returns no frame; stop instead of passing
                # None to cvtColor.
                if not ret:
                    break

                # Frames are converted BGR -> RGB before inference and
                # RGB -> BGR again before display.
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                cv2.imshow('object detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
                # Mask to the low byte before comparing with the key code.
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
finally:
    cap.release()
    cv2.destroyAllWindows()
使用cv2讀取視頻並獲取每一幀圖片,然後將檢測後的每一幀寫入新的視頻文件
生成的視頻文件只有圖像、沒有聲音,關於音頻的處理以及視頻和音頻的合成,後面再進一步探索
完整代碼如下
# -*- coding: utf-8 -*-
# Run object detection on every frame of a video file and write the
# annotated frames to a new video file.

import numpy as np
import tensorflow as tf
from utils import label_map_util
from utils import visualization_utils as vis_util
import cv2

INPUT_VIDEO = '絕地逃亡.mov'

cap = cv2.VideoCapture(INPUT_VIDEO)
# The first frame supplies the output frame size. It is kept and processed
# by the loop below rather than being thrown away.
ret, image_np = cap.read()
if not ret:
    cap.release()
    raise IOError('Cannot read any frame from video: ' + INPUT_VIDEO)

# NOTE(review): fourcc=-1 leaves the codec choice to OpenCV and its behaviour
# appears to be platform-dependent — confirm it works on the target system.
out = cv2.VideoWriter('output.mov', -1, cap.get(cv2.CAP_PROP_FPS),
                      (image_np.shape[1], image_np.shape[0]))

PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
NUM_CLASSES = 90

# try/finally releases the reader and the writer even if model loading or
# inference raises.
try:
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            # `ret` turns False once no further frame can be read, which
            # ends the loop.
            while ret:
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                # Convert back to BGR before writing the frame.
                out.write(cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
                ret, image_np = cap.read()
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()
播放處理好的視頻,可以看到很多地方都有相應的檢測結果