深度有趣 | 11 TensorFlow物體檢測

時間 2019-11-30

標籤深度有趣 tensorflow 物體檢測简体版

原文原文鏈接

簡介

TensorFlow提供了用於檢測圖片或視頻中所包含物體的API，詳情可參考如下鏈接

github.com/tensorflow/…

物體檢測和圖片分類不一樣

圖片分類是將圖片分爲某一類別，即從多個可能的分類中選擇一個，即使可以按照機率輸出最可能的多個分類，但理論上的正確答案只有一個
物體檢測是檢測圖片中所出現的所有物體，並且用矩形（Bounding Box）進行標註，物體的類別可以包括多種，例如人、車、動物、路標等，即正確答案可以是多個

通過多個例子，瞭解TensorFlow物體檢測API的使用方法

這裏使用預訓練好的ssd_mobilenet_v1_coco模型（Single Shot MultiBox Detector），更多可用的物體檢測模型可以參考這裏

github.com/tensorflow/…

舉個例子

加載庫

# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image

from utils import label_map_util
from utils import visualization_utils as vis_util
複製代碼

定義一些常量

# Upper bound on the number of classes handed to
# label_map_util.convert_label_map_to_categories.
NUM_CLASSES = 90
# Label map (.pbtxt) that is read by label_map_util.load_labelmap.
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
# Frozen inference graph (.pb) of the pre-trained detection model.
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
複製代碼

加載預訓練好的模型

# Load the frozen model: read the serialized GraphDef from disk first, then
# parse it and import its nodes into a dedicated tf.Graph.
detection_graph = tf.Graph()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' imports the nodes without a name prefix, so tensors can later
    # be looked up by names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')
複製代碼

加載分類標籤數據

# Read the label map and turn it into `category_index`, the lookup that is
# later passed to the visualization helper when drawing detections.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)
複製代碼

一個將圖片轉爲數組的輔助函數，以及測試圖片路徑

def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a ``(height, width, 3)`` uint8 array.

    Args:
        image: An object exposing ``mode``, ``size`` (``(width, height)``),
            ``getdata()`` and ``convert()`` like ``PIL.Image.Image``.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` and shape
        ``(height, width, 3)`` holding the RGB pixel values.
    """
    # The reshape below assumes exactly three channels per pixel, so images
    # in any other mode (grayscale, palette, RGBA, ...) are converted first
    # instead of failing with a reshape error.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    pixels = np.array(image.getdata())
    return pixels.reshape((im_height, im_width, 3)).astype(np.uint8)
	
# Images that the detection loop iterates over.
TEST_IMAGE_PATHS = [
    'test_images/image1.jpg',
    'test_images/image2.jpg',
]
複製代碼

使用模型進行物體檢測

# Run the detector on every test image and show the annotated result.
with detection_graph.as_default(), tf.Session(graph=detection_graph) as sess:
    # Input tensor plus the four output tensors, looked up by name.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes, detection_scores, detection_classes, num_detections = [
        detection_graph.get_tensor_by_name(tensor_name)
        for tensor_name in (
            'detection_boxes:0',
            'detection_scores:0',
            'detection_classes:0',
            'num_detections:0',
        )
    ]
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        image_np = load_image_into_numpy_array(image)
        # Add a leading axis of size 1 before feeding the image
        # (presumably the batch dimension expected by image_tensor).
        image_np_expanded = np.expand_dims(image_np, axis=0)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})

        # The helper's return value is not used; image_np itself is what
        # gets displayed afterwards, so the drawing happens on that array.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        plt.figure(figsize=[12, 8])
        plt.imshow(image_np)
        plt.show()
複製代碼

檢測結果如下，第一張圖片檢測出了兩隻狗狗

第二張圖片檢測出了一些人和風箏

攝像頭檢測

安裝OpenCV，用於實現和計算機視覺相關的功能，版本爲3.3.0.10

pip install opencv-python opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple
複製代碼

查看是否安裝成功，沒有報錯即可

# Installation smoke test: the install is considered fine when both the
# import and the tracker construction run without raising.
# NOTE(review): TrackerMedianFlow presumably comes from the contrib package
# installed by the pip command above — confirm for the OpenCV version in use.
import cv2
tracker = cv2.TrackerMedianFlow_create()
複製代碼

在以上代碼的基礎上進行修改

加載cv2並獲取攝像頭
不斷地從攝像頭獲取圖片
將檢測後的結果輸出

完整代碼如下

# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf

from utils import label_map_util
from utils import visualization_utils as vis_util

import cv2
# Open capture device 0 (the camera) before the model is loaded.
cap = cv2.VideoCapture(0)

# Class limit and locations of the pre-trained model files.
NUM_CLASSES = 90
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'

# Read the serialized frozen graph, then import it into its own tf.Graph.
detection_graph = tf.Graph()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' keeps the node names unprefixed for the by-name lookups below.
    tf.import_graph_def(od_graph_def, name='')

# Build the category lookup handed to the visualization helper.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

# Live detection: grab frames from the camera, run the detector on each one
# and display the annotated frame until 'q' is pressed or no frame arrives.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        try:
            while True:
                ret, image_np = cap.read()
                if not ret:
                    # No frame was grabbed (camera missing or disconnected).
                    # Stop here instead of passing None to cvtColor, which
                    # would raise.
                    break
                # OpenCV delivers BGR frames; convert to RGB for the model.
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                # Convert back to BGR, the channel order cv2.imshow expects.
                cv2.imshow('object detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and close the window on every exit path,
            # including errors raised while processing a frame.
            cap.release()
            cv2.destroyAllWindows()
複製代碼

視頻檢測

使用cv2讀取視頻並獲取每一幀圖片，然後將檢測後的每一幀寫入新的視頻文件

生成的視頻文件只有圖像、沒有聲音，關於音頻的處理以及視頻和音頻的合成，後面再進一步探索

完整代碼如下

# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf

from utils import label_map_util
from utils import visualization_utils as vis_util

import cv2
cap = cv2.VideoCapture('絕地逃亡.mov')
# Take the frame size from the capture's properties rather than reading a
# frame: a read here would consume the first frame, so it would never be
# processed or written, and an unreadable file would fail on None.shape.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
# NOTE(review): a fourcc of -1 leaves codec selection to OpenCV and its
# effect is platform-dependent — confirm it works on the target system.
out = cv2.VideoWriter('output.mov', -1, cap.get(cv2.CAP_PROP_FPS), frame_size)

# Class limit and locations of the pre-trained model files.
NUM_CLASSES = 90
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'

# Read the serialized frozen graph, then import it into its own tf.Graph.
detection_graph = tf.Graph()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' keeps the node names unprefixed for the by-name lookups below.
    tf.import_graph_def(od_graph_def, name='')

# Build the category lookup handed to the visualization helper.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

# Offline detection: run the detector on each frame of the input video and
# append the annotated frame to the output video.
try:
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            while cap.isOpened():
                ret, image_np = cap.read()
                if not ret:
                    # read() reports failure once no more frames are
                    # available, which ends the processing loop.
                    break

                # OpenCV delivers BGR frames; convert to RGB for the model.
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                # Convert back to BGR before handing the frame to the writer.
                out.write(cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
finally:
    # Always release the reader and the writer, so the output file is
    # closed even when processing a frame raises.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
複製代碼

播放處理好的視頻，可以看到很多地方都有相應的檢測結果

參考

Introduction and Use - Tensorflow Object Detection API Tutorial：pythonprogramming.net/introductio…
Tensorflow Object Detection API：github.com/tensorflow/…
SSD - Single Shot MultiBox Detector：arxiv.org/pdf/1512.02…

視頻講解課程

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。