本文基於TensorRT 5.0.2,對其自帶的yolov3_onnx例子進行分析和介紹。
本例子展現一個完整的ONNX的pipeline,在tensorrt 5.0的ONNX-TensorRT基礎上,基於Yolov3-608網絡進行inference,包含預處理和後處理。
- 首先,從作者網站下載yolov3,然後將其轉換成onnx形式,接着基於onnx的graph生成一個tensorrt engine;
- 然後,在樣本圖片上進行預處理,並將結果作爲engine的輸入;
- 在inference之後,進行包含bounding-box聚類的後處理,最終得到一個新的圖像文件,並將其存放在磁盤上,以便後續肉眼觀察。
假設當前路徑爲:
TensorRT-5.0.2.6/samples
其對應當前例子文件目錄樹爲:
# tree python
python
├── common.py
└── yolov3_onnx
    ├── coco_labels.txt
    ├── data_processing.py
    ├── onnx_to_tensorrt.py
    ├── README.md
    ├── requirements.txt
    ├── yolov3.cfg
    ├── yolov3.weights
    ├── dog.jpg
    └── yolov3_to_onnx.py
其中:
- yolov3_to_onnx.py:將原始yolov3模型轉換成onnx結構。該腳本會自動下載所需要的依賴文件;
- onnx_to_tensorrt.py:將onnx的yolov3轉換成engine而後進行inference。
首先運行:
python yolov3_to_onnx.py
就會自動從作者網站下載yolov3所需的依賴文件。
from __future__ import print_function from collections import OrderedDict import hashlib import os.path import wget import onnx # github網址爲https://github.com/onnx/onnx from onnx import helper from onnx import TensorProto import numpy as np import sys '''main第二步:解析yolov3.cfg ''' class DarkNetParser(object): """定義一個基於DarkNet YOLOv3-608的解析器.""" def __init__(self, supported_layers): """初始化DarkNetParser對象. Keyword argument: supported_layers -- 一個list,其中每一個元素爲字符串,表示支持的層,以DarkNet的命名習慣, """ self.layer_configs = OrderedDict() self.supported_layers = supported_layers self.layer_counter = 0 def parse_cfg_file(self, cfg_file_path): """逐層解析yolov3.cfg文件,以字典形式追加每層的參數到layer_configs Keyword argument: cfg_file_path -- yolov3.cfg文件的路徑 """ with open(cfg_file_path, 'rb') as cfg_file: remainder = cfg_file.read() remainder = remainder.decode('utf-8') # 這一行for py3 while remainder: # 一次次的去處理字符串,若是返回的layer_dict有值,則表示當前已經處理完一個字典 layer_dict, layer_name, remainder = self._next_layer(remainder) if layer_dict: self.layer_configs[layer_name] = layer_dict return self.layer_configs def _next_layer(self, remainder): """將其視爲一個字符串,而後以DarkNet的分隔符來逐段處理. 在最近的分隔符以後,返回層參數和剩下的字符串 如文件中第一個Conv層 ... [convolutional] batch_normalize=1 filters=32 size=3 stride=1 pad=1 activation=leaky ... 會變成以下形式字典: {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32, 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}. 
'001_convolutional' 是層名layer_name, 後續全部字符以remainder表示的字符串返回 Keyword argument: remainder -- 仍須要處理的字符串 """ # head,tail方式 # 讀取'[',而後獲取tail remainder = remainder.split('[', 1) if len(remainder) == 2: remainder = remainder[1] else: return None, None, None # 讀取‘]’,而後獲取tail remainder = remainder.split(']', 1) if len(remainder) == 2: layer_type, remainder = remainder else: return None, None, None # 過濾註釋行 if remainder.replace(' ', '')[0] == '#': remainder = remainder.split('\n', 1)[1] # 1空行視爲分塊的分隔符,這裏讀取head表示的分塊 layer_param_block, remainder = remainder.split('\n\n', 1) # 處理獲得的分塊,並以'\n'將該塊劃分紅行爲元素的列表,等待處理 layer_param_lines = layer_param_block.split('\n')[1:] layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type # 當前塊命名 layer_dict = dict(type=layer_type) # 若是當前層是支持的,則進行處理,如yolo就不支持 if layer_type in self.supported_layers: for param_line in layer_param_lines: if param_line[0] == '#': continue # 解析每一行 param_type, param_value = self._parse_params(param_line) layer_dict[param_type] = param_value self.layer_counter += 1 return layer_dict, layer_name, remainder def _parse_params(self, param_line): """解析每一行參數,當遇到layers時,返回list,其他返回字符串,整數,浮點數類型. 
Keyword argument: param_line -- 塊中的一行須要解析的參數行 """ param_line = param_line.replace(' ', '') # 緊湊一下 param_type, param_value_raw = param_line.split('=') # 以‘=’劃分 param_value = None # 若是當前參數是layers,則以列表形式返回 if param_type == 'layers': layer_indexes = list() for index in param_value_raw.split(','): layer_indexes.append(int(index)) param_value = layer_indexes # 不然先檢測是不是整數,仍是浮點數,否則就返回字符串類型 elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): condition_param_value_positive = param_value_raw.isdigit() condition_param_value_negative = param_value_raw[0] == '-' and \ param_value_raw[1:].isdigit() if condition_param_value_positive or condition_param_value_negative: param_value = int(param_value_raw) else: param_value = float(param_value_raw) else: param_value = str(param_value_raw) return param_type, param_value '''main第四步:被第三步類的_make_onnx_node方法調用 ''' class MajorNodeSpecs(object): """Helper class用於存儲ONNX輸出節點的信息,對應DarkNet 層的輸出和該層輸出通道, 一些DarkNet層並未被建立,所以沒有對應的ONNX 節點, 不過仍然須要對其進行追蹤以創建skip 鏈接 """ def __init__(self, name, channels): """ 初始化一個MajorNodeSpecs對象 Keyword arguments: name -- ONNX節點的名稱 channels -- 該節點的輸出通道的數量 """ self.name = name self.channels = channels # 對於yolov3.cfg中三層yolo層,這裏表示該節點並不是onnx節點,默認複製false # 其餘如卷積,上採樣等都是被賦予true self.created_onnx_node = False if name is not None and isinstance(channels, int) and channels > 0: self.created_onnx_node = True '''main第四步:被第三步類的_make_conv_node方法調用 ''' class ConvParams(object): """Helper class用於存儲卷積層的超參數,包括在ONNX graph中的前置name和 爲了卷積,偏置,BN等權重指望的維度 另外該類還扮演着爲全部權重生成安全名稱的封裝,並檢查合適的組合搭配 """ def __init__(self, node_name, batch_normalize, conv_weight_dims): """基於base 節點名稱 (e.g. 
101_convolutional),BN設置,卷積權重shape的構造器 Keyword arguments: node_name -- YOLO卷積層的base名稱 batch_normalize -- bool值,表示是否使用BN conv_weight_dims -- 該層的卷積權重的維度 """ self.node_name = node_name self.batch_normalize = batch_normalize assert len(conv_weight_dims) == 4 self.conv_weight_dims = conv_weight_dims def generate_param_name(self, param_category, suffix): """基於兩個字符串輸入生成一個名稱,並檢查組合搭配是否合理""" assert suffix assert param_category in ['bn', 'conv'] assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias']) if param_category == 'bn': assert self.batch_normalize assert suffix in ['scale', 'bias', 'mean', 'var'] elif param_category == 'conv': assert suffix in ['weights', 'bias'] if suffix == 'bias': assert not self.batch_normalize param_name = self.node_name + '_' + param_category + '_' + suffix return param_name '''man第四步:被第三步類的build_onnx_graph方法調用 ''' class WeightLoader(object): """Helper class用於載入序列化的權重, """ def __init__(self, weights_file_path): """讀取YOLOv3權重文件 Keyword argument: weights_file_path --權重文件的路徑. 
""" self.weights_file = self._open_weights_file(weights_file_path) def load_conv_weights(self, conv_params): """返回權重文件的初始化器和卷積層的輸入tensor Keyword argument: conv_params -- a ConvParams object """ initializer = list() inputs = list() if conv_params.batch_normalize: # 建立BN須要的bias,scale,mean,var等參數 bias_init, bias_input = self._create_param_tensors( conv_params, 'bn', 'bias') bn_scale_init, bn_scale_input = self._create_param_tensors( conv_params, 'bn', 'scale') bn_mean_init, bn_mean_input = self._create_param_tensors( conv_params, 'bn', 'mean') bn_var_init, bn_var_input = self._create_param_tensors( conv_params, 'bn', 'var') # 初始化器擴展; 當前層輸入的擴展 initializer.extend( [bn_scale_init, bias_init, bn_mean_init, bn_var_init]) inputs.extend([bn_scale_input, bias_input, bn_mean_input, bn_var_input]) else: # 處理卷積層; 初始化器擴展; 當前層輸入的擴展 bias_init, bias_input = self._create_param_tensors( conv_params, 'conv', 'bias') initializer.append(bias_init) inputs.append(bias_input) # 建立卷積層權重; 初始化器擴展; 當前層輸入的擴展 conv_init, conv_input = self._create_param_tensors( conv_params, 'conv', 'weights') initializer.append(conv_init) inputs.append(conv_input) return initializer, inputs def _open_weights_file(self, weights_file_path): """打開Yolov3 DarkNet文件流,並跳過開頭. Keyword argument: weights_file_path -- 權重文件路徑 """ weights_file = open(weights_file_path, 'rb') length_header = 5 np.ndarray( shape=(length_header, ), dtype='int32', buffer=weights_file.read( length_header * 4)) return weights_file def _create_param_tensors(self, conv_params, param_category, suffix): """用權重文件中,與輸入tensors一塊兒的權重去初始化一個初始化器. 
Keyword arguments: conv_params -- a ConvParams object param_category -- the category of parameters to be created ('bn' or 'conv') suffix -- a string determining the sub-type of above param_category (e.g., 'weights' or 'bias') """ param_name, param_data, param_data_shape = self._load_one_param_type( conv_params, param_category, suffix) # 調用onnx.helper.make_tensor initializer_tensor = helper.make_tensor( param_name, TensorProto.FLOAT, param_data_shape, param_data) # 調用onnx.helper.make_tensor_value_info input_tensor = helper.make_tensor_value_info( param_name, TensorProto.FLOAT, param_data_shape) return initializer_tensor, input_tensor def _load_one_param_type(self, conv_params, param_category, suffix): """基於DarkNet順序進行文件流的反序列化. Keyword arguments: conv_params -- a ConvParams object param_category -- the category of parameters to be created ('bn' or 'conv') suffix -- a string determining the sub-type of above param_category (e.g., 'weights' or 'bias') """ # 生成合理的名稱 param_name = conv_params.generate_param_name(param_category, suffix) channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims if param_category == 'bn': param_shape = [channels_out] elif param_category == 'conv': if suffix == 'weights': param_shape = [channels_out, channels_in, filter_h, filter_w] elif suffix == 'bias': param_shape = [channels_out] param_size = np.product(np.array(param_shape)) # 計算參數的size # 用weights_file.read去逐字節的讀取數據並轉換 param_data = np.ndarray( shape=param_shape, dtype='float32', buffer=self.weights_file.read(param_size * 4)) param_data = param_data.flatten().astype(float) return param_name, param_data, param_shape '''main第三步 ''' class GraphBuilderONNX(object): """用於建立ONNX graph的類,基於以前從yolov3.cfg讀取的網絡結構。該類函數方法有: build_onnx_graph : 構建 _make_onnx_node _make_input_tensor _get_previous_node_specs _make_conv_node _make_shortcut_node _make_route_node _make_upsample_node """ def __init__(self, output_tensors): """用全部DarkNet默認參數來初始化; 而後基於output_tensors指定輸出維度; 以他們的name爲key 
Keyword argument: output_tensors -- 一個 OrderedDict類型 """ self.output_tensors = output_tensors self._nodes = list() self.graph_def = None self.input_tensor = None self.epsilon_bn = 1e-5 self.momentum_bn = 0.99 self.alpha_lrelu = 0.1 self.param_dict = OrderedDict() self.major_node_specs = list() self.batch_size = 1 def build_onnx_graph( self, layer_configs, weights_file_path, verbose=True): """基於全部的層配置進行迭代,建立一個ONNX graph, 而後用下載的yolov3 權重文件進行填充,最後返回該graph定義. Keyword arguments: layer_configs -- OrderedDict對象,包含全部解析的層的配置 weights_file_path -- 權重文件的位置 verbose -- 是否在建立以後顯示該graph(default: True) """ for layer_name in layer_configs.keys(): layer_dict = layer_configs[layer_name] # 讀取yolov3.cfg中每一層,並將其做爲onnx的節點 major_node_specs = self._make_onnx_node(layer_name, layer_dict) # 若是當前爲主要節點,則追加起來 if major_node_specs.name: self.major_node_specs.append(major_node_specs) outputs = list() for tensor_name in self.output_tensors.keys(): # 將輸出節點進行維度擴充 output_dims = [self.batch_size, ] + \ self.output_tensors[tensor_name] # 調用onnx的helper.make_tensor_value_info構建onnx張量,此時並未填充權重 output_tensor = helper.make_tensor_value_info( tensor_name, TensorProto.FLOAT, output_dims) outputs.append(output_tensor) inputs = [self.input_tensor] weight_loader = WeightLoader(weights_file_path) initializer = list() # self.param_dict在_make_onnx_node中已處理 for layer_name in self.param_dict.keys(): _, layer_type = layer_name.split('_', 1) # 如001_convolutional conv_params = self.param_dict[layer_name] assert layer_type == 'convolutional' initializer_layer, inputs_layer = weight_loader.load_conv_weights( conv_params) initializer.extend(initializer_layer) inputs.extend(inputs_layer) del weight_loader # 調用onnx的helper.make_graph進行onnx graph的構建 self.graph_def = helper.make_graph( nodes=self._nodes, name='YOLOv3-608', inputs=inputs, outputs=outputs, initializer=initializer ) if verbose: print(helper.printable_graph(self.graph_def)) # 調用onnx的helper.make_model進行模型的構建 model_def = helper.make_model(self.graph_def, 
producer_name='NVIDIA TensorRT sample') return model_def def _make_onnx_node(self, layer_name, layer_dict): """輸入一個layer參數字典,選擇對應的函數來建立ONNX節點,而後將爲圖建立的重要的信息存儲爲 MajorNodeSpec對象 Keyword arguments: layer_name -- layer的名稱 (即layer_configs中的key) layer_dict -- 一個layer參數字典 (layer_configs的value) """ layer_type = layer_dict['type'] # 先檢查self.input_tensor是否爲空,爲空且第一個塊不是net,則報錯,不然處理該net # 能夠看出 這裏只在最開始執行一次,由於後續self.input_tensor都不爲空。 if self.input_tensor is None: if layer_type == 'net': major_node_output_name, major_node_output_channels = self._make_input_tensor( layer_name, layer_dict) major_node_specs = MajorNodeSpecs(major_node_output_name, major_node_output_channels) else: raise ValueError('The first node has to be of type "net".') else: node_creators = dict() node_creators['convolutional'] = self._make_conv_node node_creators['shortcut'] = self._make_shortcut_node node_creators['route'] = self._make_route_node node_creators['upsample'] = self._make_upsample_node # 依次處理不一樣的層,並調用對應node_creators[layer_type]()函數進行處理 if layer_type in node_creators.keys(): major_node_output_name, major_node_output_channels = \ node_creators[layer_type](layer_name, layer_dict) major_node_specs = MajorNodeSpecs(major_node_output_name, major_node_output_channels) else: # 跳過三個yolo層 print( 'Layer of type %s not supported, skipping ONNX node generation.' 
% layer_type) major_node_specs = MajorNodeSpecs(layer_name, None) return major_node_specs def _make_input_tensor(self, layer_name, layer_dict): """爲net layer建立輸入tensor,並存儲對應batch size.能夠看出,該函數只被調用一次 Keyword arguments: layer_name -- 層的名字 (如 layer_configs中key) layer_dict -- 一個layer參數字典( layer_configs中的value) """ batch_size = layer_dict['batch'] channels = layer_dict['channels'] height = layer_dict['height'] width = layer_dict['width'] self.batch_size = batch_size # 用onnx.helper.make_tensor_value_info構建onnx張量節點 input_tensor = helper.make_tensor_value_info( str(layer_name), TensorProto.FLOAT, [ batch_size, channels, height, width]) self.input_tensor = input_tensor return layer_name, channels def _get_previous_node_specs(self, target_index=-1): """獲取以前建立好的onnx節點(跳過那些沒生成的節點,好比yolo節點). target_index能夠可以直接跳到對應節點. Keyword arguments: target_index -- 可選的參數,幫助跳到具體索引(default: -1 表示跳到前一個元素) """ # 經過反向遍歷,找到最後一個(這裏是第一個)created_onnx_node爲真的節點 previous_node = None for node in self.major_node_specs[target_index::-1]: if node.created_onnx_node: previous_node = node break assert previous_node is not None return previous_node def _make_conv_node(self, layer_name, layer_dict): """用可選的bn和激活函數nonde去建立一個onnx的卷積node Keyword arguments: layer_name -- 層的名字 (如 layer_configs中key) layer_dict -- 一個layer參數字典( layer_configs中的value) """ # 先找最近的一個節點 previous_node_specs = self._get_previous_node_specs() ''' i) 處理卷積層''' # 構建該層的inputs,通道等等信息 inputs = [previous_node_specs.name] previous_channels = previous_node_specs.channels kernel_size = layer_dict['size'] stride = layer_dict['stride'] filters = layer_dict['filters'] # 檢測該層是否有bn batch_normalize = False if 'batch_normalize' in layer_dict.keys( ) and layer_dict['batch_normalize'] == 1: batch_normalize = True kernel_shape = [kernel_size, kernel_size] weights_shape = [filters, previous_channels] + kernel_shape # 構建卷積層的參數層的實例 conv_params = ConvParams(layer_name, batch_normalize, weights_shape) strides = [stride, stride] dilations = [1, 1] # 
調用ConvParams.generate_param_name生成合適的參數名稱 weights_name = conv_params.generate_param_name('conv', 'weights') inputs.append(weights_name) if not batch_normalize: bias_name = conv_params.generate_param_name('conv', 'bias') inputs.append(bias_name) # 用onnx.helper.make_node構建onnx的卷積節點 conv_node = helper.make_node( 'Conv', inputs=inputs, outputs=[layer_name], kernel_shape=kernel_shape, strides=strides, auto_pad='SAME_LOWER', dilations=dilations, name=layer_name ) self._nodes.append(conv_node) inputs = [layer_name] layer_name_output = layer_name ''' ii) 處理BN層''' if batch_normalize: layer_name_bn = layer_name + '_bn' bn_param_suffixes = ['scale', 'bias', 'mean', 'var'] for suffix in bn_param_suffixes: bn_param_name = conv_params.generate_param_name('bn', suffix) inputs.append(bn_param_name) batchnorm_node = helper.make_node( 'BatchNormalization', inputs=inputs, outputs=[layer_name_bn], epsilon=self.epsilon_bn, momentum=self.momentum_bn, name=layer_name_bn ) self._nodes.append(batchnorm_node) inputs = [layer_name_bn] layer_name_output = layer_name_bn ''' iii) 處理激活函數''' if layer_dict['activation'] == 'leaky': layer_name_lrelu = layer_name + '_lrelu' lrelu_node = helper.make_node( 'LeakyRelu', inputs=inputs, outputs=[layer_name_lrelu], name=layer_name_lrelu, alpha=self.alpha_lrelu ) self._nodes.append(lrelu_node) inputs = [layer_name_lrelu] layer_name_output = layer_name_lrelu elif layer_dict['activation'] == 'linear': pass else: print('Activation not supported.') self.param_dict[layer_name] = conv_params return layer_name_output, filters def _make_shortcut_node(self, layer_name, layer_dict): """從DarkNet graph中讀取信息,基於onnx 的add 節點建立shortcut 節點. 
Keyword arguments: layer_name -- 層的名字 (如 layer_configs中key) layer_dict -- 一個layer參數字典( layer_configs中的value) """ shortcut_index = layer_dict['from'] # 當前層與前面哪層shorcut activation = layer_dict['activation'] assert activation == 'linear' first_node_specs = self._get_previous_node_specs() # 最近一層 second_node_specs = self._get_previous_node_specs( target_index=shortcut_index) # 前面具體須要shorcut的層 assert first_node_specs.channels == second_node_specs.channels channels = first_node_specs.channels inputs = [first_node_specs.name, second_node_specs.name] # 用onnx.helper.make_node建立節點 shortcut_node = helper.make_node( 'Add', inputs=inputs, outputs=[layer_name], name=layer_name, ) self._nodes.append(shortcut_node) return layer_name, channels def _make_route_node(self, layer_name, layer_dict): """若是來自DarkNet配置的layer參數只有一個因此,那麼接着在指定(負)索引上建立節點 不然,建立一個onnx concat 節點來實現路由特性. Keyword arguments: layer_name -- 層的名字 (如 layer_configs中key) layer_dict -- 一個layer參數字典( layer_configs中的value) """ # 處理yolov3.cfg中[route] route_node_indexes = layer_dict['layers'] if len(route_node_indexes) == 1: split_index = route_node_indexes[0] assert split_index < 0 # Increment by one because we skipped the YOLO layer: split_index += 1 self.major_node_specs = self.major_node_specs[:split_index] layer_name = None channels = None else: inputs = list() channels = 0 for index in route_node_indexes: if index > 0: # Increment by one because we count the input as a node (DarkNet # does not) index += 1 route_node_specs = self._get_previous_node_specs( target_index=index) inputs.append(route_node_specs.name) channels += route_node_specs.channels assert inputs assert channels > 0 route_node = helper.make_node( 'Concat', axis=1, inputs=inputs, outputs=[layer_name], name=layer_name, ) self._nodes.append(route_node) return layer_name, channels def _make_upsample_node(self, layer_name, layer_dict): """建立一個onnx的Upsample節點. 
Keyword arguments: layer_name -- 層的名字 (如 layer_configs中key) layer_dict -- 一個layer參數字典( layer_configs中的value) """ upsample_factor = float(layer_dict['stride']) previous_node_specs = self._get_previous_node_specs() inputs = [previous_node_specs.name] channels = previous_node_specs.channels assert channels > 0 upsample_node = helper.make_node( 'Upsample', mode='nearest', # For ONNX versions <0.7.0, Upsample nodes accept different parameters than 'scales': scales=[1.0, 1.0, upsample_factor, upsample_factor], inputs=inputs, outputs=[layer_name], name=layer_name, ) self._nodes.append(upsample_node) return layer_name, channels def generate_md5_checksum(local_path): """計算本地文件的md5 Keyword argument: local_path -- 本地文件路徑 """ with open(local_path) as local_file: data = local_file.read() return hashlib.md5(data).hexdigest() def download_file(local_path, link, checksum_reference=None): """下載指定url到本地,並進行摘要校對. Keyword arguments: local_path -- 本地文件存儲路徑 link -- 須要下載的url checksum_reference -- expected MD5 checksum of the file """ if not os.path.exists(local_path): print('Downloading from %s, this may take a while...' % link) wget.download(link, local_path) print() if checksum_reference is not None: checksum = generate_md5_checksum(local_path) if checksum != checksum_reference: raise ValueError( 'The MD5 checksum of local file %s differs from %s, please manually remove \ the file and try again.' % (local_path, checksum_reference)) return local_path def main(): """Run the DarkNet-to-ONNX conversion for YOLOv3-608.""" # 註釋掉下面的部分, # if sys.version_info[0] > 2: # raise Exception("This is script is only compatible with python2, please re-run this script \ # with python2. 
The rest of this sample can be run with either version of python") ''' 1 - 下載yolov3的配置文件,並進行摘要驗證''' cfg_file_path = download_file( 'yolov3.cfg', 'https://raw.githubusercontent.com/pjreddie/darknet/f86901f6177dfc6116360a13cc06ab680e0c86b0/cfg/yolov3.cfg', 'b969a43a848bbf26901643b833cfb96c') # DarkNetParser將會只提取這些層的參數,類型爲'yolo'的這三層不能很好的解析, # 由於他們包含在後續的後處理中; supported_layers = ['net', 'convolutional', 'shortcut', 'route', 'upsample'] ''' 2 - 建立一個DarkNetParser對象,並生成一個OrderedDict,包含cfg文件讀取的全部層配置''' parser = DarkNetParser(supported_layers) layer_configs = parser.parse_cfg_file(cfg_file_path) # 在解析完以後,再也不須要該對象 del parser ''' 3 - 實例化一個GraphBuilderONNX類對象,用已知輸出tensor維度進行初始化''' # 在上面的layer_config,有三個輸出是須要知道的,CHW格式 output_tensor_dims = OrderedDict() output_tensor_dims['082_convolutional'] = [255, 19, 19] output_tensor_dims['094_convolutional'] = [255, 38, 38] output_tensor_dims['106_convolutional'] = [255, 76, 76] # 內置yolov3的一些默認參數來進行實例化 builder = GraphBuilderONNX(output_tensor_dims) ''' 4 - 調用GraphBuilderONNX的build_onnx_graph方法 用以前解析好的層配置信息和權重文件,生成ONNX graph''' ''' 從做者官網下載yolov3的權重文件,以此填充tensorrt的network ''' weights_file_path = download_file( 'yolov3.weights', 'https://pjreddie.com/media/files/yolov3.weights', 'c84e5b99d0e52cd466ae710cadf6d84c') yolov3_model_def = builder.build_onnx_graph( layer_configs=layer_configs, weights_file_path=weights_file_path, verbose=True) # 模型定義結束,刪除builder對象 del builder ''' 5 - 在ONNX模型定義上進行健全檢查''' onnx.checker.check_model(yolov3_model_def) ''' 6 - 序列化生成的ONNX graph到文件''' output_file_path = 'yolov3.onnx' onnx.save(yolov3_model_def, output_file_path) if __name__ == '__main__': main()
結果如下:
[root@30d4bceec4c4 yolov3_onnx]# python yolov3_to_onnx.py Layer of type yolo not supported, skipping ONNX node generation. Layer of type yolo not supported, skipping ONNX node generation. Layer of type yolo not supported, skipping ONNX node generation. graph YOLOv3-608 ( %000_net[FLOAT, 64x3x608x608] ) initializers ( %001_convolutional_bn_scale[FLOAT, 32] %001_convolutional_bn_bias[FLOAT, 32] %001_convolutional_bn_mean[FLOAT, 32] %001_convolutional_bn_var[FLOAT, 32] %001_convolutional_conv_weights[FLOAT, 32x3x3x3] %002_convolutional_bn_scale[FLOAT, 64] ...... %105_convolutional_conv_weights[FLOAT, 256x128x3x3] %106_convolutional_conv_bias[FLOAT, 255] %106_convolutional_conv_weights[FLOAT, 255x256x1x1] ) { %001_convolutional = Conv[auto_pad = 'SAME_LOWER', dilations = [1, 1], kernel_shape = [3, 3], strides = [1, 1]](%000_net, %001_convolutional_conv_weights) %001_convolutional_bn = BatchNormalization[epsilon = 9.99999974737875e-06, momentum = 0.990000009536743](%001_convolutional, ...... %105_convolutional_bn = BatchNormalization[epsilon = 9.99999974737875e-06, momentum = 0.990000009536743](%105_convolutional, %105_convolutional_bn_scale, %105_convolutional_bn_bias, %105_convolutional_bn_mean, %105_convolutional_bn_var) %105_convolutional_lrelu = LeakyRelu[alpha = 0.100000001490116](%105_convolutional_bn) %106_convolutional = Conv[auto_pad = 'SAME_LOWER', dilations = [1, 1], kernel_shape = [1, 1], strides = [1, 1]](%105_convolutional_lrelu, %106_convolutional_conv_weights, %106_convolutional_conv_bias) return %082_convolutional, %094_convolutional, %106_convolutional }
ps:在該例子中onnx不要安裝1.4.1版本,可以安裝如1.2.1版本,否則會出現錯誤。
接下來看onnx_to_tensorrt.py
from __future__ import print_function import numpy as np import tensorrt as trt import pycuda.driver as cuda import pycuda.autoinit from PIL import ImageDraw from yolov3_to_onnx import download_file from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES import sys, os #sys.path.insert(1, os.path.join(sys.path[0], "..")) #import common '''main第3.2步 ''' def allocate_buffers(engine): inputs = [] outputs = [] bindings = [] stream = cuda.Stream() for binding in engine: size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size dtype = trt.nptype(engine.get_binding_dtype(binding)) # 分配host和device端的buffer host_mem = cuda.pagelocked_empty(size, dtype) device_mem = cuda.mem_alloc(host_mem.nbytes) # 將device端的buffer追加到device的bindings. bindings.append(int(device_mem)) # Append to the appropriate list. if engine.binding_is_input(binding): inputs.append(HostDeviceMem(host_mem, device_mem)) else: outputs.append(HostDeviceMem(host_mem, device_mem)) return inputs, outputs, bindings, stream '''main中第3.3步 ''' # 該函數能夠適應多個輸入/輸出;輸入和輸出格式爲HostDeviceMem對象組成的列表 def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): # 將數據移動到GPU [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] # 執行inference. context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) # 將結果從 GPU寫回到host端 [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] # 同步stream stream.synchronize() # 返回host端的輸出結果 return [out.host for out in outputs] #------------ TRT_LOGGER = trt.Logger() def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'): """在原始輸入圖片上標記bounding box沒而後返回結果. Keyword arguments: image_raw -- a raw PIL Image bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4). 
categories -- NumPy array containing the corresponding category for each object, with shape (N,) confidences -- NumPy array containing the corresponding confidence for each object, with shape (N,) all_categories -- a list of all categories in the correct ordered (required for looking up the category name) bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue') """ draw = ImageDraw.Draw(image_raw) print(bboxes, confidences, categories) for box, score, category in zip(bboxes, confidences, categories): x_coord, y_coord, width, height = box left = max(0, np.floor(x_coord + 0.5).astype(int)) top = max(0, np.floor(y_coord + 0.5).astype(int)) right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int)) bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int)) draw.rectangle(((left, top), (right, bottom)), outline=bbox_color) draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color) return image_raw def get_engine(onnx_file_path, engine_file_path=""): """若是已經有序列化engine,則直接用,不然構建新的tensorrt engine而後保存.""" # 閉包 def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder,\ builder.create_network() as network, \ trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = 1 << 30 # 1GB builder.max_batch_size = 1 # 解析模型文件 if not os.path.exists(onnx_file_path): print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) exit(0) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') parser.parse(model.read()) print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'.format(onnx_file_path)) engine = builder.build_cuda_engine(network) print("Completed creating Engine") with open(engine_file_path, 
"wb") as f: f.write(engine.serialize()) return engine if os.path.exists(engine_file_path): # 若是序列化engine已經存在,那麼就直接跳過構建部分. print("Reading engine from file {}".format(engine_file_path)) with open(engine_file_path, "rb") as f, \ trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine() def main(): """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" ''' 1 - 裝載以前轉換好的onnx,準備測試圖片''' onnx_file_path = 'yolov3.onnx' engine_file_path = "yolov3.trt" # 下載測試圖片 input_image_path = download_file('dog.jpg', 'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None) ''' 2 - 對圖片進行預處理''' # yolov3網絡的輸入size,HW順序 input_resolution_yolov3_HW = (608, 608) # 建立一個預處理來處理任意圖片,以符合yolov3的輸入 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) # 載入圖像,並進行預處理 image_raw, image = preprocessor.process(input_image_path) # 將該預處理好的圖像以WH格式存儲,以備後續使用 shape_orig_WH = image_raw.size ''' 3 - 基於tensorrt進行yolov3模型的運行''' # yolov3輸出的三個map的shapeOutput shapes expected by the post-processor output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] # 用 TensorRT進行inference trt_outputs = [] ''' 3.1 - 基於get_engine生成engine''' with get_engine(onnx_file_path, engine_file_path) as engine, \ engine.create_execution_context() as context: ''' 3.2 - 分配host,device端的buffer''' inputs, outputs, bindings, stream = allocate_buffers(engine) print('Running inference on image {}...'.format(input_image_path)) ''' 3.3 - 進行inference''' inputs[0].host = image trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) ''' 4 - 對tensorrt在onnx結構的yolov3上獲得的結果進行後處理''' trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)], 
"obj_threshold": 0.6, # 對象覆蓋的閾值,[0,1]之間 "nms_threshold": 0.5, # nms的閾值,[0,1]之間 "yolo_input_resolution": input_resolution_yolov3_HW} # 建立後處理類的實例 postprocessor = PostprocessYOLO(**postprocessor_args) # 運行後處理算法,並獲得檢測到對象的bounding box boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)) ''' 5 - 在原始輸入圖像上將檢測框標記,並保存png文件 ''' obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES) output_image_path = 'dog_bboxes.png' obj_detected_img.save(output_image_path, 'PNG') print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path)) if __name__ == '__main__': main()
運行程序及結果:
python onnx_to_tensorrt.py
此時文件目錄爲:
.
├── coco_labels.txt
├── data_processing.py
├── dog_bboxes.png
├── dog.jpg
├── onnx_to_tensorrt.py
├── __pycache__
│   ├── data_processing.cpython-35.pyc
│   └── yolov3_to_onnx.cpython-35.pyc
├── README.md
├── requirements.txt
├── yolov3.cfg
├── yolov3.onnx
├── yolov3_to_onnx.py
├── yolov3.trt
└── yolov3.weights
最後我們來看下data_processing.py
import math

from PIL import Image
import numpy as np

# YOLOv3-608 has been trained with these 80 categories from COCO:
# Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context."
# European Conference on Computer Vision. Springer, Cham, 2014.


def load_label_categories(label_file_path):
    """Read the category names from a label file.

    Keyword arguments:
    label_file_path -- path of a text file holding one category name per line

    Returns a list of strings in file order, each with its trailing newline
    removed.
    """
    # The context manager closes the handle deterministically instead of
    # leaving it open until the garbage collector gets to it.
    with open(label_file_path) as label_file:
        return [line.rstrip('\n') for line in label_file]


LABEL_FILE_PATH = 'coco_labels.txt'
ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)

# Make sure the label file really lists 80 categories:
CATEGORY_NUM = len(ALL_CATEGORIES)
assert CATEGORY_NUM == 80


def _sigmoid(values):
    """Return the element-wise logistic sigmoid of a NumPy float array."""
    # exp() overflows to inf for strongly negative inputs; the quotient then
    # evaluates to 0.0, which is the correct limit, so only the overflow
    # warning is silenced.
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-values))


class PreprocessYOLO(object):
    """Load an image and convert it to the input layout YOLOv3-608 expects."""

    def __init__(self, yolo_input_resolution):
        """Store the network input resolution.

        Keyword arguments:
        yolo_input_resolution -- two-dimensional tuple with the target
        network's (spatial) input resolution in HW order
        """
        self.yolo_input_resolution = yolo_input_resolution

    def process(self, input_image_path):
        """Load an image from disk and pre-process it (resize, normalize,
        re-order the axes).

        Keyword arguments:
        input_image_path -- string path of the image to be loaded

        Returns a tuple (image_raw, image_preprocessed): the untouched PIL
        image and the float32 NumPy array produced by the pre-processing.
        """
        image_raw, image_resized = self._load_and_resize(input_image_path)
        image_preprocessed = self._shuffle_and_normalize(image_resized)
        return image_raw, image_preprocessed

    def _load_and_resize(self, input_image_path):
        """Open the image, resize it to the network resolution and return
        both the original PIL image and the resized pixels as a float32
        NumPy array.

        Keyword arguments:
        input_image_path -- string path of the image to be loaded
        """
        image_raw = Image.open(input_image_path)
        # PIL expects the target size in (width, height) order, whereas the
        # resolution is stored in HW order, hence the swap.
        new_resolution = (
            self.yolo_input_resolution[1],
            self.yolo_input_resolution[0])
        image_resized = image_raw.resize(
            new_resolution, resample=Image.BICUBIC)
        image_resized = np.array(image_resized, dtype=np.float32, order='C')
        return image_raw, image_resized

    def _shuffle_and_normalize(self, image):
        """Scale the pixel values by 1/255 and convert the image from HWC to
        NCHW layout.

        Note that the scaling is done in place on the array passed in.

        Keyword arguments:
        image -- image as three-dimensional NumPy float array, in HWC format
        """
        image /= 255.0
        # HWC -> CHW:
        image = np.transpose(image, [2, 0, 1])
        # CHW -> NCHW:
        image = np.expand_dims(image, axis=0)
        # The transpose only changed the strides; copy the data into
        # row-major ("C") order:
        image = np.array(image, dtype=np.float32, order='C')
        return image


class PostprocessYOLO(object):
    """Post-process the three output tensors of YOLOv3-608."""

    def __init__(self,
                 yolo_masks,
                 yolo_anchors,
                 obj_threshold,
                 nms_threshold,
                 yolo_input_resolution):
        """Initialize with all values that will be kept when processing
        several frames. Assuming 3 outputs of the network in the case of
        (large) YOLOv3.

        Keyword arguments:
        yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
        yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO
        anchors
        obj_threshold -- threshold for object coverage, float value between
        0 and 1
        nms_threshold -- threshold for non-max suppression algorithm, float
        value between 0 and 1
        yolo_input_resolution -- two-dimensional tuple with the target
        network's (spatial) input resolution in HW order
        """
        self.masks = yolo_masks
        self.anchors = yolo_anchors
        self.object_threshold = obj_threshold
        self.nms_threshold = nms_threshold
        self.input_resolution_yolo = yolo_input_resolution

    def process(self, outputs, resolution_raw):
        """Take the YOLOv3 outputs generated from a TensorRT forward pass,
        post-process them and return the bounding boxes of the detected
        objects together with their categories and confidences.

        Keyword arguments:
        outputs -- outputs from a TensorRT engine in NCHW format
        resolution_raw -- the original spatial resolution from the input PIL
        image in WH order

        Returns a tuple (boxes, categories, confidences) of NumPy arrays, or
        (None, None, None) when nothing was detected.
        """
        outputs_reshaped = [
            self._reshape_output(output) for output in outputs]
        boxes, categories, confidences = self._process_yolo_output(
            outputs_reshaped, resolution_raw)
        return boxes, categories, confidences

    def _reshape_output(self, output):
        """Reshape a TensorRT output from NCHW to NHWC format (with expected
        C=255), and then return it in (height,width,3,85) dimensionality
        after further reshaping.

        Keyword argument:
        output -- an output from a TensorRT engine after inference
        """
        output = np.transpose(output, [0, 2, 3, 1])
        _, height, width, _ = output.shape
        dim1, dim2 = height, width
        dim3 = 3
        # 4 box parameters + 1 objectness score + CATEGORY_NUM=80 class
        # scores:
        dim4 = (4 + 1 + CATEGORY_NUM)
        return np.reshape(output, (dim1, dim2, dim3, dim4))

    def _process_yolo_output(self, outputs_reshaped, resolution_raw):
        """Take in a list of three reshaped YOLO outputs in
        (height,width,3,85) shape and return the bounding boxes of the
        detected objects together with their categories and confidences.

        Keyword arguments:
        outputs_reshaped -- list of three reshaped YOLO outputs as NumPy
        arrays with shape (height,width,3,85)
        resolution_raw -- the original spatial resolution from the input PIL
        image in WH order

        Returns (None, None, None) when no candidate survives filtering.
        """
        # E.g. in YOLOv3-608, there are three output tensors, which we
        # associate with their respective masks. Then we iterate through all
        # output-mask pairs and generate candidates for bounding boxes, their
        # corresponding category predictions and their confidences:
        boxes, categories, confidences = list(), list(), list()
        for output, mask in zip(outputs_reshaped, self.masks):
            box, category, confidence = self._process_feats(output, mask)
            box, category, confidence = self._filter_boxes(
                box, category, confidence)
            boxes.append(box)
            categories.append(category)
            confidences.append(confidence)

        boxes = np.concatenate(boxes)
        categories = np.concatenate(categories)
        confidences = np.concatenate(confidences)

        # Scale boxes back to original image shape:
        width, height = resolution_raw
        image_dims = [width, height, width, height]
        boxes = boxes * image_dims

        # Using the candidates from the previous (loop) step, we apply the
        # non-max suppression algorithm per category; it clusters adjacent
        # bounding boxes to a single bounding box:
        nms_boxes, nms_categories, nscores = list(), list(), list()
        for category in set(categories):
            idxs = np.where(categories == category)
            category_boxes = boxes[idxs]
            category_ids = categories[idxs]
            category_confidences = confidences[idxs]

            keep = self._nms_boxes(category_boxes, category_confidences)

            nms_boxes.append(category_boxes[keep])
            nms_categories.append(category_ids[keep])
            nscores.append(category_confidences[keep])

        if not nms_categories and not nscores:
            return None, None, None

        boxes = np.concatenate(nms_boxes)
        categories = np.concatenate(nms_categories)
        confidences = np.concatenate(nscores)

        return boxes, categories, confidences

    def _process_feats(self, output_reshaped, mask):
        """Take in a reshaped YOLO output in height,width,3,85 format
        together with its corresponding YOLO mask and return the detected
        bounding boxes, the confidence, and the class probability in each
        cell/pixel.

        Keyword arguments:
        output_reshaped -- reshaped YOLO output as NumPy arrays with shape
        (height,width,3,85)
        mask -- tuple with the indexes (into the anchor list) of the anchors
        that belong to this output

        Returns a tuple (boxes, box_confidence, box_class_probs); boxes holds
        x, y, width, height in its last axis.
        """
        grid_h, grid_w, _, _ = output_reshaped.shape

        anchors = [self.anchors[i] for i in mask]
        # Reshape so that the anchors broadcast over height and width:
        anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])

        # Work in float64 and let NumPy evaluate exp()/sigmoid on whole
        # arrays. A scalar math.exp() wrapped in np.vectorize is a slow
        # per-element Python loop and raises OverflowError on strongly
        # negative logits.
        output = np.asarray(output_reshaped, dtype=np.float64)

        box_xy = _sigmoid(output[..., :2])
        box_wh = np.exp(output[..., 2:4]) * anchors_tensor
        # The 4:5 slice keeps the trailing axis, giving (height,width,3,1):
        box_confidence = _sigmoid(output[..., 4:5])
        box_class_probs = _sigmoid(output[..., 5:])

        # Per-cell (x, y) offsets. meshgrid with the default 'xy' indexing
        # yields (grid_h, grid_w) arrays, so this also works when the grid
        # is not square.
        col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
        # Shape (grid_h, grid_w, 1, 2); broadcasts over the anchor axis:
        grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

        box_xy += grid
        box_xy /= (grid_w, grid_h)
        # The resolution is stored in HW order while box_wh holds
        # (width, height), so divide by (W, H):
        input_h, input_w = self.input_resolution_yolo
        box_wh /= (input_w, input_h)
        # Move from the box center to its top-left corner:
        box_xy -= (box_wh / 2.)
        boxes = np.concatenate((box_xy, box_wh), axis=-1)

        # boxes: top-left corner plus size, box_confidence: confidence
        # level, box_class_probs: class confidence
        return boxes, box_confidence, box_class_probs

    def _filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Take in the unfiltered bounding box descriptors and discard each
        cell whose score is lower than the object threshold set during class
        initialization.

        Keyword arguments:
        boxes -- bounding box coordinates with shape (height,width,3,4); 4
        for x,y,width,height coordinates of the boxes
        box_confidences -- bounding box confidences with shape
        (height,width,3,1); 1 for as confidence scalar per element
        box_class_probs -- class probabilities with shape
        (height,width,3,CATEGORY_NUM)

        Returns a tuple (boxes, classes, scores) restricted to the surviving
        candidates.
        """
        box_scores = box_confidences * box_class_probs
        box_classes = np.argmax(box_scores, axis=-1)
        box_class_scores = np.max(box_scores, axis=-1)
        pos = np.where(box_class_scores >= self.object_threshold)

        boxes = boxes[pos]
        classes = box_classes[pos]
        scores = box_class_scores[pos]

        return boxes, classes, scores

    def _nms_boxes(self, boxes, box_confidences):
        """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding
        boxes with their confidence scores and return an array with the
        indexes of the bounding boxes we want to keep (and display later).

        Keyword arguments:
        boxes -- a NumPy array containing N bounding-box coordinates that
        survived filtering, with shape (N,4); 4 for x,y,width,height
        coordinates of the boxes
        box_confidences -- a Numpy array containing the corresponding
        confidences with shape N
        """
        x_coord = boxes[:, 0]
        y_coord = boxes[:, 1]
        width = boxes[:, 2]
        height = boxes[:, 3]

        areas = width * height
        # Candidate indexes, highest confidence first:
        ordered = box_confidences.argsort()[::-1]

        keep = list()
        while ordered.size > 0:
            # Index of the current element:
            i = ordered[0]
            rest = ordered[1:]
            keep.append(i)

            xx1 = np.maximum(x_coord[i], x_coord[rest])
            yy1 = np.maximum(y_coord[i], y_coord[rest])
            xx2 = np.minimum(x_coord[i] + width[i],
                             x_coord[rest] + width[rest])
            yy2 = np.minimum(y_coord[i] + height[i],
                             y_coord[rest] + height[rest])

            # NOTE(review): the "+ 1" is applied to the intersection only,
            # not to `areas`; kept unchanged to preserve the sample's
            # results.
            width1 = np.maximum(0.0, xx2 - xx1 + 1)
            height1 = np.maximum(0.0, yy2 - yy1 + 1)
            intersection = width1 * height1
            union = (areas[i] + areas[rest] - intersection)

            # Compute the Intersection over Union (IoU) score:
            iou = intersection / union

            # The goal of the NMS algorithm is to reduce the number of
            # adjacent bounding-box candidates to a minimum. In this step, we
            # keep only those elements whose overlap with the current
            # bounding box is lower than the threshold:
            indexes = np.where(iou <= self.nms_threshold)[0]
            # `indexes` is relative to `rest`, hence the shift by one:
            ordered = ordered[indexes + 1]

        # An explicit integer dtype keeps the result usable as an index
        # array even when it is empty.
        keep = np.array(keep, dtype=np.intp)
        return keep