This article analyzes and walks through the introductory_parser_samples examples bundled with TensorRT 5.0.2.
Assume the current path is:

```
TensorRT-5.0.2.6/samples
```
The corresponding directory tree for this example is:
```
# tree python
python/
├── common.py
├── introductory_parser_samples
│   ├── caffe_resnet50.py
│   ├── onnx_resnet50.py
│   ├── README.md
│   ├── requirements.txt
│   └── uff_resnet50.py
```
This example demonstrates how to use TensorRT and its bundled parsers (UFF, Caffe, and ONNX) to run inference with a ResNet-50 network trained in different frameworks.
- caffe_resnet50: shows how to use the Caffe parser to parse a Caffe-trained model, build an engine, and run inference;
- onnx_resnet50: shows how to parse an ONNX model with the open-source ONNX parser and run inference;
- uff_resnet50: shows how to build an engine from a UFF model file (converted from a TensorFlow protobuf) and run inference (the parser-specific calls for all three are sketched just below).
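The only real difference between the three scripts is how the parser populates the TensorRT network; everything else (engine build, buffer allocation, inference) is shared. A minimal side-by-side sketch of just that step, with argument values elided (API as in TensorRT 5):

```python
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
    # caffe_resnet50.py: parse prototxt + caffemodel, then mark the output by name.
    with trt.CaffeParser() as parser:
        pass  # model_tensors = parser.parse(deploy=..., model=..., network=network, dtype=trt.float32)

    # onnx_resnet50.py: the parser is bound to the network when it is constructed.
    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        pass  # parser.parse(model_bytes)

    # uff_resnet50.py: register the input/output nodes first, then parse the .uff file.
    with trt.UffParser() as parser:
        pass  # parser.register_input(...); parser.register_output(...); parser.parse(uff_file, network)
```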
The required data files are:
```
/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg
```
The full code comes first. Starting from the main function, it walks through the whole flow by calling the helpers one by one; overall the code is fairly simple:
```python
# This sample uses a Caffe ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="ResNet50_fp32.caffemodel",
                       DEPLOY_PATH="ResNet50_N2.prototxt",
                       INPUT_SHAPE=(3, 224, 224),
                       OUTPUT_NAME="prob",
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.CaffeParser() as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # Load and parse the Caffe model, populating the TensorRT network. The returned object lets tensors be retrieved by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe the network output must be marked manually; since the output tensor's name is known, it can be found directly.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                                      # Three test images.
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]  # Caffe model file, deploy file, and labels file.
    labels = open(labels_file, 'r').read().split('\n')                 # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_caffe. '''
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # Inference follows the same flow no matter which parser built the engine.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
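One practical extension the sample leaves out: building the engine from the Caffe files is far slower than running inference, so the built engine can be serialized once and reloaded on later runs. A minimal sketch using TensorRT's Runtime API (the file name resnet50_caffe.engine is arbitrary):

```python
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def save_engine(engine, path="resnet50_caffe.engine"):
    # Serialize the built engine to disk so later runs can skip the build step.
    with open(path, "wb") as f:
        f.write(engine.serialize())

def load_engine(path="resnet50_caffe.engine"):
    # Deserialize a previously saved engine; it is used exactly like one
    # returned by build_engine_caffe().
    with trt.Runtime(TRT_LOGGER) as runtime, open(path, "rb") as f:
        return runtime.deserialize_cuda_engine(f.read())
```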
Comparing the code below with the example above, the flow is essentially the same; only a few individual functions differ.
```python
# This sample uses an ONNX ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="ResNet50.onnx",
                       INPUT_SHAPE=(3, 224, 224),
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # Load and parse the ONNX model, populating the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        '''This function differs slightly from the one in the first example.'''
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This ResNet-50 requires some preprocessing, specifically mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                    # Three test images.
    onnx_model_file, labels_file = data_files[3:]    # ONNX model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')  # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_onnx. '''
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since the network is ResNet-50 in each case.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
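Note the preprocessing difference: this ONNX model folds mean normalization into a single scalar pair (mean 0.45, std 0.225) applied to every channel. If a model were exported with the usual per-channel ImageNet statistics instead (an assumption for illustration, not what this sample's ResNet50.onnx expects), the same step would look like:

```python
import numpy as np

# Hypothetical per-channel ImageNet statistics; this sample's model actually
# uses the scalar pair (0.45, 0.225) for all three channels.
MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
STD = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)

def normalize_chw(image_arr):
    # image_arr: CHW float32 array already scaled to [0, 1].
    return ((image_arr - MEAN) / STD).ravel()
```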
As the example below shows, all three samples follow much the same flow, with only small local changes.
UFF is the unified framework format TensorRT uses internally to represent a network graph before optimization; model formats such as TensorFlow's pb can first be converted to UFF (see NVIDIA's post tensorrt-3-faster-tensorflow-inference).
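For reference, a frozen TensorFlow graph can be converted to UFF either with the convert-to-uff utility shipped with TensorRT or with the bundled uff Python package. A minimal sketch, where frozen_model.pb is a placeholder and the output node name matches this sample's model:

```python
import uff

# Convert a frozen TensorFlow graph (.pb) to a UFF file. "frozen_model.pb" is a
# placeholder; the output node must be the real output op of your graph.
uff.from_tensorflow_frozen_model(
    "frozen_model.pb",
    output_nodes=["GPU_0/tower_0/Softmax"],
    output_filename="resnet50-infer-5.uff")
```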
```python
# This sample uses a UFF ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple  # Needed for the _ModelData definition below.
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_NAME', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="resnet50-infer-5.uff",
                       INPUT_NAME="input",
                       INPUT_SHAPE=(3, 224, 224),
                       OUTPUT_NAME="GPU_0/tower_0/Softmax",
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.UffParser() as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # The input and output nodes must be registered with the UFF parser manually.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load and parse the UFF model, populating the TensorRT network.
        parser.parse(model_file, network)
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                   # Three test images.
    uff_model_file, labels_file = data_files[3:]    # UFF model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')  # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_uff. '''
    with build_engine_uff(uff_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since the network is ResNet-50 in each case.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
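None of the three samples measure latency. Because do_inference synchronizes the stream before returning, wall-clock timing around it is meaningful; a minimal sketch (time_inference is a hypothetical helper, not part of the samples):

```python
import time

def time_inference(context, h_input, d_input, h_output, d_output, stream, runs=100):
    # Warm up once so one-time CUDA initialization does not skew the measurement.
    do_inference(context, h_input, d_input, h_output, d_output, stream)
    start = time.time()
    for _ in range(runs):
        do_inference(context, h_input, d_input, h_output, d_output, stream)
    print("Average latency: %.3f ms" % ((time.time() - start) / runs * 1000.0))
```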