This article analyzes and walks through the introductory_parser_samples examples bundled with TensorRT 5.0.2.
Assume the current path is:

```
TensorRT-5.0.2.6/samples
```
The corresponding directory tree for this example is:
```
# tree python
python/
├── common.py
├── introductory_parser_samples
│   ├── caffe_resnet50.py
│   ├── onnx_resnet50.py
│   ├── README.md
│   ├── requirements.txt
│   └── uff_resnet50.py
```
This example demonstrates how to use TensorRT and its bundled parsers (UFF, Caffe, and ONNX) to run inference with a ResNet-50 network trained in different frameworks.
- caffe_resnet50: shows how to use the Caffe parser to parse a Caffe-trained model, build an engine, and run inference;
- onnx_resnet50: shows how to parse an ONNX model with the open-source ONNX parser and run inference;
- uff_resnet50: shows how to build an engine from a UFF model file (converted from a TensorFlow protobuf) and run inference (the parser-specific calls for all three are sketched just below).
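The only real difference between the three scripts is how the parser populates the TensorRT network; everything else (engine build, buffer allocation, inference) is shared. A minimal side-by-side sketch of just that step, with argument values elided (API as in TensorRT 5):

```python
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
    # caffe_resnet50.py: parse prototxt + caffemodel, then mark the output by name.
    with trt.CaffeParser() as parser:
        pass  # model_tensors = parser.parse(deploy=..., model=..., network=network, dtype=trt.float32)

    # onnx_resnet50.py: the parser is bound to the network when it is constructed.
    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        pass  # parser.parse(model_bytes)

    # uff_resnet50.py: register the input/output nodes first, then parse the .uff file.
    with trt.UffParser() as parser:
        pass  # parser.register_input(...); parser.register_output(...); parser.parse(uff_file, network)
```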
The required data files are:
```
/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg
```
The full code comes first. Starting from the main function, it walks through the whole flow by calling the helpers one by one; overall the code is fairly simple:
```python
# This sample uses a Caffe ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="ResNet50_fp32.caffemodel",
                       DEPLOY_PATH="ResNet50_N2.prototxt",
                       INPUT_SHAPE=(3, 224, 224),
                       OUTPUT_NAME="prob",
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.CaffeParser() as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # Load and parse the Caffe model, populating the TensorRT network. The returned object lets tensors be retrieved by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe the network output must be marked manually; since the output tensor's name is known, it can be found directly.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                                      # Three test images.
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]  # Caffe model file, deploy file, and labels file.
    labels = open(labels_file, 'r').read().split('\n')                 # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_caffe. '''
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # Inference follows the same flow no matter which parser built the engine.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
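One practical extension the sample leaves out: building the engine from the Caffe files is far slower than running inference, so the built engine can be serialized once and reloaded on later runs. A minimal sketch using TensorRT's Runtime API (the file name resnet50_caffe.engine is arbitrary):

```python
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def save_engine(engine, path="resnet50_caffe.engine"):
    # Serialize the built engine to disk so later runs can skip the build step.
    with open(path, "wb") as f:
        f.write(engine.serialize())

def load_engine(path="resnet50_caffe.engine"):
    # Deserialize a previously saved engine; it is used exactly like one
    # returned by build_engine_caffe().
    with trt.Runtime(TRT_LOGGER) as runtime, open(path, "rb") as f:
        return runtime.deserialize_cuda_engine(f.read())
```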
Comparing the code below with the example above, the flow is essentially the same; only a few individual functions differ.
```python
# This sample uses an ONNX ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="ResNet50.onnx",
                       INPUT_SHAPE=(3, 224, 224),
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # Load and parse the ONNX model, populating the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        '''This function differs slightly from the one in the first example.'''
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This ResNet-50 requires some preprocessing, specifically mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                    # Three test images.
    onnx_model_file, labels_file = data_files[3:]    # ONNX model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')  # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_onnx. '''
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since the network is ResNet-50 in each case.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
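Note the preprocessing difference: this ONNX model folds mean normalization into a single scalar pair (mean 0.45, std 0.225) applied to every channel. If a model were exported with the usual per-channel ImageNet statistics instead (an assumption for illustration, not what this sample's ResNet50.onnx expects), the same step would look like:

```python
import numpy as np

# Hypothetical per-channel ImageNet statistics; this sample's model actually
# uses the scalar pair (0.45, 0.225) for all three channels.
MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
STD = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)

def normalize_chw(image_arr):
    # image_arr: CHW float32 array already scaled to [0, 1].
    return ((image_arr - MEAN) / STD).ravel()
```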
As the example below shows, all three samples follow much the same flow, with only small local changes.
UFF is the unified framework format TensorRT uses internally to represent a network graph before optimization; model formats such as TensorFlow's pb can first be converted to UFF (see NVIDIA's post tensorrt-3-faster-tensorflow-inference).
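For reference, a frozen TensorFlow graph can be converted to UFF either with the convert-to-uff utility shipped with TensorRT or with the bundled uff Python package. A minimal sketch, where frozen_model.pb is a placeholder and the output node name matches this sample's model:

```python
import uff

# Convert a frozen TensorFlow graph (.pb) to a UFF file. "frozen_model.pb" is a
# placeholder; the output node must be the real output op of your graph.
uff.from_tensorflow_frozen_model(
    "frozen_model.pb",
    output_nodes=["GPU_0/tower_0/Softmax"],
    output_filename="resnet50-infer-5.uff")
```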
```python
# This sample uses a UFF ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple  # Needed for the _ModelData definition below.
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda create and clean up the CUDA context automatically.
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data helpers from common.py are inlined below so this file is self-contained.

def GiB(val):
    '''Converts a value in GiB to bytes: a left shift of 10 bits is KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments (here reduced to locating data files).

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_NAME', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH="resnet50-infer-5.uff",
                       INPUT_NAME="input",
                       INPUT_SHAPE=(3, 224, 224),
                       OUTPUT_NAME="GPU_0/tower_0/Softmax",
                       DTYPE=trt.float32)  # TensorRT data types convert to numpy types via trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.UffParser() as parser:
        # The workspace size is the maximum memory the builder may use while building the engine; more is better.
        builder.max_workspace_size = GiB(1)
        # The input and output nodes must be registered with the UFF parser manually.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load and parse the UFF model, populating the TensorRT network.
        parser.parse(model_file, network)
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked host buffers (i.e. won't be swapped to disk) for the inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer the result back from the GPU to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    ''' 1 - Locate the model files, test samples, and so on. '''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                   # Three test images.
    uff_model_file, labels_file = data_files[3:]    # UFF model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')  # Read the labels.

    ''' 2 - Build a TensorRT engine with build_engine_uff. '''
    with build_engine_uff(uff_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since the network is ResNet-50 in each case.
        ''' 3 - Allocate buffers and create a CUDA stream. '''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        ''' 4 - The context below runs inference. '''
        with engine.create_execution_context() as context:
            ''' Pick a test sample, normalize it, and copy it into the host page-locked buffer. '''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector; each value is the probability of that class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Take the highest-probability element and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
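None of the three samples measure latency. Because do_inference synchronizes the stream before returning, wall-clock timing around it is meaningful; a minimal sketch (time_inference is a hypothetical helper, not part of the samples):

```python
import time

def time_inference(context, h_input, d_input, h_output, d_output, stream, runs=100):
    # Warm up once so one-time CUDA initialization does not skew the measurement.
    do_inference(context, h_input, d_input, h_output, d_output, stream)
    start = time.time()
    for _ in range(runs):
        do_inference(context, h_input, d_input, h_output, d_output, stream)
    print("Average latency: %.3f ms" % ((time.time() - start) / runs * 1000.0))
```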