Deploying YOLOv5 with ncnn (Part 3)

5. Calling ncnn from C++ on the PC

Because the YOLOv5-to-ncnn conversion does not include the post-processing stage, the whole post-processing pipeline has to be rebuilt in the C++ code. Without further ado, here is the code:

cmake_minimum_required(VERSION 3.17)
project(yolov5s)

find_package(OpenCV REQUIRED core highgui imgproc)

#These point at the include and lib folders of the compiled ncnn; change the paths to match your own setup
include_directories(./CLion-2020.2/clion-2020.2/ncnn-master/build/install/include/ncnn)  
link_directories(./CLion-2020.2/clion-2020.2/ncnn-master/build/install/lib)

find_package(ncnn)
FIND_PACKAGE( OpenMP REQUIRED)
if (OPENMP_FOUND)
    message("OPENMAP FOUND")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)

add_executable(yolov5s yolov5.cpp)

target_link_libraries(yolov5s ncnn ${OpenCV_LIBS})

The code above is the CMakeLists.txt; you can use it as a reference when configuring CMake.

Next comes the header (.h) file:

#ifndef YOLO_V5_H
#define YOLO_V5_H

#include "net.h"
#include <opencv2/opencv.hpp>
#include <math.h>
#include <chrono>
#include <memory>

struct YoloSize{     //define the YoloSize struct (a width/height pair)
    int width;
    int height;
};

struct YoloLayerData{        //define the YoloLayerData struct: one YOLO output head
    std::string name;       //output blob name; the std:: qualifier is needed for standard-library
    int stride;             //types such as std::string and avoids clashes with user-defined names
    std::vector<YoloSize> anchors;  //a vector (array-like container) holding this head's anchors
};

struct BoxInfo {     //define the BoxInfo struct: one detection box
    float x1;
    float y1;
    float x2;
    float y2;
    float score;
    int label;
};

class Yolov5Detector {       //the detector class
public:                     //public interface
    Yolov5Detector(const char* param, const char* bin);     //constructor: loads the .param/.bin files
    ~Yolov5Detector();      //destructor: releases the network; takes no arguments and returns nothing
    std::vector<BoxInfo> Detect(cv::Mat image, float threshold, float nms_threshold=0.4);   //runs detection and returns the boxes in a BoxInfo vector

private:                    //private helpers
    static std::vector<BoxInfo> DecodeInfer(ncnn::Mat &data, int stride,const YoloSize& frame_size, int net_size,int num_classes,const std::vector<YoloSize> &anchors, float threshold);     //decode one output head
    static void Nms(std::vector<BoxInfo>& result,float nms_threshold);      //static members may be private, unlike global variables

private:
    std::vector<std::string> labels_{ "person", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
                                     "cow", "diningtable", "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

    int input_size_ = 640;
    int num_class_ = 20;
    int num_thread_ = 1;
    bool hasGPU_ = false;

    std::vector<YoloLayerData> layers{ 
        { "392",32,{ { 116,90},{ 156,198},{ 373,326}}},
        { "373",16,{ { 30,61},{ 62,45},{ 59,119}}},
        { "output",8,{ { 10,13},{ 16,30},{ 33,23}}},
    };

    ncnn::Net* Net_;
    static Yolov5Detector *detector_;
};


#endif //YOLO_V5_H

This is the .h file; it mainly defines the structs and the class that the .cpp file uses. A few points need attention:
The input_size_ here must be set to 640, i.e. twice the ONNX model's input size. Exactly why is unclear, but setting it to 320 makes the results noticeably worse. Also, the names of the three output layers differ for every converted model, so change them to match your own. To find them, the netron visualization tool (available from the ncnn community on GitHub) lets you inspect the structure of the converted model and read off the three output blob names directly. Link: netron network visualizer.
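
For example, if netron showed the three output blobs of your converted model as "output", "781" and "801" (hypothetical names, used only for illustration), the layers vector in the header would be edited like this, keeping each stride paired with its original anchors and changing only the names:

    std::vector<YoloLayerData> layers{ 
        { "801",32,{ { 116,90},{ 156,198},{ 373,326}}},   //stride-32 head
        { "781",16,{ { 30,61},{ 62,45},{ 59,119}}},       //stride-16 head
        { "output",8,{ { 10,13},{ 16,30},{ 33,23}}},      //stride-8 head
    };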

Next comes the .cpp file:

#include "yolov5.h"
#include <fstream>

Yolov5Detector::Yolov5Detector(const char *param, const char* bin) { 
    Net_ = new ncnn::Net();
    ncnn::Option opt;
    opt.use_winograd_convolution = true;
    opt.use_packing_layout = false;
    opt.lightmode = true;
    Net_->opt = opt;
    int ret = Net_->load_param(param);
    std::cout << ret << std::endl;
    ret = Net_->load_model(bin);
    std::cout << ret << std::endl;
}


Yolov5Detector::~Yolov5Detector() { 
    delete Net_;
}


std::vector<BoxInfo> Yolov5Detector::Detect(cv::Mat img, float threshold, float nms_threshold) {         //Detect: preprocess, run inference and decode, returning the results as BoxInfo

// cv::Mat input_mat;
// //TODO padding
// float im_scale = (float)img.cols/input_size_;
// std::cout << "scale" << im_scale << std::endl;
// int new_w = int(img.cols/im_scale);
// int new_h = int(img.rows/im_scale);
// cv::resize(img,input_mat,cv::Size(new_w,new_h)); //resize
// int p_w = input_size_ - new_w;
// int p_h = input_size_ - new_h;
// //pad the image borders
// cv::copyMakeBorder(input_mat, input_mat, //source image and padded destination
// 0, p_h, //padding added on each side: top, bottom, left, right
// 0, p_w,
// cv::BORDER_CONSTANT, //pad with a constant value
// cv::Scalar(114, 114, 114)); //the constant fill value
// 
/*****************************************YOLOv5's adaptive image scaling (letterbox)*************************************************************/
    cv::Mat input_mat;
    //TODO padding
    float col_scale = input_size_/(float)img.cols;
    float row_scale = input_size_/(float)img.rows;
    float im_scale;
    if (col_scale < row_scale)
    { 
        im_scale = col_scale;
    } else
    { 
        im_scale = row_scale;
    }
    std::cout << "scale" << im_scale << std::endl;
    int new_w = int(img.cols * im_scale);
    int new_h = int(img.rows * im_scale);
    cv::resize(img,input_mat,cv::Size(new_w,new_h));    //resize
    int dw = input_size_ - new_w;
    int dh = input_size_ - new_h;
    int p_w = dw%64/2;
    int p_h = dh%64/2;
    int top = (int)std::round(p_h-0.1) ;
    int bottom = (int)std::round(p_h+0.1);
    int left = (int)std::round(p_w-0.1);
    int right = (int)std::round(p_w+0.1);
    //pad the image borders
    cv::copyMakeBorder(input_mat, input_mat,        //source image and padded destination
                       top, bottom,                 //padding added on each side: top, bottom, left, right
                       left, right,
                       cv::BORDER_CONSTANT,     //pad with a constant value
                       cv::Scalar(114, 114, 114));      //the constant fill value
    //cv::imshow("img1",input_mat);
    //cv::waitKey(0);
/*************************************************************************************************************/

    cv::resize(input_mat, input_mat, cv::Size(input_size_/2, input_size_/2));     //resize the image to 320, the ncnn model's actual input size
// cv::imshow("img2",input_mat);
// cv::waitKey(0);
    ncnn::Mat in_net = ncnn::Mat::from_pixels(input_mat.data, ncnn::Mat::PIXEL_BGR2RGB, input_mat.cols, input_mat.rows);    //build the input blob, converting BGR to RGB
    float norm[3] = { 1/255.f,1/255.f,1/255.f};      //scale factors (1/255)
    float mean[3] = { 0,0,0};        //mean values
    in_net.substract_mean_normalize(mean,norm);     //normalize: subtract the mean, multiply by the scale
    auto ex = Net_->create_extractor();     //create an extractor from the network
    ex.set_light_mode(true);
    ex.set_num_threads(4);
// ex.set_vulkan_compute(hasGPU);
    ex.input(0, in_net);    //feed the input blob (blob index 0)
    std::vector<BoxInfo> result;      //detection results
    for(const auto& layer: layers){ 
        ncnn::Mat blob;
        auto t0 = std::chrono::high_resolution_clock::now();    //time the inference
        ex.extract(layer.name.c_str(),blob);        //extract this output head's blob
        auto t1 = std::chrono::high_resolution_clock::now();
        auto boxes = DecodeInfer(blob,layer.stride,{ (int)img.cols,(int)img.rows},input_size_,num_class_,layer.anchors,threshold);   //decode this head's predictions
        auto t2 = std::chrono::high_resolution_clock::now();
        std::cout << "time1: " << std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count() / 1000.0 << "ms." << std::endl;
        std::cout << "time2: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() / 1000.0 << "ms." << std::endl;
        result.insert(result.begin(),boxes.begin(),boxes.end());        //insert all of this head's boxes at the front of result
    }
    Nms(result,nms_threshold);      //run NMS
    return result;
}


inline float Sigmoid(float x){       //inline function for performance; only worthwhile for short, simple functions
    return 1.0f / (1.0f + std::exp(-x));
}

//decode one output head; static member of Yolov5Detector
std::vector<BoxInfo> Yolov5Detector::DecodeInfer(ncnn::Mat &data, int stride, const YoloSize &frame_size, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold) { 
    std::vector<BoxInfo> result;
    int grid_size = int(sqrt(data.h));      //data.h is the number of grid cells, e.g. data(dims=3, w=25, h=400, c=3) -> 20x20 grid
    float *mat_data[data.c];                //data.c is the number of anchors for this head (3)
    for(int i=0;i<data.c;i++){ 
        mat_data[i] = data.channel(i);      //pointer to the start of each anchor's channel
    }
    float cx,cy,w,h;
    for(int shift_y=0;shift_y<grid_size;shift_y++){ 
        for(int shift_x=0;shift_x<grid_size;shift_x++){ 
            //int loc = shift_x+shift_y*grid_size;
            for(int i=0;i<3;i++) { 
                float *record = mat_data[i];
                float *cls_ptr = record + 5;
                for(int cls = 0; cls<num_classes;cls++){ 
                    float score = Sigmoid(cls_ptr[cls]) * Sigmoid(record[4]);
                    if(score>threshold){ 
                        cx = (Sigmoid(record[0]) * 2.f - 0.5f + (float)shift_x) * (float) stride;   //box center x
                        cy = (Sigmoid(record[1]) * 2.f - 0.5f + (float)shift_y) * (float) stride;   //box center y
                        w = pow(Sigmoid(record[2]) * 2.f,2)*anchors[i].width;                 //box width
                        h = pow(Sigmoid(record[3]) * 2.f,2)*anchors[i].height;                //box height
                        //std::cout << cx << " "<<cy<<" "<<w<<" "<<h<<std::endl;
                        //printf("[grid size=%d, stride = %d]x y w h %f %f %f %f\n",grid_size,stride,record[0],record[1],record[2],record[3]);
                        BoxInfo box;
                        box.x1 = std::max(0,std::min(frame_size.width,int((cx - w / 2.f) * (float)frame_size.width / (float)net_size)));    //top-left corner, scaled back to the original image and clamped
                        box.y1 = std::max(0,std::min(frame_size.height,int((cy - h / 2.f) * (float)frame_size.height / (float)net_size)));
                        box.x2 = std::max(0,std::min(frame_size.width,int((cx + w / 2.f) * (float)frame_size.width / (float)net_size)));    //bottom-right corner
                        box.y2 = std::max(0,std::min(frame_size.height,int((cy + h / 2.f) * (float)frame_size.height / (float)net_size)));
                        box.score = score;
                        box.label = cls;
                        result.push_back(box);
                    }
                }
            }
            for(auto& ptr:mat_data){ 
                ptr+=(num_classes + 5);
            }
        }
    }
    return result;
}


void Yolov5Detector::Nms(std::vector<BoxInfo> &input_boxes, float nms_thresh) { 
    std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b){ return a.score > b.score;});
    std::vector<float>vArea(input_boxes.size());    //per-box areas
    for (int i = 0; i < int(input_boxes.size()); ++i)   //compute the area of every box
    { 
        vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)        //area = (x2-x1+1)*(y2-y1+1)
                   * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
    }
    for (int i = 0; i < int(input_boxes.size()); ++i)   //suppress overlapping boxes, highest score first
    { 
        for (int j = i + 1; j < int(input_boxes.size());)
        { 
            float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);     //top-left corner of the intersection
            float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
            float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);     //bottom-right corner of the intersection
            float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
            float w = std::max(float(0), xx2 - xx1 + 1);        //width of the intersection
            float h = std::max(float(0), yy2 - yy1 + 1);        //height of the intersection
            float inter = w * h;                                //area of the intersection
            float ovr = inter / (vArea[i] + vArea[j] - inter);      //overlap (IoU)
            if (ovr >= nms_thresh)
            { 
                input_boxes.erase(input_boxes.begin() + j);     //if the overlap exceeds the threshold, drop the box
                vArea.erase(vArea.begin() + j);                 //and its cached area
            }
            else
            { 
                j++;
            }
        }
    }
}

int main()
{ 
    //cv::VideoCapture cap;
    const char *yolov5_param = "/home/zhangyi/CLionProjects/yolov5s/last_500_320.param";
    const char *yolov5_bin = "/home/zhangyi/CLionProjects/yolov5s/last_500_320.bin";
    float nms_threshold = 0.4;
    float threshold = 0.3;
    Yolov5Detector ret(yolov5_param, yolov5_bin);

/*****************************************write the predictions for the whole validation set to a txt file*************************************************************/
// std::vector<cv::String> filename;
// cv::String folder = "/home/zhangyi/CLionProjects/yolov5s/img_val";
// cv::glob(folder, filename);
// std::ofstream out("out.txt");
// for(size_t i=0; i<filename.size(); ++i)
// { 
// std::cout << filename[i] << std::endl;
// cv::Mat frame = cv::imread(filename[i]);
// std::vector<BoxInfo> result = ret.Detect(frame, threshold, nms_threshold);
//
// std::ofstream write("out.txt", std::ios::app);
// out << filename[i] << " ";
// for(int j=0; j<result.size(); ++j)
// { 
// const auto obj = result[j];
// //std::ofstream write("out.txt", std::ios::app);
// std::cout << obj.x1 << "," << obj.y1 << "," << obj.x2 << "," << obj.y2 << "," << obj.label << "\n";
// out << obj.score << "," << obj.x1 << "," << obj.y1 << "," << obj.x2 << "," << obj.y2 << "," << obj.label << " ";
// }
// //std::ofstream write("out.txt", std::ios::app);
// out << "\n";
// }
/*****************************************************************************************************/

    cv::Mat frame;
    const char *img_path = "/home/zhangyi/CLionProjects/yolov5s/img_val/2008_000243.jpg";
    std::cout << img_path << std::endl;
    cv::Mat img = cv::imread(img_path, 1);
    frame = cv::imread(img_path, 1);

    std::vector<BoxInfo> result = ret.Detect(frame, threshold, nms_threshold);


    /*draw the results*/
    for(int i=0; i<result.size(); i++)
    { 
        const auto obj = result[i];
        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.score,
                obj.x1, obj.y1, obj.x2, obj.y2);
        cv::rectangle(frame, cv::Point(obj.x1, obj.y1), cv::Point(obj.x2, obj.y2), cv::Scalar(255, 0, 0));

        char text[256];
        static const char* labels[] = { "person", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
                "cow", "diningtable", "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

        sprintf(text, "%s %.1f%%", labels[obj.label], obj.score * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        int x = obj.x1;
        int y = obj.y1 - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > frame.cols)
            x = frame.cols - label_size.width;

        cv::rectangle(frame, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(frame, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }
    cv::imshow("image", frame);
    cv::waitKey(0);

    return 0;
}

There is one more place here that needs attention:
Because the .h file sets the input size to 640 while the converted ncnn model actually takes a 320 input, the size has to be divided by 2 at this point.
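
If the hard-coded division by 2 feels opaque, a small readability tweak (my own suggestion, not part of the original code, and assuming the converted ncnn model really does take a 320x320 input) is to name the actual network input size once and reuse it:

    const int net_input_size = input_size_ / 2;   //640 / 2 = 320, the size the ncnn blob expects
    cv::resize(input_mat, input_mat, cv::Size(net_input_size, net_input_size));
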
The letterbox block above implements the adaptive image scaling that YOLOv5 added; according to the YOLOv5 write-up this trick can speed things up by about 30%, and it also affects the ncnn model's accuracy here: adding it raises mAP by about 2.5 points.
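
To make the letterbox arithmetic concrete, here is a trace of the values for a hypothetical 500x375 VOC image (not from the original post, just working through the code above with input_size_ = 640):

    //col_scale = 640/500 = 1.28, row_scale = 640/375 ≈ 1.71  ->  im_scale = 1.28
    //new_w = 640, new_h = 480 (aspect ratio preserved by the resize)
    //dw = 0, dh = 160; p_w = (0 % 64)/2 = 0, p_h = (160 % 64)/2 = 16
    //top = bottom = 16, left = right = 0  ->  padded image is 640x512
    //which is then resized to 320x320 before being fed to the network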

The commented-out block in main() runs the detector over the validation set and writes the results to a txt file, which is then used to measure mAP. Since most mAP code is written in Python and I didn't feel like rewriting it, there is no C++ version; if you are interested you can write a C++ mAP evaluation and test directly in C++.

When you want to measure mAP, uncomment that block and comment out the code below it.

6. Testing the ncnn model's mAP

Measuring mAP is not really the best way to evaluate a converted ncnn model; I only did it to compare against the original .pt model. Conversions are more commonly validated by checking to how many decimal places the predicted tensor matches the original tensor.
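
Below is a minimal sketch of that tensor-level check, assuming a reference output has been dumped from the PyTorch/ONNX model beforehand; the helper is my own illustration, not code from the original post:

    #include "net.h"
    #include <cmath>
    #include <vector>

    //compare one ncnn output blob against a flattened reference tensor and return
    //the largest absolute difference; a value around 1e-4 or smaller usually means
    //the converted model is numerically faithful to the original
    float MaxAbsDiff(const ncnn::Mat& out, const std::vector<float>& ref)
    { 
        float worst = 0.f;
        size_t idx = 0;
        for (int c = 0; c < out.c; c++)
        { 
            const float* ptr = out.channel(c);
            for (int i = 0; i < out.w * out.h && idx < ref.size(); i++, idx++)
            { 
                float d = std::fabs(ptr[i] - ref[idx]);
                if (d > worst) worst = d;
            }
        }
        return worst;
    }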

The previous section already saved the validation results to a txt file; now bring those results into PyCharm and compute mAP.

Since I use the VOC dataset, there is plenty of open-source VOC mAP code a search away. It expects the results of each class to be extracted into its own txt file first, so in the end there are 20 txt files, one per object class. Here is the code for that step:

import os

def file_name(file_dir):
    with open("val.txt", 'w') as f:
        for root, dirs, files in os.walk(file_dir):
            for file in files:
                img_name = file.split(".")[0]
                f.write(img_name)
                f.write("\n")

def cls_pred_file(pred_file):
    with open(pred_file) as f:
        lines = f.readlines()
        classes_name = ["person", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
                     "cow", "diningtable", "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        for cls in classes_name:
            with open("./datasets/score/pred_out/%s.txt"%cls, 'w') as F:
                print("Writing %s.txt"%cls)
                for line in lines:
                    img_name = line.strip().split(" ")[0]
                    objects = line.strip().split(" ")[1:]
                    for i in range(len(objects)):
                        score = objects[i].split(",")[0]
                        x1 = objects[i].split(",")[1]
                        y1 = objects[i].split(",")[2]
                        x2 = objects[i].split(",")[3]
                        y2 = objects[i].split(",")[4]
                        label = int(objects[i].split(",")[5])
                        if classes_name[label] == cls:
                            F.write(img_name + " " + score + " " + x1 + " " + y1 + " " + x2 + " " + y2)
                            F.write("\n")
            print("%s.txt is done!"%cls)

if __name__ == "__main__":
    #file_name("./datasets/score/labels/val")
    cls_pred_file("./out.txt")

That generates the per-class prediction files for the 20 classes. Next comes the mAP evaluation itself; the code is below:

# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

import xml.etree.ElementTree as ET
import os
import _pickle as cPickle
import numpy as np

def parse_rec(filename):  # parse the xml with ElementTree and return the list of objects
    """ Parse a PASCAL VOC xml file """
    tree = ET.parse(filename)
    objects = []
    # parse the xml file and collect the ground-truth boxes into a list
    for obj in tree.findall('object'):
        obj_struct = { }
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)
    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric]) Compute VOC AP given precision and recall. If use_07_metric is true, uses the VOC 07 11 point method (default:False). """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.2,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname, [ovthresh], [use_07_metric]) Top level function that does the PASCAL VOC evaluation. detpath: Path to detections detpath.format(classname) should produce the detection results file. annopath: Path to annotations annopath.format(imagename) should be the xml annotations file. imagesetfile: Text file containing the list of images, one image per line. classname: Category name (duh) cachedir: Directory for caching the annotations [ovthresh]: Overlap threshold (default = 0.5) [use_07_metric]: Whether to use VOC07's 11 point AP computation (default False) """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name 默認txt中是無後綴imgName
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)  # 若無pkl文件的路徑,生成cachedir路徑
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]  # imagenames is the list of all image names

    if not os.path.isfile(cachefile):  # no cached pkl yet
        # load annots
        recs = { }  # recs is a dict keyed by image name, whose values are the objects parsed from the xml (see the next two lines)
        for i, imagename in enumerate(imagenames):
            # imagename = imagename.split(' ')[0]
            recs[imagename] = parse_rec(annopath.format(imagename))  # parse the xml for each image name and store it in recs
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames)))  # show progress
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)  # dump recs to the pkl cache
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)  # if the pkl already exists, just load it into recs

    # extract gt objects for this class
    class_recs = { }
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]  # keep only objects of this class
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = { 'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'rb') as f:  # read the batch detection results txt
        lines = f.readlines()

    splitlines = [x.decode().strip().split(' ') for x in lines]  # split each line of the txt
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs by comparing against the ground truth
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap

if __name__ == "__main__":
    results_path = "./datasets/score/detection"
    cls_result = os.listdir(results_path)

    AP = []
    for i in range(len(cls_result)):
        class_name = cls_result[i].split(".txt")[0]
        rec, prec, ap = voc_eval("./datasets/score/pred_out/{}.txt",
                                 "./datasets/score/Annotations/{}.xml",
                                 "./val.txt",
                                 class_name,
                                 '.')
        print("{} :\t {}".format(class_name, ap))
        AP.append(ap)
    #map = sum(AP)/len(AP)
    map = tuple(AP)
    print("***************************")
    print("mAP :\t {}".format(float(sum(map) / len(map))))

Final result: the ncnn model's mAP is 46.8. The original torch model's mAP, already reported in Part 1, is 51.5, so converting to ncnn drops mAP by 4.7 points. That is a fairly large gap; judging from what many others report, the drop should not be this big, so I'm not sure where it comes from. It might be a mistake in my post-processing, or the model might simply be under-trained (the torch model was only trained from scratch for 500 epochs). If anyone knows the reason, please enlighten me!

Finally, the ncnn detection result:
(result image) This is the same test image that was used with the torch model, so you can compare the two.

And that wraps up the YOLOv5-to-ncnn conversion. I hope it helps, because there really are a lot of pitfalls along the way!
