5. Calling ncnn from C++ on the PC
Because the yolov5-to-ncnn conversion does not include the post-processing part, the whole post-processing stage has to be rebuilt in the C++ code. Without further ado, here is the code:
cmake_minimum_required(VERSION 3.17)
project(yolov5s)

find_package(OpenCV REQUIRED core highgui imgproc)

# These point to the include and lib folders of the built ncnn; change them to your own paths
include_directories(./CLion-2020.2/clion-2020.2/ncnn-master/build/install/include/ncnn)
link_directories(./CLion-2020.2/clion-2020.2/ncnn-master/build/install/lib)
find_package(ncnn)

find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
    message("OPENMP FOUND")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)

add_executable(yolov5s yolov5.cpp)
target_link_libraries(yolov5s ncnn ${OpenCV_LIBS})
This is the CMakeLists.txt; you can use it as a reference when configuring CMake.
Next comes the header (.h) file:
#ifndef YOLO_V5_H
#define YOLO_V5_H

#include "net.h"
#include <opencv2/opencv.hpp>
#include <math.h>
#include <chrono>
#include <memory>

struct YoloSize{                       // struct YoloSize: a simple width/height pair
    int width;
    int height;
};

struct YoloLayerData{                  // struct YoloLayerData: one output layer of the network
    std::string name;                  // output blob name; std:: qualification keeps user-defined names from clashing with the standard library
    int stride;
    std::vector<YoloSize> anchors;     // anchors of this output layer
};

struct BoxInfo {                       // struct BoxInfo: one detection
    float x1;
    float y1;
    float x2;
    float y2;
    float score;
    int label;
};

class Yolov5Detector {                 // detector class
public:
    Yolov5Detector(const char* param, const char* bin);        // constructor: loads the param/bin files
    ~Yolov5Detector();                                          // destructor: releases the network, no arguments, no return value
    std::vector<BoxInfo> Detect(cv::Mat image, float threshold, float nms_threshold=0.4);   // returns the detections as a vector of BoxInfo

private:
    static std::vector<BoxInfo> DecodeInfer(ncnn::Mat &data, int stride, const YoloSize& frame_size, int net_size,
                                            int num_classes, const std::vector<YoloSize> &anchors, float threshold);   // decode the raw output of one layer
    static void Nms(std::vector<BoxInfo>& result, float nms_threshold);   // static members may be private, unlike globals

private:
    std::vector<std::string> labels_{"person", "aeroplane", "bicycle", "bird", "boat",
                                     "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
                                     "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa",
                                     "train", "tvmonitor"};
    int input_size_ = 640;
    int num_class_ = 20;
    int num_thread_ = 1;
    bool hasGPU_ = false;
    std::vector<YoloLayerData> layers{
        {"392",    32, {{116,90}, {156,198}, {373,326}}},
        {"373",    16, {{30,61},  {62,45},   {59,119}}},
        {"output",  8, {{10,13},  {16,30},   {33,23}}},
    };
    ncnn::Net* Net_;
    static Yolov5Detector *detector_;
};

#endif //YOLO_V5_H
This is the .h file; it mainly defines the structs and the class that the cpp file uses. A few points in it need attention, as shown below:
The input_size here has to be set to 640, i.e. twice the input size of the onnx model. Why exactly is unclear, but setting it to 320 noticeably degraded the results. The names of the three output layers also differ from one converted model to another, so change them to match your own model. To find these three names, the netron visualization tool mentioned in the ncnn official git community can display the structure of the converted model directly, and the three output layer names can be read from there. Link: netron network visualizer
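If you prefer not to open netron, the .param file produced by the conversion is plain text with one layer per line, so a quick look at its tail can also reveal the output blob names (they usually appear in the last few lines). Below is a minimal sketch; the file path is a placeholder for your own converted model:

# Minimal sketch: print the last lines of an ncnn .param file to spot the output blob names.
# The path is a placeholder; adjust it to your own converted model.
param_path = "last_500_320.param"

with open(param_path) as f:
    lines = f.read().splitlines()

# each line is: layer_type layer_name input_count output_count blobs... params
for line in lines[-10:]:
    print(line)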
Next comes the cpp code:
#include "yolov5.h" #include <fstream> Yolov5Detector::Yolov5Detector(const char *param, const char* bin) { Net_ = new ncnn::Net(); ncnn::Option opt; opt.use_winograd_convolution = true; opt.use_packing_layout = false; opt.lightmode = true; Net_->opt = opt; int ret = Net_->load_param(param); std::cout << ret << std::endl; ret = Net_->load_model(bin); std::cout << ret << std::endl; } Yolov5Detector::~Yolov5Detector() { delete Net_; } std::vector<BoxInfo> Yolov5Detector::Detect(cv::Mat img, float threshold, float nms_threshold) { //detect的值傳給BoxInfo,定義命名空間Yolov5Detector類下的函數Detect // cv::Mat input_mat; // //TODO padding // float im_scale = (float)img.cols/input_size_; // std::cout << "scale" << im_scale << std::endl; // int new_w = int(img.cols/im_scale); // int new_h = int(img.rows/im_scale); // cv::resize(img,input_mat,cv::Size(new_w,new_h)); //resize // int p_w = input_size_ - new_w; // int p_h = input_size_ - new_h; // //擴充圖像邊緣 // cv::copyMakeBorder(input_mat, input_mat, //原圖像與擴充後的圖像 // 0, p_h, //表示在圖像四周擴充邊緣的大小,top,bottom,left,right // 0, p_w, // cv::BORDER_CONSTANT, //以常數值擴充邊界 // cv::Scalar(114, 114, 114)); //常量擴充的數值 // /*****************************************yolov5後處理中的自適應縮放圖片*************************************************************/ cv::Mat input_mat; //TODO padding float col_scale = input_size_/(float)img.cols; float row_scale = input_size_/(float)img.rows; float im_scale; if (col_scale < row_scale) { im_scale = col_scale; } else { im_scale = row_scale; } std::cout << "scale" << im_scale << std::endl; int new_w = int(img.cols * im_scale); int new_h = int(img.rows * im_scale); cv::resize(img,input_mat,cv::Size(new_w,new_h)); //resize int dw = input_size_ - new_w; int dh = input_size_ - new_h; int p_w = dw%64/2; int p_h = dh%64/2; int top = (int)std::round(p_h-0.1) ; int bottom = (int)std::round(p_h+0.1); int left = (int)std::round(p_w-0.1); int right = (int)std::round(p_w+0.1); //擴充圖像邊緣 cv::copyMakeBorder(input_mat, input_mat, //原圖像與擴充後的圖像 top, bottom, //表示在圖像四周擴充邊緣的大小,top,bottom,left,right left, right, cv::BORDER_CONSTANT, //以常數值擴充邊界 cv::Scalar(114, 114, 114)); //常量擴充的數值 //cv::imshow("img1",input_mat); //cv::waitKey(0); /************************************************************************************************××××××******/ cv::resize(input_mat, input_mat, cv::Size(input_size_/2, input_size_/2)); //把圖片resize到320 // cv::imshow("img2",input_mat); // cv::waitKey(0); ncnn::Mat in_net = ncnn::Mat::from_pixels(input_mat.data, ncnn::Mat::PIXEL_BGR2RGB, input_mat.cols, input_mat.rows); //定義輸入,將BGR轉換爲RGB float norm[3] = { 1/255.f,1/255.f,1/255.f}; //方差 float mean[3] = { 0,0,0}; //均值 in_net.substract_mean_normalize(mean,norm); //正則化 auto ex = Net_->create_extractor(); //提取網絡結構和參數 ex.set_light_mode(true); ex.set_num_threads(4); // ex.set_vulkan_compute(hasGPU); ex.input(0, in_net); //輸入 std::vector<BoxInfo> result; //定義結果 for(const auto& layer: layers){ ncnn::Mat blob; auto t0 = std::chrono::high_resolution_clock::now(); //計算推理時間 ex.extract(layer.name.c_str(),blob); //提取網絡輸出層結果 auto t1 = std::chrono::high_resolution_clock::now(); auto boxes = DecodeInfer(blob,layer.stride,{ (int)img.cols,(int)img.rows},input_size_,num_class_,layer.anchors,threshold); //聲明解碼函數 auto t2 = std::chrono::high_resolution_clock::now(); std::cout << "time1: " << std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count() / 1000.0 << "ms." << std::endl; std::cout << "time2: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() / 1000.0 << "ms." 
<< std::endl; result.insert(result.begin(),boxes.begin(),boxes.end()); //在result的首部插入整個boxes數組 } Nms(result,nms_threshold); //進行NMS return result; } inline float Sigmoid(float x){ //內聯函數,用於加強性能,可是隻適用於簡單短小的函數 return 1.0f / (1.0f + std::exp(-x)); } //定義解碼函數,位於命名空間Yolov5Detector類下 std::vector<BoxInfo> Yolov5Detector::DecodeInfer(ncnn::Mat &data, int stride, const YoloSize &frame_size, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold) { std::vector<BoxInfo> result; int grid_size = int(sqrt(data.h)); //data.h是什麼意思?data(dims=3, w=25, h=400, c=3) float *mat_data[data.c]; //data.c是什麼意思? for(int i=0;i<data.c;i++){ mat_data[i] = data.channel(i); // } float cx,cy,w,h; for(int shift_y=0;shift_y<grid_size;shift_y++){ for(int shift_x=0;shift_x<grid_size;shift_x++){ //int loc = shift_x+shift_y*grid_size; for(int i=0;i<3;i++) { float *record = mat_data[i]; float *cls_ptr = record + 5; for(int cls = 0; cls<num_classes;cls++){ float score = Sigmoid(cls_ptr[cls]) * Sigmoid(record[4]); if(score>threshold){ cx = (Sigmoid(record[0]) * 2.f - 0.5f + (float)shift_x) * (float) stride; //中心點橫座標 cy = (Sigmoid(record[1]) * 2.f - 0.5f + (float)shift_y) * (float) stride; //中心點縱座標 w = pow(Sigmoid(record[2]) * 2.f,2)*anchors[i].width; //box的寬 h = pow(Sigmoid(record[3]) * 2.f,2)*anchors[i].height; //box的高 //std::cout << cx << " "<<cy<<" "<<w<<" "<<h<<std::endl; //printf("[grid size=%d, stride = %d]x y w h %f %f %f %f\n",grid_size,stride,record[0],record[1],record[2],record[3]); BoxInfo box; box.x1 = std::max(0,std::min(frame_size.width,int((cx - w / 2.f) * (float)frame_size.width / (float)net_size))); //左上角座標 box.y1 = std::max(0,std::min(frame_size.height,int((cy - h / 2.f) * (float)frame_size.height / (float)net_size))); box.x2 = std::max(0,std::min(frame_size.width,int((cx + w / 2.f) * (float)frame_size.width / (float)net_size))); //右下角座標 box.y2 = std::max(0,std::min(frame_size.height,int((cy + h / 2.f) * (float)frame_size.height / (float)net_size))); box.score = score; box.label = cls; result.push_back(box); } } } for(auto& ptr:mat_data){ ptr+=(num_classes + 5); } } } return result; } void Yolov5Detector::Nms(std::vector<BoxInfo> &input_boxes, float nms_thresh) { std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b){ return a.score > b.score;}); std::vector<float>vArea(input_boxes.size()); //定義box面積容器 for (int i = 0; i < int(input_boxes.size()); ++i) //遍歷全部的box { vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) //計算面積(x2-x1)×(y2-y1) * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); } for (int i = 0; i < int(input_boxes.size()); ++i) //遍歷全部的面積 { for (int j = i + 1; j < int(input_boxes.size());) { float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1); //求解兩個box相交處的左上角座標 float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1); float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2); //求解兩個box相交處的右下角座標 float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2); float w = std::max(float(0), xx2 - xx1 + 1); //計算相交矩形的寬 float h = std::max(float(0), yy2 - yy1 + 1); //計算相交矩形的高 float inter = w * h; //計算相交矩形的面積 float ovr = inter / (vArea[i] + vArea[j] - inter); //overlap if (ovr >= nms_thresh) { input_boxes.erase(input_boxes.begin() + j); //若overlap大於閾值,刪除該box vArea.erase(vArea.begin() + j); //若overlap大於閾值,刪除該面積 } else { j++; } } } } int main() { //cv::VideoCapture cap; const char *yolov5_param = "/home/zhangyi/CLionProjects/yolov5s/last_500_320.param"; const char *yolov5_bin = "/home/zhangyi/CLionProjects/yolov5s/last_500_320.bin"; float 
nms_threshold = 0.4; float threshold = 0.3; Yolov5Detector ret(yolov5_param, yolov5_bin); /*****************************************將預測結果寫入txt中*************************************************************/ // std::vector<cv::String> filename; // cv::String folder = "/home/zhangyi/CLionProjects/yolov5s/img_val"; // cv::glob(folder, filename); // std::ofstream out("out.txt"); // for(size_t i=0; i<filename.size(); ++i) // { // std::cout << filename[i] << std::endl; // cv::Mat frame = cv::imread(filename[i]); // std::vector<BoxInfo> result = ret.Detect(frame, threshold, nms_threshold); // // std::ofstream write("out.txt", std::ios::app); // out << filename[i] << " "; // for(int j=0; j<result.size(); ++j) // { // const auto obj = result[j]; // //std::ofstream write("out.txt", std::ios::app); // std::cout << obj.x1 << "," << obj.y1 << "," << obj.x2 << "," << obj.y2 << "," << obj.label << "\n"; // out << obj.score << "," << obj.x1 << "," << obj.y1 << "," << obj.x2 << "," << obj.y2 << "," << obj.label << " "; // } // //std::ofstream write("out.txt", std::ios::app); // out << "\n"; // } /*****************************************************************************************************/ cv::Mat frame; const char *img_path = "/home/zhangyi/CLionProjects/yolov5s/img_val/2008_000243.jpg"; std::cout << img_path << std::endl; cv::Mat img = cv::imread(img_path, 1); frame = cv::imread(img_path, 1); std::vector<BoxInfo> result = ret.Detect(frame, threshold, nms_threshold); /*畫圖部分*/ for(int i=0; i<result.size(); i++) { const auto obj = result[i]; fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.score, obj.x1, obj.y1, obj.x2, obj.y2); cv::rectangle(frame, cvPoint(obj.x1, obj.y1), cvPoint(obj.x2, obj.y2), cv::Scalar(255, 0, 0)); char text[256]; static const char* labels[] = { "person", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; sprintf(text, "%s %.1f%%", labels[obj.label], obj.score * 100); int baseLine = 0; cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); int x = obj.x1; int y = obj.y1 - label_size.height - baseLine; if (y < 0) y = 0; if (x + label_size.width > frame.cols) x = frame.cols - label_size.width; cv::rectangle(frame, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), cv::Scalar(255, 255, 255), -1); cv::putText(frame, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); } cv::imshow("image", frame); cv::waitKey(0); return 0; }
There is one more place in this code that needs attention:
Because the .h file sets the input size to 640 while the converted ncnn model takes a 320 input, the size has to be divided by 2 at that point!
The block marked as adaptive image scaling is the letterbox mechanism from yolov5. According to the yolov5 authors this trick can speed things up by roughly 30%, and here it also affects the accuracy of the ncnn model: adding it raised mAP by about 2.5 points.
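As a quick sanity check of what that letterbox block computes, here is a small standalone sketch that reproduces the same arithmetic; the 500x375 image size is only an example:

# Minimal sketch of the letterbox arithmetic used in Detect() above (500x375 is a hypothetical image size)
input_size = 640
cols, rows = 500, 375

im_scale = min(input_size / cols, input_size / rows)        # keep the aspect ratio: 1.28 here
new_w, new_h = int(cols * im_scale), int(rows * im_scale)   # 640 x 480
dw, dh = input_size - new_w, input_size - new_h             # 0, 160
p_w, p_h = dw % 64 // 2, dh % 64 // 2                       # 0, 16
top, bottom = round(p_h - 0.1), round(p_h + 0.1)
left, right = round(p_w - 0.1), round(p_w + 0.1)

print(new_w + left + right, new_h + top + bottom)           # 640 x 512 after padding; the C++ code then resizes to 320 x 320

Note that because only dh % 64 (and dw % 64) is padded, the padded image is generally not square; that matches the minimal-padding idea of yolov5's letterbox.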
The commented-out part of the code runs the detector over the test set and writes the results to a txt file, which is then used to measure mAP. Since most mAP computation code is written in Python and I didn't want the extra work, I didn't port it to C++; if you're interested you can write a C++ version and measure mAP directly from C++.
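For reference, each line of out.txt produced by that block holds the image path followed by one score,x1,y1,x2,y2,label group per detection. The short sketch below parses one such line the same way the Python script in the next section does; the numbers are made up:

# Hypothetical example of one line of out.txt written by the commented-out C++ block above
line = "/home/zhangyi/CLionProjects/yolov5s/img_val/2008_000243.jpg 0.87,45,120,210,380,0 0.55,300,90,420,260,7"

img_name = line.strip().split(" ")[0]
for obj in line.strip().split(" ")[1:]:
    score, x1, y1, x2, y2, label = obj.split(",")
    print(img_name, score, x1, y1, x2, y2, label)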
When you want to measure mAP, uncomment that block and comment out the code below it.
6. Testing the mAP of the ncnn model
Measuring mAP is not really the best way to evaluate a model after converting it to ncnn; I only did it to compare against the earlier pt model. Conversion quality is more often checked by comparing the predicted tensor with the original tensor and seeing to how many decimal places they agree.
The previous section already saved the test-set predictions to a txt file; now the results are taken into PyCharm to compute mAP.
Since I used the VOC dataset, and there is plenty of open-source code for computing VOC mAP (a quick search will find it), the predictions first need to be split by class into separate txt files. In the end there are 20 txt files, one per object class. Here is the code for this step:
import os


def file_name(file_dir):
    # dump the image names (without extension) of the validation set into val.txt
    with open("val.txt", 'w') as f:
        for root, dirs, files in os.walk(file_dir):
            for file in files:
                img_name = file.split(".")[0]
                f.write(img_name)
                f.write("\n")


def cls_pred_file(pred_file):
    # split the single prediction file into one txt file per class
    with open(pred_file) as f:
        lines = f.readlines()
    classes_name = ["person", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
                    "cow", "diningtable", "dog", "horse", "motorbike", "pottedplant", "sheep", "sofa", "train",
                    "tvmonitor"]
    for cls in classes_name:
        with open("./datasets/score/pred_out/%s.txt" % cls, 'w') as F:
            print("Writing %s.txt" % cls)
            for line in lines:
                img_name = line.strip().split(" ")[0]
                objects = line.strip().split(" ")[1:]
                for i in range(len(objects)):
                    score = objects[i].split(",")[0]
                    x1 = objects[i].split(",")[1]
                    y1 = objects[i].split(",")[2]
                    x2 = objects[i].split(",")[3]
                    y2 = objects[i].split(",")[4]
                    label = int(objects[i].split(",")[5])
                    if classes_name[label] == cls:
                        F.write(img_name + " " + score + " " + x1 + " " + y1 + " " + x2 + " " + y2)
                        F.write("\n")
        print("%s.txt is done!" % cls)


if __name__ == "__main__":
    #file_name("./datasets/score/labels/val")
    cls_pred_file("./out.txt")
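After cls_pred_file has run, each per-class file holds one detection per line in the form "image_name score x1 y1 x2 y2", which is the layout the evaluation code below expects. A quick way to check one of the generated files (the class name here is just an example):

# Quick check of one generated per-class file; "person.txt" is only an example
with open("./datasets/score/pred_out/person.txt") as f:
    for line in f.readlines()[:5]:
        img_name, score, x1, y1, x2, y2 = line.split()
        print(img_name, float(score), x1, y1, x2, y2)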
This generates the per-class prediction files for the 20 classes; the next step is the actual mAP computation, with the code below:
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------
import xml.etree.ElementTree as ET
import os
import _pickle as cPickle
import numpy as np


def parse_rec(filename):
    # parse the xml with ElementTree and return the list of objects
    """ Parse a PASCAL VOC xml file """
    tree = ET.parse(filename)
    objects = []
    # collect the ground-truth boxes of this image into a list
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11 point method (default:False).
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.2,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name (no extension by default)
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)                              # create cachedir if the pkl path does not exist yet
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]             # imagenames is the list of all image names

    if not os.path.isfile(cachefile):
        # no pkl in the cache dir: load annots
        recs = {}                                       # recs is a dict keyed by imagename, valued by the parsed xml objects
        for i, imagename in enumerate(imagenames):
            # imagename = imagename.split(' ')[0]
            recs[imagename] = parse_rec(annopath.format(imagename))    # parse the xml of each image in turn
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames)))   # show progress
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)                       # write recs into the pkl file
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)                      # if the pkl exists, load it directly into recs

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]   # drop objects of other classes
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'rb') as f:                      # read the txt file with the batched predictions
        lines = f.readlines()

    splitlines = [x.decode().strip().split(' ') for x in lines]   # split every line of the txt
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap


if __name__ == "__main__":
    results_path = "./datasets/score/detection"
    cls_result = os.listdir(results_path)

    AP = []
    for i in range(len(cls_result)):
        class_name = cls_result[i].split(".txt")[0]
        rec, prec, ap = voc_eval("./datasets/score/pred_out/{}.txt",
                                 "./datasets/score/Annotations/{}.xml",
                                 "./val.txt",
                                 class_name, '.')
        print("{} :\t {}".format(class_name, ap))
        AP.append(ap)

    #map = sum(AP)/len(AP)
    map = tuple(AP)
    print("***************************")
    print("mAP :\t {}".format(float(sum(map) / len(map))))
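As a small sanity check of the AP computation, voc_ap can be run on a toy precision/recall curve. The sketch below assumes it is pasted at the bottom of the evaluation script above (so voc_ap is in scope); the numbers are made up:

# Toy check of voc_ap: 3 detections (TP, FP, TP) against 2 ground-truth boxes
import numpy as np

rec = np.array([0.5, 0.5, 1.0])
prec = np.array([1.0, 0.5, 2.0 / 3.0])
print(voc_ap(rec, prec))                       # area metric: 0.5*1.0 + 0.5*(2/3) ~= 0.833
print(voc_ap(rec, prec, use_07_metric=True))   # 11-point metric gives a slightly different value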
The final result: the ncnn model reaches an mAP of 46.8, while the original torch model, as already mentioned in part (1), reaches 51.5, so converting to ncnn loses 4.7 points. That is quite a large gap, and judging from what others report the drop should not be this big, so the exact cause is unclear. It may be a problem in my post-processing code, or the model may simply not have been trained long enough (this torch model was only trained from scratch for 500 epochs). If anyone knows the reason, please let me know!
Finally, here is the ncnn detection result image:
It is the same image the torch model was tested on, so you can compare the two.
That wraps up converting yolov5 to ncnn. I hope it helps, because there really are a lot of pitfalls along the way!