在 SPPNet 出現之前,R-CNN 使用 crop 和 warp 來調整圖片大小,這種操作會造成圖片資訊的失真和丟失。SPPNet 這個模型推出之後(關於這個網絡的描述,可以看看之前寫的一篇理解:http://www.cnblogs.com/gongxijun/p/7172134.html),rbg 大神把 SPPNet 的思路沿用到了他的下一個模型 Fast R-CNN 中。不過 roi_pooling 和 SPPNet 的思路雖然相同,實現方式還是不同的。我們看一下網絡參數:
layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
結合源代碼,作者借鑑了 SPPNet 的空間金字塔池化(spatial pyramid pooling)方式,但和 SPPNet 不同的是,作者在這裡只使用了 (pooled_w, pooled_h) 這一個尺度:把每一個候選區域(RoI)對應的特徵圖區域劃分成 pooled_h × pooled_w 塊,然後對每一塊進行 max pooling 取值,最後得到 n×c×7×7 固定大小的特徵圖(n 為 RoI 個數,c 為通道數)。
1 // ------------------------------------------------------------------ 2 // Fast R-CNN 3 // Copyright (c) 2015 Microsoft 4 // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 // Written by Ross Girshick 6 // ------------------------------------------------------------------ 7 8 #include <cfloat> 9 10 #include "caffe/fast_rcnn_layers.hpp" 11 12 using std::max; 13 using std::min; 14 using std::floor; 15 using std::ceil; 16 17 namespace caffe { 18 19 template <typename Dtype> 20 void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, 21 const vector<Blob<Dtype>*>& top) { 22 ROIPoolingParameter roi_pool_param = this->layer_param_.roi_pooling_param(); 23 CHECK_GT(roi_pool_param.pooled_h(), 0) 24 << "pooled_h must be > 0"; 25 CHECK_GT(roi_pool_param.pooled_w(), 0) 26 << "pooled_w must be > 0"; 27 pooled_height_ = roi_pool_param.pooled_h(); //定義網絡的大小 28 pooled_width_ = roi_pool_param.pooled_w(); 29 spatial_scale_ = roi_pool_param.spatial_scale(); 30 LOG(INFO) << "Spatial scale: " << spatial_scale_; 31 } 32 33 template <typename Dtype> 34 void ROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, 35 const vector<Blob<Dtype>*>& top) { 36 channels_ = bottom[0]->channels(); 37 height_ = bottom[0]->height(); 38 width_ = bottom[0]->width(); 39 top[0]->Reshape(bottom[1]->num(), channels_, pooled_height_, 40 pooled_width_); 41 max_idx_.Reshape(bottom[1]->num(), channels_, pooled_height_, 42 pooled_width_); 43 } 44 45 template <typename Dtype> 46 void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, 47 const vector<Blob<Dtype>*>& top) { 48 const Dtype* bottom_data = bottom[0]->cpu_data(); 49 const Dtype* bottom_rois = bottom[1]->cpu_data();//獲取roidb信息(n,x1,y1,x2,y2) 50 // Number of ROIs 51 int num_rois = bottom[1]->num();//候選目標的個數 52 int batch_size = bottom[0]->num();//特徵圖的維度,vgg16的conv5以後爲512 53 int top_count = top[0]->count();//須要輸出的值個數 54 Dtype* top_data = top[0]->mutable_cpu_data(); 55 
caffe_set(top_count, Dtype(-FLT_MAX), top_data); 56 int* argmax_data = max_idx_.mutable_cpu_data(); 57 caffe_set(top_count, -1, argmax_data); 58 59 // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 60 for (int n = 0; n < num_rois; ++n) { 61 int roi_batch_ind = bottom_rois[0]; 62 int roi_start_w = round(bottom_rois[1] * spatial_scale_);//縮小16倍,將候選區域在原始座標中的位置,映射到conv_5特徵圖上 63 int roi_start_h = round(bottom_rois[2] * spatial_scale_); 64 int roi_end_w = round(bottom_rois[3] * spatial_scale_); 65 int roi_end_h = round(bottom_rois[4] * spatial_scale_); 66 CHECK_GE(roi_batch_ind, 0); 67 CHECK_LT(roi_batch_ind, batch_size); 68 69 int roi_height = max(roi_end_h - roi_start_h + 1, 1);//獲得候選區域在特徵圖上的大小 70 int roi_width = max(roi_end_w - roi_start_w + 1, 1); 71 const Dtype bin_size_h = static_cast<Dtype>(roi_height) 72 / static_cast<Dtype>(pooled_height_);//計算若是須要劃分紅(pooled_height_,pooled_weight_)這麼多塊,那麼每個塊的大小(bin_size_w,bin_size_h); 73 const Dtype bin_size_w = static_cast<Dtype>(roi_width) 74 / static_cast<Dtype>(pooled_width_); 75 76 const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind);//獲取當前維度的特徵圖數據,好比一共有(n,x1,x2,x3,x4)的數據,拿到第一塊特徵圖的數據 77 78 for (int c = 0; c < channels_; ++c) { 79 for (int ph = 0; ph < pooled_height_; ++ph) { 80 for (int pw = 0; pw < pooled_width_; ++pw) { 81 // Compute pooling region for this output unit: 82 // start (included) = floor(ph * roi_height / pooled_height_) 83 // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) 84 int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) 85 * bin_size_h)); //計算每一塊的位置 86 int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) 87 * bin_size_w)); 88 int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) 89 * bin_size_h)); 90 int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) 91 * bin_size_w)); 92 93 hstart = min(max(hstart + roi_start_h, 0), height_); 94 hend = min(max(hend + roi_start_h, 0), height_); 95 wstart = min(max(wstart + roi_start_w, 0), 
width_); 96 wend = min(max(wend + roi_start_w, 0), width_); 97 98 bool is_empty = (hend <= hstart) || (wend <= wstart); 99 100 const int pool_index = ph * pooled_width_ + pw; 101 if (is_empty) { 102 top_data[pool_index] = 0; 103 argmax_data[pool_index] = -1; 104 } 105 106 for (int h = hstart; h < hend; ++h) { 107 for (int w = wstart; w < wend; ++w) { 108 const int index = h * width_ + w; 109 if (batch_data[index] > top_data[pool_index]) { 110 top_data[pool_index] = batch_data[index]; //在取每一塊中的最大值,就是max_pooling操做. 111 argmax_data[pool_index] = index; 112 } 113 } 114 } 115 } 116 } 117 // Increment all data pointers by one channel 118 batch_data += bottom[0]->offset(0, 1); 119 top_data += top[0]->offset(0, 1); 120 argmax_data += max_idx_.offset(0, 1); 121 } 122 // Increment ROI data pointer 123 bottom_rois += bottom[1]->offset(1); 124 } 125 } 126 127 template <typename Dtype> 128 void ROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, 129 const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { 130 NOT_IMPLEMENTED; 131 } 132 133 134 #ifdef CPU_ONLY 135 STUB_GPU(ROIPoolingLayer); 136 #endif 137 138 INSTANTIATE_CLASS(ROIPoolingLayer); 139 REGISTER_LAYER_CLASS(ROIPooling); 140 141 } // namespace caffe
經過以上的操作之後,就得到了固定大小的特徵圖,然後就可以進行全連接操作了。希望我說明白了。
---完.