data_layer應該是網絡的最底層,主要是將數據送給blob進入到net中。在data_layer中存在多個跟data_layer相關的類。
這裏首先說明一下這幾個類之間的區別。
首先Layer是基類,這個之前就已經提到過了。其次看HDF5相關的類有兩個,一個是HDF5DataLayer,另外一個是HDF5OutputLayer,主要是基於HDF5數據格式的讀取和存儲。
留意到這個data_layer的頭文件還include了很多頭文件:
#include <string> #include <utility> #include <vector> #include "hdf5.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/data_reader.hpp" #include "caffe/data_transformer.hpp" #include "caffe/filler.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/blocking_queue.hpp" #include "caffe/util/db.hpp"
hdf5就是以前說到的一種主要用於科學數據記錄、能自我描述的數據格式。
還有幾個跟data相關的頭文件,比如data_reader.hpp、data_transformer.hpp。
其中data_reader主要是負責數據的讀取,傳送到data layer中。而且對於每個source,都會開一個獨立的reading thread(讀取線程),即使有多個solver在並行地跑(比如在多GPU訓練的時候),也能夠保證對於數據庫的讀取是順序的。
data_transformer.hpp裏面的DataTransformer這個類,我們要關注一下。這個類主要能對input data執行一些預處理操作,比如縮放、鏡像、減去均值,同時還支持一些隨機的操作。
其核心的函數如下,這裏總共有5個重載的Transform函數,其中所有函數的第二個參數是相同的,都是一個目標blob,而輸入根據實際情況可以有所選擇,可以是blob,也可以是opencv的Mat結構,或者proto中定義的Datum結構。
// Overloaded Transform entry points of DataTransformer. Every overload takes
// the destination blob (transformed_blob) as its second argument; only the
// type of the input differs.

// Input: a single Datum (the structure defined in the proto).
void Transform(const Datum& datum, Blob<Dtype>* transformed_blob);
// Input: a vector of Datum.
void Transform(const vector<Datum> & datum_vector, Blob<Dtype>* transformed_blob);
// Input: a vector of OpenCV cv::Mat images.
void Transform(const vector<cv::Mat> & mat_vector, Blob<Dtype>* transformed_blob);
// Input: a single OpenCV cv::Mat image.
void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob);
// Input: another Blob.
void Transform(Blob<Dtype>* input_blob, Blob<Dtype>* transformed_blob);
TransformationParameter是該類構造器中需要傳入的一些變形參數,相關的操作定義在proto中,摘錄如下,可以看到總共有scale、mirror、crop_size、mean_file、mean_value、force_color、force_gray共7個相關操作。
// Transformation settings handed to the DataTransformer constructor.
// NOTE(review): the per-field descriptions below are inferred from the field
// names and defaults only; confirm them against the DataTransformer code.
message TransformationParameter {
  // Scale setting; defaults to 1.
  optional float scale = 1 [default = 1];
  // Mirroring switch; off by default.
  optional bool mirror = 2 [default = false];
  // Crop size; defaults to 0 (presumably "no cropping" -- TODO confirm).
  optional uint32 crop_size = 3 [default = 0];
  // Mean file (presumably a path to a stored mean -- TODO confirm). No default.
  optional string mean_file = 4;
  // Mean value(s); repeated, so zero or more entries may be given.
  repeated float mean_value = 5;
  // Force-color switch; off by default.
  optional bool force_color = 6 [default = false];
  // Force-gray switch; off by default.
  optional bool force_gray = 7 [default = false];
}
首先對於data_layer,裏面根據繼承關係最後的幾個子類分別是ImageDataLayer、DataLayer、WindowDataLayer、MemoryDataLayer;HDF5以及Dummy這裏暫時先不作分析。
其實最重要的就是類裏面的LayerSetUp。首先我們來看DataLayer的DataLayerSetUp:
// Shapes the top blobs and the prefetch buffers of a DataLayer.
//
// bottom: not used by this function.
// top:    top[0] receives the data shape; top[1] receives the label shape
//         when output_labels_ is set.
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();

  // Peek at one datum from the reader; it is only used to infer the shape.
  Datum& datum = *(reader_.full().peek());

  // Let the data transformer infer the blob shape that corresponds to the datum.
  vector<int> blob_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(blob_shape);

  // Overwrite the first dimension with the batch size, then apply that shape
  // to top[0] and to every prefetch data buffer.
  blob_shape[0] = batch_size;
  top[0]->Reshape(blob_shape);
  for (int slot = 0; slot < this->PREFETCH_COUNT; ++slot) {
    this->prefetch_[slot].data_.Reshape(blob_shape);
  }
  LOG(INFO) << "output data size: "
      << top[0]->num() << ","
      << top[0]->channels() << ","
      << top[0]->height() << ","
      << top[0]->width();

  // Without labels there is nothing more to set up.
  if (!this->output_labels_) {
    return;
  }

  // Labels: one entry per item of the batch.
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int slot = 0; slot < this->PREFETCH_COUNT; ++slot) {
    this->prefetch_[slot].label_.Reshape(label_shape);
  }
}
MemoryDataLayer的DataLayerSetUp函數:
// Configures a MemoryDataLayer from its memory_data_param and shapes the
// top blobs and the internal "added" blobs accordingly.
//
// bottom: not used by this function.
// top:    top[0] is reshaped to (batch_size, channels, height, width);
//         top[1] is reshaped to a 1-D shape of length batch_size.
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // All dimensions come straight from memory_data_param.
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";

  // Data blobs: the output blob and the internal buffer share one shape.
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  added_data_.Reshape(batch_size_, channels_, height_, width_);

  // Label blobs: one entry per item of the batch.
  vector<int> labels_per_batch(1, batch_size_);
  top[1]->Reshape(labels_per_batch);
  added_label_.Reshape(labels_per_batch);

  // No external data/label pointers are attached yet.
  data_ = NULL;
  labels_ = NULL;

  // Access the CPU data of the internal blobs once; the results are unused.
  // NOTE(review): presumably this forces the allocation up front -- confirm.
  added_data_.cpu_data();
  added_label_.cpu_data();
}
ImageDataLayer,它的DataLayerSetUp函數:
// Sets up an ImageDataLayer: reads the image list file, optionally shuffles
// it and skips a random number of leading entries, loads one image to infer
// the blob shape, and then shapes the top blobs and the prefetch buffers.
//
// bottom: not used by this function.
// top:    top[0] receives the data shape, top[1] the label shape.
//
// Aborts (via CHECK) when new_height/new_width are not set together, when no
// entry could be read from the list file, when there are not enough entries
// to skip, when the first image cannot be loaded, or when batch_size is not
// positive.
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width = this->layer_param_.image_data_param().new_width();
  const bool is_color = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";

  // Read the list file: each entry is an image filename followed by a label.
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label;
  while (infile >> filename >> label) {
    lines_.push_back(std::make_pair(filename, label));
  }

  // lines_[lines_id_] is dereferenced below, so a missing, unreadable or
  // empty list file must be rejected here instead of indexing out of bounds.
  CHECK(!lines_.empty()) << "Could not read any image entries from " << source;

  if (this->layer_param_.image_data_param().shuffle()) {
    // Randomly shuffle the entries.
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Optionally start at a random offset within the first rand_skip entries.
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }

  // Load one image with OpenCV and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;

  // Let the data transformer infer the blob shape from that image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);

  // Overwrite the first dimension with the batch size, then apply the shape
  // to the prefetch data buffers and to top[0].
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();

  // Labels: one entry per item of the batch.
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}
WindowDataLayer的DataLayerSetUp,這個函數比較長,我只列出了其中主要的部分。之前的Image相當於是已經剪裁過的一個圖像,也就是說你的目標基本上是充滿了整個畫面;而Window File是用於原始圖的,也就是說有background和object。這個window file的格式如下:
window_file format
repeated:
    # image_index
    img_path (abs path)
    channels
    height
    width
    num_windows
    class_index overlap x1 y1 x2 y2
//讀取每個box int num_windows; infile >> num_windows; const float fg_threshold = this->layer_param_.window_data_param().fg_threshold(); const float bg_threshold = this->layer_param_.window_data_param().bg_threshold(); for (int i = 0; i < num_windows; ++i) { int label, x1, y1, x2, y2; float overlap; infile >> label >> overlap >> x1 >> y1 >> x2 >> y2; vector<float> window(WindowDataLayer::NUM); window[WindowDataLayer::IMAGE_INDEX] = image_index; window[WindowDataLayer::LABEL] = label; window[WindowDataLayer::OVERLAP] = overlap; window[WindowDataLayer::X1] = x1; window[WindowDataLayer::Y1] = y1; window[WindowDataLayer::X2] = x2; window[WindowDataLayer::Y2] = y2; // add window to foreground list or background list// read each box int num_windows; infile >> num_windows; const float fg_threshold = this->layer_param_.window_data_param().fg_threshold(); const float bg_threshold = this->layer_param_.window_data_param().bg_threshold(); for (int i = 0; i < num_windows; ++i) { int label, x1, y1, x2, y2; float overlap; infile >> label >> overlap >> x1 >> y1 >> x2 >> y2; vector<float> window(WindowDataLayer::NUM); window[WindowDataLayer::IMAGE_INDEX] = image_index; window[WindowDataLayer::LABEL] = label; window[WindowDataLayer::OVERLAP] = overlap; window[WindowDataLayer::X1] = x1; window[WindowDataLayer::Y1] = y1; window[WindowDataLayer::X2] = x2; window[WindowDataLayer::Y2] = y2; //首先計算獲得overlap,根據Overlap與fg_threshold的比較載添加到fg的list中 if (overlap >= fg_threshold) { int label = window[WindowDataLayer::LABEL]; CHECK_GT(label, 0); fg_windows_.push_back(window); label_hist.insert(std::make_pair(label, 0)); label_hist[label]++; } else if (overlap < bg_threshold) { // background window, force label and overlap to 0 window[WindowDataLayer::LABEL] = 0; window[WindowDataLayer::OVERLAP] = 0; bg_windows_.push_back(window); label_hist[0]++; } } =- if (overlap >= fg_threshold) { int label = window[WindowDataLayer::LABEL]; CHECK_GT(label, 0); fg_windows_.push_back(window); 
label_hist.insert(std::make_pair(label, 0)); label_hist[label]++; } else if (overlap < bg_threshold) { //background的label和overlap都是0 window[WindowDataLayer::LABEL] = 0; window[WindowDataLayer::OVERLAP] = 0; bg_windows_.push_back(window); label_hist[0]++; } } .............. for (map<int, int>::iterator it = label_hist.begin(); it != label_hist.end(); ++it) { LOG(INFO) << "class " << it->first << " has " << label_hist[it->first] << " samples"; } LOG(INFO) << "Amount of context padding: " << this->layer_param_.window_data_param().context_pad(); LOG(INFO) << "Crop mode: " << this->layer_param_.window_data_param().crop_mode(); //這裏以後的步驟就差很少了,一樣是對transform的一些操做 const int crop_size = this->transform_param_.crop_size(); CHECK_GT(crop_size, 0); const int batch_size = this->layer_param_.window_data_param().batch_size(); top[0]->Reshape(batch_size, channels, crop_size, crop_size); for (int i = 0; i < this->PREFETCH_COUNT; ++i) this->prefetch_[i].data_.Reshape( batch_size, channels, crop_size, crop_size); LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // 對label進行reshape vector<int> label_shape(1, batch_size); top[1]->Reshape(label_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].label_.Reshape(label_shape); } //作減均值的操做 has_mean_file_ = this->transform_param_.has_mean_file(); has_mean_values_ = this->transform_param_.mean_value_size() > 0; if (has_mean_file_) { const string& mean_file = this->transform_param_.mean_file(); LOG(INFO) << "Loading mean file from: " << mean_file; BlobProto blob_proto; ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); data_mean_.FromProto(blob_proto); } if (has_mean_values_) { CHECK(has_mean_file_ == false) << "Cannot specify mean_file and mean_value at the same time"; for (int c = 0; c < this->transform_param_.mean_value_size(); ++c) { mean_values_.push_back(this->transform_param_.mean_value(c)); } CHECK(mean_values_.size() 
== 1 || mean_values_.size() == channels) << "Specify either 1 mean_value or as many as channels: " << channels; if (channels > 1 && mean_values_.size() == 1) { // Replicate the mean_value for simplicity for (int c = 1; c < channels; ++c) { mean_values_.push_back(mean_values_[0]); } } }