/**
 * @brief Provides base for data layers that feed blobs to the Net.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 * Base class for data layers.
 */
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 public:
  // Explicit constructor.
  explicit BaseDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden except by the BasePrefetchingDataLayer.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel;
  // this reports whether the data may be shared that way.
  virtual inline bool ShareInParallel() const { return true; }
  // Layer-type-specific data setup.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

 protected:
  // Parameters that control the input transformations: whether to mirror,
  // whether to crop, whether to subtract a mean file, whether to scale.
  TransformationParameter transform_param_;
  // Pointer to the object that actually performs the transformations
  // (its Transform method, given the parameters, transforms the data).
  shared_ptr<DataTransformer<Dtype> > data_transformer_;
  bool output_labels_;
};
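Before diving into the implementation, here is a minimal standalone sketch of how the transform_param_/data_transformer_ pair is used. The parameter values are invented for illustration; the calls mirror the ones that appear in the layer code below.

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/proto/caffe.pb.h"

using namespace caffe;

void transform_sketch(const Datum& datum) {
  TransformationParameter param;
  param.set_mirror(true);       // random horizontal flips
  param.set_crop_size(227);     // random 227x227 crops in the TRAIN phase
  param.set_scale(1.0f / 255);  // multiply pixel values by 1/255
  DataTransformer<float> transformer(param, TRAIN);
  transformer.InitRand();       // seed the transformer's RNG
  Blob<float> transformed(1, 3, 227, 227);
  // Writes the mirrored/cropped/scaled datum into `transformed`.
  transformer.Transform(datum, &transformed);
}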
// The constructor simply stores the transformation parameters.
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param),
      transform_param_(param.transform_param()) {
}

// At setup time the number of top blobs decides the outputs: a single top
// means the layer emits data only; two tops mean it also emits labels.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) {
    output_labels_ = false;
  } else {
    output_labels_ = true;
  }
  // Create a DataTransformer instance for preprocessing the data.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  // Seed the random number generator.
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top:
  // run the layer-type-specific data setup.
  DataLayerSetUp(bottom, top);
}
// BasePrefetchingDataLayer inherits from BaseDataLayer and InternalThread;
// it is the base class for all prefetching data layers.
template <typename Dtype>
class BasePrefetchingDataLayer :
    public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Prefetches batches (asynchronously if to GPU memory)
  static const int PREFETCH_COUNT = 3;

 protected:
  virtual void InternalThreadEntry();
  // load_batch is a pure virtual function that every derived class
  // must implement.
  virtual void load_batch(Batch<Dtype>* batch) = 0;

  // The prefetch array plus the free and full queues.
  Batch<Dtype> prefetch_[PREFETCH_COUNT];
  BlockingQueue<Batch<Dtype>*> prefetch_free_;
  BlockingQueue<Batch<Dtype>*> prefetch_full_;

  Blob<Dtype> transformed_data_;
};

The concrete implementation of BasePrefetchingDataLayer is as follows:

// The constructor fills the free queue with the prefetch batches.
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_free_(), prefetch_full_() {
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_free_.push(&prefetch_[i]);
  }
}

// Layer setup.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // First run the base class BaseDataLayer's setup.
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  // Allocate the CPU and GPU buffers up front to avoid those failures.
  // CPU first:
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_[i].data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i].label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  // Then GPU:
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < PREFETCH_COUNT; ++i) {
      prefetch_[i].data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i].label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  // Seed the random number generator.
  this->data_transformer_->InitRand();
  // Start the prefetch thread.
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}

// After StartInternalThread is called, the thread runs this user-defined
// entry function.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    // Create a non-blocking stream.
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {
      // Pop a free batch...
      Batch<Dtype>* batch = prefetch_free_.pop();
      // ...and load it.
      load_batch(batch);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        // In GPU mode, push the data to the GPU asynchronously...
        batch->data_.data().get()->async_gpu_push(stream);
        // ...and wait for the copy to finish.
        CUDA_CHECK(cudaStreamSynchronize(stream));
      }
#endif
      // Push the loaded batch onto the full queue.
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    // Destroy the stream.
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass pops a loaded batch from the full queue.
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
  // Reshape to loaded data.
  top[0]->ReshapeLike(batch->data_);
  // Copy the data into top[0].
  caffe_copy(batch->data_.count(), batch->data_.cpu_data(),
             top[0]->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    // Reshape top[1] to the loaded labels...
    top[1]->ReshapeLike(batch->label_);
    // ...and copy the labels into it.
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
        top[1]->mutable_cpu_data());
  }
  // Return the consumed batch to the free queue.
  prefetch_free_.push(batch);
}

// In a CPU-only build, STUB_GPU_FORWARD generates a Forward_gpu for
// BasePrefetchingDataLayer that does not forward anything but simply
// reports an error.
#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif

// Instantiate the classes.
INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);
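The prefetch_free_/prefetch_full_ pair is a classic bounded producer-consumer pipeline: the prefetch thread pops empty batches from the free queue and pushes loaded ones onto the full queue, while Forward does the reverse. Below is a minimal C++11 sketch of the pattern; it is an illustration of the idea, not Caffe's actual BlockingQueue (which is built on boost).

#include <condition_variable>
#include <mutex>
#include <queue>

template <typename T>
class SimpleBlockingQueue {
 public:
  void push(const T& t) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.push(t);
    }
    cond_.notify_one();  // wake one waiting consumer
  }
  T pop() {  // blocks until an element is available
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = queue_.front();
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

// Producer (prefetch thread): batch = free.pop(); load_batch(batch); full.push(batch);
// Consumer (Forward):         batch = full.pop(); copy into tops;    free.push(batch);
// With PREFETCH_COUNT batches circulating, the producer can run at most
// PREFETCH_COUNT batches ahead of the consumer.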
// DataLayer is the protagonist here; it inherits from
// BasePrefetchingDataLayer.
template <typename Dtype>
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit DataLayer(const LayerParameter& param);
  virtual ~DataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // DataLayer uses DataReader instead for sharing for parallelism;
  // the members below are what it adds on top of the base class.
  virtual inline bool ShareInParallel() const { return false; }
  virtual inline const char* type() const { return "Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  virtual void load_batch(Batch<Dtype>* batch);

  DataReader reader_;
};
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <string>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"

namespace caffe {

// Initialize the DataReader with the layer parameters.
template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    reader_(param) {
}

// The destructor stops the internal thread.
template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

// Data layer setup.
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Read batch_size from the layer parameters.
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Read a data point from reader_, and use it to initialize the top blob.
  Datum& datum = *(reader_.full().peek());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  // InferBlobShape determined (channels, height, width); now set the
  // batch size as well:
  //   top_shape[0] = batch_size
  //   top_shape[1] = channels
  //   top_shape[2] = height
  //   top_shape[3] = width
  top_shape[0] = batch_size;
  // Reshape top[0] accordingly...
  top[0]->Reshape(top_shape);
  // ...and the prefetch buffers as well.
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  // If labels are output, shape top[1] as well.
  if (this->output_labels_) {
    vector<int> label_shape(1, batch_size);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(label_shape);
    }
  }
}

// This function is called on the prefetch thread, from the thread entry
// function defined earlier.
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  // In other words, with this approach each batch may have its own
  // data dimensions.
  // Get batch_size from the parameter file.
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Peek at the first datum...
  Datum& datum = *(reader_.full().peek());
  // ...and use data_transformer to infer the expected blob shape from it.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  // top_data holds the data...
  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = NULL;  // suppress warnings about uninitialized variables
  // ...and top_label holds the labels.
  if (this->output_labels_) {
    top_label = batch->label_.mutable_cpu_data();
  }
  // Process this batch of data.
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a datum
    Datum& datum = *(reader_.full().pop("Waiting for data"));
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply data transformations (mirror, scale, crop...)
    // Compute this item's offset within the batch blob: given the item id,
    // offset() returns where that item's data starts.
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));
    // Copy label.
    if (this->output_labels_) {
      top_label[item_id] = datum.label();
    }
    // Accumulate the transformation time.
    trans_time += timer.MicroSeconds();
    // Return the datum pointer to the reader's free queue.
    reader_.free().push(const_cast<Datum*>(&datum));
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe
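The offset arithmetic above is just row-major indexing into an N x C x H x W blob: item n of the batch starts at n * C * H * W elements into the buffer. A standalone sketch of the formula (this mirrors what Blob::offset computes; the free function here is only illustrative):

#include <cassert>

// Row-major index of element (n, c, h, w) in an N x C x H x W blob.
int blob_offset(int n, int c, int h, int w, int C, int H, int W) {
  return ((n * C + c) * H + h) * W + w;
}

int main() {
  const int C = 3, H = 256, W = 256;
  // Item 2 of the batch starts at 2 * C * H * W:
  assert(blob_offset(2, 0, 0, 0, C, H, W) == 2 * C * H * W);
  return 0;
}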
/**
 * @brief Provides data to the Net generated by a Filler.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 * This class inherits directly from Layer and produces its data
 * with Fillers.
 */
template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
 public:
  explicit DummyDataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "DummyData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

  vector<shared_ptr<Filler<Dtype> > > fillers_;
  vector<bool> refill_;
};
message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0]; // the value in constant filler
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
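Fillers can also be constructed programmatically, exactly as LayerSetUp below does for its default constant filler. A small sketch (the parameter values are arbitrary; the calls mirror the GetFiller usage in the implementation that follows):

#include "caffe/blob.hpp"
#include "caffe/filler.hpp"

using namespace caffe;

void fill_with_gaussian(Blob<float>* blob) {
  FillerParameter filler_param;
  filler_param.set_type("gaussian");
  filler_param.set_mean(0.0f);
  filler_param.set_std(1.0f);
  // GetFiller instantiates the filler matching the "type" string.
  shared_ptr<Filler<float> > filler(GetFiller<float>(filler_param));
  filler->Fill(blob);  // overwrites blob's data with N(0, 1) samples
}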
// DummyDataLayer fills any number of arbitrarily shaped blobs with random
// (or constant) data generated by "Fillers" (see "message FillerParameter").
message DummyDataParameter {
  // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N
  // shape fields, and 0, 1 or N data_fillers.
  //
  // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
  // If 1 data_filler is specified, it is applied to all top blobs. If N are
  // specified, the ith is applied to the ith top blob.
  repeated FillerParameter data_filler = 1;
  repeated BlobShape shape = 6;

  // 4D dimensions -- deprecated. Use "shape" instead.
  repeated uint32 num = 2;
  repeated uint32 channels = 3;
  repeated uint32 height = 4;
  repeated uint32 width = 5;
}
#include <vector> #include "caffe/filler.hpp" #include "caffe/layer.hpp" #include "caffe/vision_layers.hpp" namespace caffe { template <typename Dtype> void DummyDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { // 输出有几个 const int num_top = top.size(); // 获取该层的参数 const DummyDataParameter& param = this->layer_param_.dummy_data_param(); // 有几个filler const int num_data_filler = param.data_filler_size(); // 检查filler的个数,要么为0、1、或者等于输出的个数 CHECK(num_data_filler == 0 || num_data_filler == 1 || num_data_filler == num_top) << "Number of data fillers must be 0, 1 or equal to the number of tops: " << num_top << "; you specified " << num_data_filler << " data fillers."; // 判断是否全部为0 const bool legacy_dims = param.num_size() || param.channels_size() || param.height_size() || param.width_size(); // 下面就是检查参数是不是满足要求,1或者0或者等于num_top if (legacy_dims) {// 如果不是全部为0 CHECK_EQ(0, param.shape_size()) << "Both shape and legacy fields were specified"; // Using deprecated 4D output dim specifiers. CHECK(param.num_size() == 1 || param.num_size() == num_top) << "Must specify 'num' once, or once per top blob " << "(" << num_top << "); specified " << param.num_size() << "."; CHECK(param.channels_size() == 1 || param.channels_size() == num_top) << "Must specify 'channels' once, or once per top blob " << "(" << num_top << "); specified " << param.channels_size() << "."; CHECK(param.height_size() == 1 || param.height_size() == num_top) << "Must specify 'height' once, or once per top blob " << "(" << num_top << "); specified " << param.height_size() << "."; CHECK(param.width_size() == 1 || param.width_size() == num_top) << "Must specify 'width' once, or once per top blob " << "(" << num_top << "); specified " << param.width_size() << "."; } else { CHECK(param.shape_size() == 1 || param.shape_size() == num_top) << "Must specify 'shape' once, or once per top blob " << "(" << num_top << "); specified " << param.shape_size() << "."; } // refill_[i] tells Forward i whether or not to actually refill top Blob i. // If refill_[i] is false, Forward does nothing for Blob i. We use this to // avoid wastefully refilling "constant" Blobs in every forward pass. // We first fill refill_ in with the INVERSE of its final values. // The first time we run Forward from the LayerSetUp method, we'll fill only // Blobs for which refill_ is normally false. These Blobs will never be // filled again. // refill_表明是不是需要填充Blob,如果refill_[i]=false,那么就不会Blob i做任何事 // refill_.clear(); fillers_.clear(); // 要么是0,要么是1 if (num_data_filler <= 1) { // 定义了生成数据的参数 // 比如均值、方差等,详细请看其定义 FillerParameter filler_param; if (num_data_filler == 0) { // 如果没有指定,那么就是常数值填充 filler_param.set_type("constant"); filler_param.set_value(0); } else { // 否则复制filler到filler_param filler_param.CopyFrom(param.data_filler(0)); } // Refill on each iteration iff not using a constant filler, // but use the inverse of this rule for the first run. // 如果 refill_.resize(1); refill_[0] = (strcmp(filler_param.type().c_str(), "constant") == 0); fillers_.resize(1); // 实例化填充器 fillers_[0].reset(GetFiller<Dtype>(filler_param)); } else {// 如果等于=num_top refill_.resize(num_top); fillers_.resize(num_top); for (int i = 0; i < num_top; ++i) { fillers_[i].reset(GetFiller<Dtype>(param.data_filler(i))); // Refill on each iteration iff not using a constant filler, // but use the inverse of this rule for the first run. 
      refill_[i] =
          (strcmp(param.data_filler(i).type().c_str(), "constant") == 0);
    }
  }
  // Reshape the tops.
  for (int i = 0; i < num_top; ++i) {
    if (legacy_dims) {
      const int num = (param.num_size() == 1) ? param.num(0) : param.num(i);
      const int channels =
          (param.channels_size() == 1) ? param.channels(0) : param.channels(i);
      const int height =
          (param.height_size() == 1) ? param.height(0) : param.height(i);
      const int width =
          (param.width_size() == 1) ? param.width(0) : param.width(i);
      top[i]->Reshape(num, channels, height, width);
    } else {
      const int shape_index = (param.shape_size() == 1) ? 0 : i;
      top[i]->Reshape(param.shape(shape_index));
    }
  }
  // Run Forward once, with refill_ inverted, to fill the constant Blobs.
  this->Forward(bottom, top);
  // Invert the inverted refill_ values to refill the desired (non-constant)
  // Blobs in every usual forward pass.
  for (int i = 0; i < refill_.size(); ++i) {
    refill_[i] = !refill_[i];
  }
}

// Forward calls this function.
template <typename Dtype>
void DummyDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Invoke the fillers_ to perform the fill.
  for (int i = 0; i < top.size(); ++i) {
    const int filler_id = (fillers_.size() > 1) ? i : 0;
    if (refill_[filler_id]) {
      fillers_[filler_id]->Fill(top[i]);
    }
  }
}

// Instantiate and register the class.
INSTANTIATE_CLASS(DummyDataLayer);
REGISTER_LAYER_CLASS(DummyData);

}  // namespace caffe
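The double inversion of refill_ is subtle enough to deserve a worked example. The idea: constant blobs are filled exactly once (during setup) and never again, while non-constant blobs are refilled on every forward pass. A standalone sketch of the state machine, under the simplifying assumption of a single top blob:

#include <cstdio>

int main() {
  bool is_constant = true;      // suppose a constant filler
  bool refill = is_constant;    // INVERTED rule for the first run
  // First Forward (called from LayerSetUp): refill == true, so the
  // constant blob is filled exactly once here.
  if (refill) { std::printf("setup: fill constant blob once\n"); }
  refill = !refill;             // invert to the steady-state rule
  // Every later Forward: refill == false, so the constant blob is never
  // touched again; a non-constant filler would have ended up with true.
  for (int iter = 0; iter < 3; ++iter) {
    if (refill) { std::printf("iter %d: refill\n", iter); }
  }
  return 0;
}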
template <typename Dtype>
class HDF5DataLayer : public Layer<Dtype> {
 public:
  explicit HDF5DataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual ~HDF5DataLayer();
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  // Read data from an HDF5 file.
  virtual void LoadHDF5FileData(const char* filename);

  std::vector<std::string> hdf_filenames_;
  unsigned int num_files_;
  unsigned int current_file_;
  hsize_t current_row_;
  std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
  // Row indices into the data; these indices can be shuffled.
  std::vector<unsigned int> data_permutation_;
  // Indices into the file name list; these indices can be shuffled.
  std::vector<unsigned int> file_permutation_;
};
#ifndef CAFFE_UTIL_HDF5_H_
#define CAFFE_UTIL_HDF5_H_

#include <string>

#include "hdf5.h"
#include "hdf5_hl.h"

#include "caffe/blob.hpp"

namespace caffe {

// Reads an HDF5 dataset's metadata and dimensions, verifies them, and
// reshapes the blob accordingly.
template <typename Dtype>
void hdf5_load_nd_dataset_helper(
    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
    Blob<Dtype>* blob);

// Loads an ND dataset into a blob (specialized below for float and double).
template <typename Dtype>
void hdf5_load_nd_dataset(
    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
    Blob<Dtype>* blob);

// Saves a blob as an ND dataset (specialized below for float and double).
template <typename Dtype>
void hdf5_save_nd_dataset(
    const hid_t file_id, const string& dataset_name, const Blob<Dtype>& blob,
    bool write_diff = false);

// Load/save an int, and load/save a string, from/to a file.
int hdf5_load_int(hid_t loc_id, const string& dataset_name);
void hdf5_save_int(hid_t loc_id, const string& dataset_name, int i);
string hdf5_load_string(hid_t loc_id, const string& dataset_name);
void hdf5_save_string(hid_t loc_id, const string& dataset_name,
                      const string& s);

// Get the number of links in a group.
int hdf5_get_num_links(hid_t loc_id);
// Get a dataset's name from its index.
string hdf5_get_name_by_idx(hid_t loc_id, int idx);

}  // namespace caffe

#endif   // CAFFE_UTIL_HDF5_H_
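To make the helpers concrete, here is a hypothetical round-trip through them (the file name is made up; this is a sketch, not code from Caffe):

#include "caffe/blob.hpp"
#include "caffe/util/hdf5.hpp"
#include "hdf5.h"

using namespace caffe;

void hdf5_roundtrip_example() {
  Blob<float> blob(2, 3, 4, 5);  // some data blob
  // Write it out under the dataset name "data".
  hid_t file_id = H5Fcreate("example.h5", H5F_ACC_TRUNC,
                            H5P_DEFAULT, H5P_DEFAULT);
  hdf5_save_nd_dataset(file_id, "data", blob);
  H5Fclose(file_id);
  // Read it back; the helper verifies 1 <= ndims <= 4 and reshapes `loaded`.
  Blob<float> loaded;
  file_id = H5Fopen("example.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
  hdf5_load_nd_dataset(file_id, "data", 1, 4, &loaded);
  H5Fclose(file_id);
}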
#include "caffe/util/hdf5.hpp" #include <string> #include <vector> namespace caffe { // Verifies format of data stored in HDF5 file and reshapes blob accordingly. // 获取HDF5文件的信息以及数据的维度 template <typename Dtype> void hdf5_load_nd_dataset_helper( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, Blob<Dtype>* blob) { // Verify that the dataset exists. // 检查是否存在 CHECK(H5LTfind_dataset(file_id, dataset_name_)) << "Failed to find HDF5 dataset " << dataset_name_; // Verify that the number of dimensions is in the accepted range. herr_t status; int ndims; // 获取数据维度 status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims); CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_; CHECK_GE(ndims, min_dim); CHECK_LE(ndims, max_dim); // Verify that the data format is what we expect: float or double. std::vector<hsize_t> dims(ndims); H5T_class_t class_; // 获取数据信息 status = H5LTget_dataset_info( file_id, dataset_name_, dims.data(), &class_, NULL); CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; switch (class_) { case H5T_FLOAT: LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT"; break; case H5T_INTEGER: LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER"; break; case H5T_TIME: LOG(FATAL) << "Unsupported datatype class: H5T_TIME"; case H5T_STRING: LOG(FATAL) << "Unsupported datatype class: H5T_STRING"; case H5T_BITFIELD: LOG(FATAL) << "Unsupported datatype class: H5T_BITFIELD"; case H5T_OPAQUE: LOG(FATAL) << "Unsupported datatype class: H5T_OPAQUE"; case H5T_COMPOUND: LOG(FATAL) << "Unsupported datatype class: H5T_COMPOUND"; case H5T_REFERENCE: LOG(FATAL) << "Unsupported datatype class: H5T_REFERENCE"; case H5T_ENUM: LOG(FATAL) << "Unsupported datatype class: H5T_ENUM"; case H5T_VLEN: LOG(FATAL) << "Unsupported datatype class: H5T_VLEN"; case H5T_ARRAY: LOG(FATAL) << "Unsupported datatype class: H5T_ARRAY"; default: LOG(FATAL) << "Datatype class unknown"; } // 设置blob的维度 vector<int> blob_dims(dims.size()); for (int i = 0; i < dims.size(); ++i) { blob_dims[i] = dims[i]; } blob->Reshape(blob_dims); } // float类型的获取数据维度和信息的包裹函数 template <> void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, Blob<float>* blob) { hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); herr_t status = H5LTread_dataset_float( file_id, dataset_name_, blob->mutable_cpu_data()); CHECK_GE(status, 0) << "Failed to read float dataset " << dataset_name_; } // double类型的获取数据维度和信息的包裹函数 template <> void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, Blob<double>* blob) { hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); herr_t status = H5LTread_dataset_double( file_id, dataset_name_, blob->mutable_cpu_data()); CHECK_GE(status, 0) << "Failed to read double dataset " << dataset_name_; } // 存放float类型到hdf5文件 template <> void hdf5_save_nd_dataset<float>( const hid_t file_id, const string& dataset_name, const Blob<float>& blob, bool write_diff) { // blob信息放到dims int num_axes = blob.num_axes(); hsize_t *dims = new hsize_t[num_axes]; for (int i = 0; i < num_axes; ++i) { dims[i] = blob.shape(i); } // 获取数据指针 const float* data; if (write_diff) { data = blob.cpu_diff(); } else { data = blob.cpu_data(); } // 存放数据到hdf5 herr_t status = H5LTmake_dataset_float( file_id, dataset_name.c_str(), num_axes, dims, data); CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name; delete[] dims; } // 存放double类型到hdf5文件 template <> void 
hdf5_save_nd_dataset<double>(
    hid_t file_id, const string& dataset_name, const Blob<double>& blob,
    bool write_diff) {
  int num_axes = blob.num_axes();
  hsize_t *dims = new hsize_t[num_axes];
  for (int i = 0; i < num_axes; ++i) {
    dims[i] = blob.shape(i);
  }
  const double* data;
  if (write_diff) {
    data = blob.cpu_diff();
  } else {
    data = blob.cpu_data();
  }
  herr_t status = H5LTmake_dataset_double(
      file_id, dataset_name.c_str(), num_axes, dims, data);
  CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
  delete[] dims;
}

// Load a string dataset.
string hdf5_load_string(hid_t loc_id, const string& dataset_name) {
  // Get size of dataset
  size_t size;
  H5T_class_t class_;
  herr_t status = H5LTget_dataset_info(loc_id, dataset_name.c_str(),
                                       NULL, &class_, &size);
  CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
  char *buf = new char[size];
  status = H5LTread_dataset_string(loc_id, dataset_name.c_str(), buf);
  CHECK_GE(status, 0)
      << "Failed to load string dataset with name " << dataset_name;
  string val(buf);
  delete[] buf;
  return val;
}

// Save a string dataset.
void hdf5_save_string(hid_t loc_id, const string& dataset_name,
                      const string& s) {
  herr_t status = H5LTmake_dataset_string(loc_id, dataset_name.c_str(),
                                          s.c_str());
  CHECK_GE(status, 0)
      << "Failed to save string dataset with name " << dataset_name;
}

// Load an int dataset.
int hdf5_load_int(hid_t loc_id, const string& dataset_name) {
  int val;
  herr_t status = H5LTread_dataset_int(loc_id, dataset_name.c_str(), &val);
  CHECK_GE(status, 0)
      << "Failed to load int dataset with name " << dataset_name;
  return val;
}

// Save an int dataset.
void hdf5_save_int(hid_t loc_id, const string& dataset_name, int i) {
  hsize_t one = 1;
  herr_t status =
      H5LTmake_dataset_int(loc_id, dataset_name.c_str(), 1, &one, &i);
  CHECK_GE(status, 0)
      << "Failed to save int dataset with name " << dataset_name;
}

// Count the number of links in a group.
int hdf5_get_num_links(hid_t loc_id) {
  H5G_info_t info;
  herr_t status = H5Gget_info(loc_id, &info);
  CHECK_GE(status, 0) << "Error while counting HDF5 links.";
  return info.nlinks;
}

// Get a dataset's name from its index.
string hdf5_get_name_by_idx(hid_t loc_id, int idx) {
  ssize_t str_size = H5Lget_name_by_idx(
      loc_id, ".", H5_INDEX_NAME, H5_ITER_NATIVE, idx, NULL, 0, H5P_DEFAULT);
  CHECK_GE(str_size, 0) << "Error retrieving HDF5 dataset at index " << idx;
  char *c_str = new char[str_size+1];
  ssize_t status = H5Lget_name_by_idx(
      loc_id, ".", H5_INDEX_NAME, H5_ITER_NATIVE, idx, c_str, str_size+1,
      H5P_DEFAULT);
  CHECK_GE(status, 0) << "Error retrieving HDF5 dataset at index " << idx;
  string result(c_str);
  delete[] c_str;
  return result;
}

}  // namespace caffe

Now the concrete implementation of HDF5DataLayer:

/*
TODO:
- load file in a separate thread ("prefetch")
- can be smarter about the memcpy call instead of doing it row-by-row
  :: use util functions caffe_copy, and Blob->offset()
  :: don't forget to update hdf5_daa_layer.cu accordingly
- add ability to shuffle filenames if flag is set
*/
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"
#include "stdint.h"

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/hdf5.hpp"

namespace caffe {

template <typename Dtype>
HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }

// Load data and label from HDF5 filename into the class property blobs.
// Reads the HDF5 file's data into hdf_blobs_.
template <typename Dtype>
void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {
  DLOG(INFO) << "Loading HDF5 file: " << filename;
  // Open the file.
  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) {
    LOG(FATAL) << "Failed opening HDF5 file: " << filename;
  }

  int top_size = this->layer_param_.top_size();
  hdf_blobs_.resize(top_size);

  const int MIN_DATA_DIM = 1;
  const int MAX_DATA_DIM = INT_MAX;

  for (int i = 0; i < top_size; ++i) {
    hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
    // The dataset names come from the layer's top names; recall from
    // LayerParameter:
    //   optional string name = 1;   // the layer name
    //   optional string type = 2;   // the layer type
    //   repeated string bottom = 3; // the name of each bottom blob
    //   repeated string top = 4;    // the name of each top blob
    hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(),
        MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get());
  }

  herr_t status = H5Fclose(file_id);
  CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename;

  // MinTopBlobs==1 guarantees at least one top blob
  CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis.";
  const int num = hdf_blobs_[0]->shape(0);
  for (int i = 1; i < top_size; ++i) {
    CHECK_EQ(hdf_blobs_[i]->shape(0), num);
  }
  // Default to identity permutation.
  data_permutation_.clear();
  data_permutation_.resize(hdf_blobs_[0]->shape(0));
  for (int i = 0; i < hdf_blobs_[0]->shape(0); i++)
    data_permutation_[i] = i;

  // Shuffle the row index mapping if needed.
  if (this->layer_param_.hdf5_data_param().shuffle()) {
    std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
    DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0)
               << " rows (shuffled)";
  } else {
    DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows";
  }
}

// Reads the list of HDF5 files and sets up the shapes of the top blobs.
template <typename Dtype>
void HDF5DataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Refuse transformation parameters since HDF5 is totally generic.
  CHECK(!this->layer_param_.has_transform_param()) <<
      this->type() << " does not transform data.";
  // Read the source to parse the filenames.
  // The source is a text file listing the HDF5 file names.
  const string& source = this->layer_param_.hdf5_data_param().source();
  LOG(INFO) << "Loading list of HDF5 filenames from: " << source;
  hdf_filenames_.clear();
  std::ifstream source_file(source.c_str());
  if (source_file.is_open()) {
    std::string line;
    while (source_file >> line) {
      hdf_filenames_.push_back(line);
    }
  } else {
    LOG(FATAL) << "Failed to open source file: " << source;
  }
  source_file.close();
  num_files_ = hdf_filenames_.size();
  current_file_ = 0;
  LOG(INFO) << "Number of HDF5 files: " << num_files_;
  CHECK_GE(num_files_, 1) << "Must have at least 1 HDF5 filename listed in "
      << source;

  file_permutation_.clear();
  file_permutation_.resize(num_files_);
  // Optionally shuffle the file names; default to identity permutation.
  for (int i = 0; i < num_files_; i++) {
    file_permutation_[i] = i;
  }
  // Shuffle if needed.
  if (this->layer_param_.hdf5_data_param().shuffle()) {
    std::random_shuffle(file_permutation_.begin(), file_permutation_.end());
  }

  // Load the first HDF5 file from the (possibly shuffled) list into
  // hdf_blobs_ and initialize the row counter.
  LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].c_str());
  current_row_ = 0;

  // Reshape blobs.
  // Shape the tops from the shapes of the loaded hdf_blobs_.
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
  const int top_size = this->layer_param_.top_size();
  vector<int> top_shape;
  for (int i = 0; i < top_size; ++i) {
    top_shape.resize(hdf_blobs_[i]->num_axes());
    top_shape[0] = batch_size;
    for (int j = 1; j < top_shape.size(); ++j) {
      top_shape[j] = hdf_blobs_[i]->shape(j);
    }
    top[i]->Reshape(top_shape);
  }
}

template <typename Dtype>
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
  for (int i = 0; i < batch_size; ++i, ++current_row_) {
    // LayerSetUp already loaded the first file, so this only triggers once
    // the current file has been exhausted.
    if (current_row_ == hdf_blobs_[0]->shape(0)) {
      if (num_files_ > 1) {  // more than one file
        ++current_file_;
        // If current_file_ ran past the last file index...
        if (current_file_ == num_files_) {
          current_file_ = 0;  // ...reset it...
          // ...and reshuffle the file order for another pass.
          if (this->layer_param_.hdf5_data_param().shuffle()) {
            std::random_shuffle(file_permutation_.begin(),
                                file_permutation_.end());
          }
          DLOG(INFO) << "Looping around to first file.";
        }
        // Load the next file's data into hdf_blobs_.
        LoadHDF5FileData(
            hdf_filenames_[file_permutation_[current_file_]].c_str());
      }
      current_row_ = 0;
      // Reshuffle the row order.
      if (this->layer_param_.hdf5_data_param().shuffle())
        std::random_shuffle(data_permutation_.begin(),
                            data_permutation_.end());
    }
    // Copy the (permuted) rows into the tops.
    for (int j = 0; j < this->layer_param_.top_size(); ++j) {
      int data_dim = top[j]->count() / top[j]->shape(0);
      caffe_copy(data_dim,
          &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_]
            * data_dim], &top[j]->mutable_cpu_data()[i * data_dim]);
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU_FORWARD(HDF5DataLayer, Forward);
#endif

INSTANTIATE_CLASS(HDF5DataLayer);
REGISTER_LAYER_CLASS(HDF5Data);

}  // namespace caffe
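The core of Forward_cpu is a gather through data_permutation_: row data_permutation_[current_row_] of the cached blob is copied into row i of the top. A standalone sketch of that gather (simplified: the real layer reloads and reshuffles files at the end of the data instead of wrapping with a modulo):

#include <algorithm>
#include <vector>

void gather_batch(const std::vector<float>& cache,  // num_rows * data_dim
                  const std::vector<int>& perm,     // shuffled row indices
                  int current_row, int batch_size, int data_dim,
                  std::vector<float>* top) {
  const int num_rows = static_cast<int>(perm.size());
  top->resize(batch_size * data_dim);
  for (int i = 0; i < batch_size; ++i) {
    // The modulo just keeps this sketch self-contained.
    const int src_row = perm[(current_row + i) % num_rows];
    std::copy(cache.begin() + src_row * data_dim,
              cache.begin() + (src_row + 1) * data_dim,
              top->begin() + i * data_dim);
  }
}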
/**
 * @brief Write blobs to disk as HDF5 files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 * Writes its input data to an HDF5 file.
 */
template <typename Dtype>
class HDF5OutputLayer : public Layer<Dtype> {
 public:
  explicit HDF5OutputLayer(const LayerParameter& param)
      : Layer<Dtype>(param), file_opened_(false) {}
  virtual ~HDF5OutputLayer();
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Output"; }
  // TODO: no limit on the number of blobs
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 0; }

  inline std::string file_name() const { return file_name_; }

 protected:
  // The HDF5 output layer neither forwards nor backwards anything; it only
  // writes the data handed to it by the previous layer into an HDF5 file.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // Save the bottom blobs to the file.
  virtual void SaveBlobs();

  bool file_opened_;
  std::string file_name_;
  hid_t file_id_;
  Blob<Dtype> data_blob_;
  Blob<Dtype> label_blob_;
};
#include <vector> #include "hdf5.h" #include "hdf5_hl.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/layer.hpp" #include "caffe/util/hdf5.hpp" #include "caffe/vision_layers.hpp" namespace caffe { template <typename Dtype> void HDF5OutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { // 参数文件中的文件名 file_name_ = this->layer_param_.hdf5_output_param().file_name(); // 打开文件 file_id_ = H5Fcreate(file_name_.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); CHECK_GE(file_id_, 0) << "Failed to open HDF5 file" << file_name_; file_opened_ = true;// 设置文件打开标志 } template <typename Dtype> HDF5OutputLayer<Dtype>::~HDF5OutputLayer<Dtype>() { if (file_opened_) { herr_t status = H5Fclose(file_id_); CHECK_GE(status, 0) << "Failed to close HDF5 file " << file_name_; } } // 将blob存放到hdf5文件 // 数据和类标 template <typename Dtype> void HDF5OutputLayer<Dtype>::SaveBlobs() { // TODO: no limit on the number of blobs LOG(INFO) << "Saving HDF5 file " << file_name_; CHECK_EQ(data_blob_.num(), label_blob_.num()) << "data blob and label blob must have the same batch size"; hdf5_save_nd_dataset(file_id_, HDF5_DATA_DATASET_NAME, data_blob_); hdf5_save_nd_dataset(file_id_, HDF5_DATA_LABEL_NAME, label_blob_); LOG(INFO) << "Successfully saved " << data_blob_.num() << " rows"; } // 实际上就是从bottom将输入过来的数据存放到hdf5文件 template <typename Dtype> void HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK_GE(bottom.size(), 2); CHECK_EQ(bottom[0]->num(), bottom[1]->num()); // 改变data_blob_的形状以及label_blob_的形状 data_blob_.Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()); label_blob_.Reshape(bottom[1]->num(), bottom[1]->channels(), bottom[1]->height(), bottom[1]->width()); const int data_datum_dim = bottom[0]->count() / bottom[0]->num(); const int label_datum_dim = bottom[1]->count() / bottom[1]->num(); // 从bottom[0]和[1]复制到data_blob_和label_blob_ for (int i = 0; i < bottom[0]->num(); ++i) { caffe_copy(data_datum_dim, &bottom[0]->cpu_data()[i * data_datum_dim], &data_blob_.mutable_cpu_data()[i * data_datum_dim]); caffe_copy(label_datum_dim, &bottom[1]->cpu_data()[i * label_datum_dim], &label_blob_.mutable_cpu_data()[i * label_datum_dim]); } // 存放到文件 SaveBlobs(); } // 不反传 template <typename Dtype> void HDF5OutputLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { return; } #ifdef CPU_ONLY STUB_GPU(HDF5OutputLayer); #endif INSTANTIATE_CLASS(HDF5OutputLayer); REGISTER_LAYER_CLASS(HDF5Output); } // namespace caffe
message ImageDataParameter {
  // Specify the data source: a list file holding image paths and their
  // labels, separated by whitespace.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4 [default = 1];
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  optional bool shuffle = 8 [default = false];
  // It will also resize images if new_height or new_width are not zero.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Specify if the images are color or gray
  optional bool is_color = 11 [default = true];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  // Note: this scales the pixel values, not the image size.
  optional float scale = 2 [default = 1];
  // Path to the mean file.
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data, i.e. flip images horizontally.
  optional bool mirror = 6 [default = false];
  // Root folder for the images.
  optional string root_folder = 12 [default = ""];
}
/**
 * @brief Provides data to the Net from image files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 * This layer reads data from image files, probably the most common case.
 * Image paths and labels come from a list file whose own path is given
 * in the layer's parameters.
 */
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit ImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~ImageDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ImageData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  shared_ptr<Caffe::RNG> prefetch_rng_;
  // Shuffle the image order.
  virtual void ShuffleImages();
  virtual void load_batch(Batch<Dtype>* batch);

  // Vector of (image path, label) pairs.
  vector<std::pair<std::string, int> > lines_;
  // Current position in lines_; after the random skip this starts at the
  // index of the first image to read.
  int lines_id_;
};
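The source list file is plain text with one whitespace-separated "path label" pair per line, as in this hypothetical example (paths and labels made up):

// Hypothetical contents of the `source` list file:
//
//   cat/001.jpg 0
//   cat/002.jpg 0
//   dog/001.jpg 1
//
// A minimal standalone version of the parsing loop that DataLayerSetUp
// (shown below) uses:
#include <fstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int> > read_image_list(
    const std::string& source) {
  std::vector<std::pair<std::string, int> > lines;
  std::ifstream infile(source.c_str());
  std::string filename;
  int label;
  while (infile >> filename >> label) {  // whitespace-separated pairs
    lines.push_back(std::make_pair(filename, label));
  }
  return lines;
}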
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
  this->StopInternalThread();
}

template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Read the settings from the parameter file:
  // target height and width, whether images are color, and the root folder.
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  // new_height and new_width must be set together
  // (both zero or both positive).
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels.
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label;
  // lines_ stores the (filename, label) pairs.
  while (infile >> filename >> label) {
    lines_.push_back(std::make_pair(filename, label));
  }

  // Shuffle the file order if requested.
  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points:
  // if rand_skip is set, skip a random number of images in [0, rand_skip-1].
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  // Load the image file into a cv::Mat.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  // Shape transformed_data_ accordingly.
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  // Read the batch size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  // Shape the data blobs in the prefetch array...
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  // ...and the output data blob.
  top[0]->Reshape(top_shape);
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  // Shape the output label blob...
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  // ...and the label blobs in the prefetch array.
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}

// Shuffle the (filename, label) pairs.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on the prefetch thread.
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  // Fetch the layer parameters; see the parameter definition above for
  // their meaning.
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  // Read the current image and use it to determine the shapes.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  // Shape transformed_data_ accordingly.
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  // Read a batch of images and preprocess them.
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image:
    // compute this item's offset into the batch blob...
    int offset = batch->data_.offset(item_id);
    // ...point transformed_data_ at it...
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    // ...and run the preprocessing.
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();  // accumulate preprocessing time

    // Copy the label into prefetch_label.
    prefetch_label[item_id] = lines_[lines_id_].second;
    // go to the next iter
    lines_id_++;
    // If this was the last image in the list...
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start."; lines_id_ = 0; // 打乱图像索引的顺序 if (this->layer_param_.image_data_param().shuffle()) { ShuffleImages(); } } } batch_timer.Stop(); DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; // 预处理时间 DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(ImageDataLayer); REGISTER_LAYER_CLASS(ImageData); } // namespace caffe #endif // USE_OPENCV
/**
 * @brief Provides data to the Net from memory.
 * Feeds the net data that is already in memory, e.g. previously read from
 * data files or image files.
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MemoryDataLayer : public BaseDataLayer<Dtype> {
 public:
  explicit MemoryDataLayer(const LayerParameter& param)
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MemoryData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  // Stage a vector of Datum in added_data_ and added_label_
  // (data and labels).
  virtual void AddDatumVector(const vector<Datum>& datum_vector);
#ifdef USE_OPENCV
  // With OpenCV available, stage cv::Mat images and their labels in
  // added_data_ and added_label_.
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
      const vector<int>& labels);
#endif  // USE_OPENCV

  // Reset should accept const pointers, but can't, because the memory
  // will be given to Blob, which is mutable
  // Reset stores the data pointer, the label pointer, and the item count n
  // in the layer's internal members.
  void Reset(Dtype* data, Dtype* label, int n);
  void set_batch_size(int new_size);

  int batch_size() { return batch_size_; }
  int channels() { return channels_; }
  int height() { return height_; }
  int width() { return width_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  int batch_size_, channels_, height_, width_, size_;
  Dtype* data_;
  Dtype* labels_;
  // Number of staged items (a multiple of the batch size).
  int n_;
  size_t pos_;
  // Internal blobs for the staged data and labels.
  Blob<Dtype> added_data_;
  Blob<Dtype> added_label_;
  // Whether fresh data has been staged.
  bool has_new_data_;
};
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV

#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"

namespace caffe {

template <typename Dtype>
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
  // Read the settings from the parameter file.
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";
  // Shape the tops.
  vector<int> label_shape(1, batch_size_);
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  top[1]->Reshape(label_shape);
  // Shape the internal blobs added_data_ and added_label_.
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(label_shape);
  data_ = NULL;
  labels_ = NULL;
  added_data_.cpu_data();
  added_label_.cpu_data();
}

// Stage a vector of Datum in added_data_ and added_label_,
// applying the preprocessing on the way in.
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
  CHECK(!has_new_data_) <<
      "Can't add data until current data has been consumed.";
  size_t num = datum_vector.size();
  CHECK_GT(num, 0) << "There is no datum to add.";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  // Reshape to the number of added items.
  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, 1, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(datum_vector, &added_data_);
  // Copy the labels into top_label.
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = datum_vector[item_id].label();
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  // Store the data, labels, and item count in the internal members.
  Reset(top_data, top_label, num);
  // Mark that new data is available.
  has_new_data_ = true;
}

// With OpenCV available, stage cv::Mat images and their labels in
// added_data_ and added_label_ in the same way.
#ifdef USE_OPENCV
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<int>& labels) {
  size_t num = mat_vector.size();
  CHECK(!has_new_data_) <<
      "Can't add mat until current data has been consumed.";
  CHECK_GT(num, 0) << "There is no mat to add";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, 1, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(mat_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = labels[item_id];
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
#endif  // USE_OPENCV

// Store the data and label pointers in the internal members
// data_, labels_, n_, and reset the position pos_ to 0.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
  CHECK(data);
  CHECK(labels);
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
  // Warn with transformation parameters since a memory array is meant to
  // be generic and no transformations are done with Reset().
  if (this->layer_param_.has_transform_param()) {
    LOG(WARNING) << this->type() << " does not transform array data on Reset()";
  }
  data_ = data;
  labels_ = labels;
  n_ = n;  // number of staged items
  pos_ = 0;
}

// Change the batch size and reshape added_data_ and added_label_
// accordingly.
template <typename Dtype>
void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {
  CHECK(!has_new_data_) <<
      "Can't change batch_size until current data has been consumed.";
  batch_size_ = new_size;
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(batch_size_, 1, 1, 1);
}

// Hand the staged data and labels to the tops for the next layer.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset";
  // Point top[0] at the current data window and top[1] at the current
  // labels directly, without copying.
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  top[1]->Reshape(batch_size_, 1, 1, 1);
  top[0]->set_cpu_data(data_ + pos_ * size_);
  top[1]->set_cpu_data(labels_ + pos_);
  pos_ = (pos_ + batch_size_) % n_;
  if (pos_ == 0)
    has_new_data_ = false;  // after a full cycle, the data is no longer new
}

INSTANTIATE_CLASS(MemoryDataLayer);
REGISTER_LAYER_CLASS(MemoryData);

}  // namespace caffe
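From the caller's side, the typical sequence is: stage data with AddDatumVector (or AddMatVector), then drive the net forward until the staged data is consumed. A hedged sketch of that usage (function and variable names invented; the call sequence follows the implementation above):

#include <vector>

#include "caffe/data_layers.hpp"

using namespace caffe;

void feed_memory_layer(MemoryDataLayer<float>* layer,
                       const std::vector<Datum>& data) {
  // data.size() must be a positive multiple of the layer's batch size.
  layer->AddDatumVector(data);  // transforms and stages the data
  // Each subsequent net forward pass now consumes one batch: pos_ advances
  // by batch_size_ and wraps modulo n_; once a full cycle completes,
  // has_new_data_ drops back to false and new data may be staged.
}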
message WindowDataParameter {
  // Specify the data source: the list file describing the windows.
  optional string source = 1;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  // Note: this scales the pixel values, not the image size.
  optional float scale = 2 [default = 1];
  // Path to the mean file.
  optional string mean_file = 3;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // Specify if we would like to randomly crop an image.
  // Size of the randomly cropped patches.
  optional uint32 crop_size = 5 [default = 0];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of batch that should be foreground objects
  // (i.e. the objects you want to detect).
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access
  optional bool cache_images = 12 [default = false];
  // append root_folder to locate images
  optional string root_folder = 13 [default = ""];
}
/**
 * @brief Provides data to the Net from windows of images files, specified
 *        by a window data file.
 * Reads data from windows of image files; the windows are given by a
 * window data file.
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit WindowDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~WindowDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "WindowData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual unsigned int PrefetchRand();
  virtual void load_batch(Batch<Dtype>* batch);

  shared_ptr<Caffe::RNG> prefetch_rng_;
  vector<std::pair<std::string, vector<int> > > image_database_;
  // Field indices of the window records used by this layer: each window is
  // a vector<float> whose entries are laid out in this order.
  enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
  vector<vector<float> > fg_windows_;
  vector<vector<float> > bg_windows_;
  Blob<Dtype> data_mean_;
  vector<Dtype> mean_values_;
  bool has_mean_file_;
  bool has_mean_values_;
  bool cache_images_;
  vector<std::pair<std::string, Datum > > image_database_cache_;
};
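The fg_windows_/bg_windows_ split is driven by the two overlap thresholds from WindowDataParameter, as the setup code below applies per window. A condensed standalone sketch of that decision rule:

enum WindowKind { FOREGROUND, BACKGROUND, IGNORED };

// Condensed sketch of the foreground/background partition performed in
// DataLayerSetUp (shown below); thresholds default to 0.5 in the proto.
WindowKind classify_window(float overlap,
                           float fg_threshold,
                           float bg_threshold) {
  if (overlap >= fg_threshold) return FOREGROUND;  // keeps its class label
  if (overlap < bg_threshold)  return BACKGROUND;  // label forced to 0
  return IGNORED;  // falls between the thresholds: put in neither list
}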
Its implementation:

#ifdef USE_OPENCV
#include <opencv2/highgui/highgui_c.h>
#include <stdint.h>

#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"

#include "caffe/common.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

// caffe.proto > LayerParameter > WindowDataParameter
//   'source' field specifies the window_file
//   'crop_size' indicates the desired warped size

namespace caffe {

template <typename Dtype>
WindowDataLayer<Dtype>::~WindowDataLayer<Dtype>() {
  this->StopInternalThread();
}

// Read the window data file and set up the shapes of the internal structures
template <typename Dtype>
void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // LayerSetUp runs through the window_file and creates two structures
  // that hold windows: one for foreground (object) windows and one
  // for background (non-object) windows. We use an overlap threshold
  // to decide which is which.

  // window_file format
  // repeated:
  //    # image_index
  //    img_path (abs path)
  //    channels
  //    height
  //    width
  //    num_windows
  //    class_index overlap x1 y1 x2 y2
  //
  // where (x1, y1) and (x2, y2) are the top-left and bottom-right corners
  // of a window (note the space between '#' and the image index).
  // For example, the record
  //    # 1
  //    /1.jpg
  //    3 720 480
  //    1
  //    1 1.0 0 0 100 100
  // describes image number 1 at relative path /1.jpg with 3 channels,
  // height 720 and width 480, containing a single window of class 1 with
  // overlap 1.0, top-left corner (0, 0) and bottom-right corner (100, 100).

  LOG(INFO) << "Window data layer:" << std::endl
      << "  foreground (object) overlap threshold: "
      << this->layer_param_.window_data_param().fg_threshold() << std::endl
      << "  background (non-object) overlap threshold: "
      << this->layer_param_.window_data_param().bg_threshold() << std::endl
      << "  foreground sampling fraction: "
      << this->layer_param_.window_data_param().fg_fraction() << std::endl
      << "  cache_images: "
      << this->layer_param_.window_data_param().cache_images() << std::endl
      << "  root_folder: "
      << this->layer_param_.window_data_param().root_folder();

  cache_images_ = this->layer_param_.window_data_param().cache_images();
  string root_folder = this->layer_param_.window_data_param().root_folder();

  // A random seed is needed only if mirroring or random cropping is requested
  const bool prefetch_needs_rand =
      this->transform_param_.mirror() ||
      this->transform_param_.crop_size();
  if (prefetch_needs_rand) {
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
  } else {
    prefetch_rng_.reset();
  }

  // Open the window file
  std::ifstream infile(this->layer_param_.window_data_param().source().c_str());
  CHECK(infile.good()) << "Failed to open window file "
      << this->layer_param_.window_data_param().source() << std::endl;

  // Histogram mapping each label to the number of windows with that label
  map<int, int> label_hist;
  label_hist.insert(std::make_pair(0, 0));

  string hashtag;
  int image_index, channels;
  // Try to read the first image index to make sure the file is not empty
  if (!(infile >> hashtag >> image_index)) {
    LOG(FATAL) << "Window file is empty";
  }
  do {
    // each record must start with "#"
    CHECK_EQ(hashtag, "#");
    // read the image's relative path and prepend the root folder
    string image_path;
    infile >> image_path;
    image_path = root_folder + image_path;
    // read image dimensions: channels, height, width
    vector<int> image_size(3);
    infile >> image_size[0] >> image_size[1] >> image_size[2];
    channels = image_size[0];
    // store the image path and size in image_database_
    image_database_.push_back(std::make_pair(image_path, image_size));

    // If image caching is enabled, store the image in image_database_cache_
    if (cache_images_) {
      Datum datum;
      // read the image file into a Datum
      if (!ReadFileToDatum(image_path, &datum)) {
        LOG(ERROR) << "Could not open or find file " << image_path;
        return;
      }
      image_database_cache_.push_back(std::make_pair(image_path, datum));
    }
    // read each box
    int num_windows;
    infile >> num_windows;
    // foreground and background thresholds from the layer parameters
    const float fg_threshold =
        this->layer_param_.window_data_param().fg_threshold();
    const float bg_threshold =
        this->layer_param_.window_data_param().bg_threshold();
    for (int i = 0; i < num_windows; ++i) {
      int label, x1, y1, x2, y2;
      float overlap;
      // read the label, the overlap with the foreground object, x1, y1, x2, y2
      infile >> label >> overlap >> x1 >> y1 >> x2 >> y2;

      // pack the fields into a window record, in the order given by
      // the WindowField enum
      vector<float> window(WindowDataLayer::NUM);
      window[WindowDataLayer::IMAGE_INDEX] = image_index;
      window[WindowDataLayer::LABEL] = label;
      window[WindowDataLayer::OVERLAP] = overlap;
      window[WindowDataLayer::X1] = x1;
      window[WindowDataLayer::Y1] = y1;
      window[WindowDataLayer::X2] = x2;
      window[WindowDataLayer::Y2] = y2;

      // add window to foreground list or background list: an overlap of at
      // least fg_threshold counts as foreground, below bg_threshold counts
      // as background; if fg_threshold > bg_threshold, windows whose overlap
      // falls in between are simply discarded
      if (overlap >= fg_threshold) {
        int label = window[WindowDataLayer::LABEL];
        // the label must be positive: a window that overlaps an object this
        // much cannot belong to the background class 0
        CHECK_GT(label, 0);
        fg_windows_.push_back(window);
        // bump this label's histogram count
        label_hist.insert(std::make_pair(label, 0));
        label_hist[label]++;
      } else if (overlap < bg_threshold) {
        // background window, force label and overlap to 0
        window[WindowDataLayer::LABEL] = 0;
        window[WindowDataLayer::OVERLAP] = 0;
        bg_windows_.push_back(window);
        // bump the background (class 0) histogram count
        label_hist[0]++;
      }
    }
    // log progress every 100 images
    if (image_index % 100 == 0) {
      LOG(INFO) << "num: " << image_index << " "
          << image_path << " "
          << image_size[0] << " "
          << image_size[1] << " "
          << image_size[2] << " "
          << "windows to process: " << num_windows;
    }
  } while (infile >> hashtag >> image_index);

  // after reading everything, report the number of images
  LOG(INFO) << "Number of images: " << image_index+1;

  // report the number of samples collected for each class
  for (map<int, int>::iterator it = label_hist.begin();
      it != label_hist.end(); ++it) {
    LOG(INFO) << "class " << it->first << " has " << label_hist[it->first]
              << " samples";
  }

  LOG(INFO) << "Amount of context padding: "
      << this->layer_param_.window_data_param().context_pad();

  LOG(INFO) << "Crop mode: "
      << this->layer_param_.window_data_param().crop_mode();

  // image
  const int crop_size = this->transform_param_.crop_size();
  CHECK_GT(crop_size, 0);
  const int batch_size = this->layer_param_.window_data_param().batch_size();
  // shape top[0] as (batch_size, channels, crop_size, crop_size)
  top[0]->Reshape(batch_size, channels, crop_size, crop_size);
  // give the prefetch buffers the same shape
  for (int i = 0; i < this->PREFETCH_COUNT; ++i)
    this->prefetch_[i].data_.Reshape(
        batch_size, channels, crop_size, crop_size);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  // shape top[1] as a vector of batch_size labels
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  // give the prefetch label buffers the same shape
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }

  // data mean: either a mean file or explicit mean values may be given
  has_mean_file_ = this->transform_param_.has_mean_file();
  has_mean_values_ = this->transform_param_.mean_value_size() > 0;
  if (has_mean_file_) {  // load the mean file if one is given
    const string& mean_file =
        this->transform_param_.mean_file();
    LOG(INFO) << "Loading mean file from: " << mean_file;
    BlobProto blob_proto;
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
    data_mean_.FromProto(blob_proto);
  }
  if (has_mean_values_) {  // otherwise take the means from the parameters
    CHECK(has_mean_file_ == false) <<
        "Cannot specify mean_file and mean_value at the same time";
    for (int c = 0; c < this->transform_param_.mean_value_size(); ++c) {
      mean_values_.push_back(this->transform_param_.mean_value(c));
    }
    // Either a single mean value is shared by all channels,
    // or one mean value is given per channel.
    CHECK(mean_values_.size() == 1 || mean_values_.size() == channels) <<
        "Specify either 1 mean_value or as many as channels: " << channels;
    if (channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }
}

// Draw a random number from the prefetch random number generator
template <typename Dtype>
unsigned int WindowDataLayer<Dtype>::PrefetchRand() {
  CHECK(prefetch_rng_);
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  return (*prefetch_rng)();
}

// As a subclass of BasePrefetchingDataLayer, this layer must implement
// load_batch, which the prefetch thread calls.
// This function is called on prefetch thread
template <typename Dtype>
void WindowDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  // At each iteration, sample N windows where N*p are foreground (object)
  // windows and N*(1-p) are background (non-object) windows
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  // output data and labels
  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = batch->label_.mutable_cpu_data();
  // pixel value scaling factor
  const Dtype scale = this->layer_param_.window_data_param().scale();
  const int batch_size = this->layer_param_.window_data_param().batch_size();
  // amount of contextual padding around each window
  const int context_pad = this->layer_param_.window_data_param().context_pad();
  const int crop_size = this->transform_param_.crop_size();
  // whether to mirror at random
  const bool mirror = this->transform_param_.mirror();
  // fraction of the batch that should be foreground windows
  const float fg_fraction =
      this->layer_param_.window_data_param().fg_fraction();
  Dtype* mean = NULL;
  int mean_off = 0;
  int mean_width = 0;
  int mean_height = 0;
  if (this->has_mean_file_) {
    mean = this->data_mean_.mutable_cpu_data();
    // offset of the crop_size x crop_size center crop inside the mean image
    mean_off = (this->data_mean_.width() - crop_size) / 2;
    mean_width = this->data_mean_.width();
    mean_height = this->data_mean_.height();
  }
  cv::Size cv_crop_size(crop_size, crop_size);
  // crop mode: "warp" or "square"
  const string& crop_mode = this->layer_param_.window_data_param().crop_mode();
  bool use_square = (crop_mode == "square") ? true : false;

  // zero out batch
  caffe_set(batch->data_.count(), Dtype(0), top_data);

  // number of foreground windows in the batch, from fg_fraction
  const int num_fg = static_cast<int>(static_cast<float>(batch_size)
      * fg_fraction);
  // number of samples of each kind: [0] is background, [1] is foreground
  const int num_samples[2] = { batch_size - num_fg, num_fg };

  int item_id = 0;
  // sample from the background set first, then from the foreground set
  for (int is_fg = 0; is_fg < 2; ++is_fg) {
    for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
      // sample a window
      timer.Start();
      const unsigned int rand_index = PrefetchRand();
      // fg_windows_ and bg_windows_ hold the window records collected from
      // the window data file in DataLayerSetUp; pick one at random
      vector<float> window = (is_fg) ?
          fg_windows_[rand_index % fg_windows_.size()] :
          bg_windows_[rand_index % bg_windows_.size()];

      // decide at random whether to mirror
      bool do_mirror = mirror && PrefetchRand() % 2;

      // load the image containing the window: look up its path and size
      pair<std::string, vector<int> > image =
          image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];

      cv::Mat cv_img;
      if (this->cache_images_) {
        // if images are cached in memory, fetch the Datum for this image
        pair<std::string, Datum> image_cached =
            image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
        // decode the Datum into an OpenCV Mat
        cv_img = DecodeDatumToCVMat(image_cached.second, true);
      } else {
        // otherwise read the image from disk
        cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
        if (!cv_img.data) {
          LOG(ERROR) << "Could not open or find file " << image.first;
          return;
        }
      }
      read_time += timer.MicroSeconds();
      timer.Start();
      const int channels = cv_img.channels();

      // crop window out of image and warp it
      int x1 = window[WindowDataLayer<Dtype>::X1];
      int y1 = window[WindowDataLayer<Dtype>::Y1];
      int x2 = window[WindowDataLayer<Dtype>::X2];
      int y2 = window[WindowDataLayer<Dtype>::Y2];

      int pad_w = 0;
      int pad_h = 0;
      // context_pad is the amount of context, measured in warped pixels,
      // that should surround the window on each side after warping:
      //   context_scale = crop_size / (crop_size - 2*context_pad)
      if (context_pad > 0 || use_square) {
        // scale factor by which to expand the original region
        // such that after warping the expanded region to crop_size x crop_size
        // there's exactly context_pad amount of padding on each side
        Dtype context_scale = static_cast<Dtype>(crop_size) /
            static_cast<Dtype>(crop_size - 2*context_pad);

        // compute the expanded region
        Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;
        Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;
        Dtype center_x = static_cast<Dtype>(x1) + half_width;
        Dtype center_y = static_cast<Dtype>(y1) + half_height;
        if (use_square) {
          // for square mode, grow the shorter side to match the longer one
          if (half_height > half_width) {
            half_width = half_height;
          } else {
            half_height = half_width;
          }
        }
        // expanded coordinates x1, y1, x2, y2
        x1 = static_cast<int>(round(center_x - half_width*context_scale));
        x2 = static_cast<int>(round(center_x + half_width*context_scale));
        y1 = static_cast<int>(round(center_y - half_height*context_scale));
        y2 = static_cast<int>(round(center_y + half_height*context_scale));

        // the expanded region may go outside of the image
        // so we compute the clipped (expanded) region and keep track of
        // the extent beyond the image
        int unclipped_height = y2-y1+1;
        int unclipped_width = x2-x1+1;
        int pad_x1 = std::max(0, -x1);
        int pad_y1 = std::max(0, -y1);
        int pad_x2 = std::max(0, x2 - cv_img.cols + 1);
        int pad_y2 = std::max(0, y2 - cv_img.rows + 1);
        // clip bounds: move the top-left corner back inside the image and
        // pull the bottom-right corner in so it does not exceed the image
        x1 = x1 + pad_x1;
        x2 = x2 - pad_x2;
        y1 = y1 + pad_y1;
        y2 = y2 - pad_y2;
        CHECK_GT(x1, -1);
        CHECK_GT(y1, -1);
        CHECK_LT(x2, cv_img.cols);
        CHECK_LT(y2, cv_img.rows);

        // height and width after clipping
        int clipped_height = y2-y1+1;
        int clipped_width = x2-x1+1;

        // scale factors that would be used to warp the unclipped
        // expanded region: crop_size divided by the unclipped width/height
        Dtype scale_x =
            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_width);
        Dtype scale_y =
            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_height);

        // size to warp the clipped expanded region to: the clipped width
        // and height times scale_x and scale_y
        cv_crop_size.width =
            static_cast<int>(round(static_cast<Dtype>(clipped_width)*scale_x));
        cv_crop_size.height =
            static_cast<int>(round(static_cast<Dtype>(clipped_height)*scale_y));

        // rescale the padding amounts into warped coordinates as well
        pad_x1 =
            static_cast<int>(round(static_cast<Dtype>(pad_x1)*scale_x));
        pad_x2 =
            static_cast<int>(round(static_cast<Dtype>(pad_x2)*scale_x));
        pad_y1 =
            static_cast<int>(round(static_cast<Dtype>(pad_y1)*scale_y));
        pad_y2 =
            static_cast<int>(round(static_cast<Dtype>(pad_y2)*scale_y));

        pad_h = pad_y1;
        // if we're mirroring, we mirror the padding too (to be pedantic)
        if (do_mirror) {
          pad_w = pad_x2;
        } else {
          pad_w = pad_x1;
        }

        // ensure that the warped, clipped region plus the padding fits in the
        // crop_size x crop_size image (it might not due to rounding)
        if (pad_h + cv_crop_size.height > crop_size) {
          cv_crop_size.height = crop_size - pad_h;
        }
        if (pad_w + cv_crop_size.width > crop_size) {
          cv_crop_size.width = crop_size - pad_w;
        }
      }

      // crop the region of interest out of the image
      cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1);
      cv::Mat cv_cropped_img = cv_img(roi);
      // resize it to cv_crop_size using linear interpolation
      cv::resize(cv_cropped_img, cv_cropped_img,
          cv_crop_size, 0, 0, cv::INTER_LINEAR);

      // horizontal flip at random
      if (do_mirror) {
        cv::flip(cv_cropped_img, cv_cropped_img, 1);
      }

      // copy the warped window into top_data, subtracting the mean and
      // scaling the pixel values on the way
      for (int h = 0; h < cv_cropped_img.rows; ++h) {
        const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
        int img_index = 0;
        for (int w = 0; w < cv_cropped_img.cols; ++w) {
          for (int c = 0; c < channels; ++c) {
            int top_index = ((item_id * channels + c) * crop_size + h + pad_h)
                     * crop_size + w + pad_w;
            // int top_index = (c * height + h) * width + w;
            Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
            if (this->has_mean_file_) {
              // subtract the corresponding value from the mean file
              int mean_index = (c * mean_height + h + mean_off + pad_h)
                           * mean_width + w + mean_off + pad_w;
              top_data[top_index] = (pixel - mean[mean_index]) * scale;
            } else {
              if (this->has_mean_values_) {
                // subtract the per-channel mean value
                top_data[top_index] = (pixel - this->mean_values_[c]) * scale;
              } else {
                // no mean given: just scale the pixel value
                top_data[top_index] = pixel * scale;
              }
            }
          }
        }
      }
      trans_time += timer.MicroSeconds();
      // get window label
      top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];

      #if 0
      // useful debugging code for dumping transformed windows to disk
      string file_id;
      std::stringstream ss;
      ss << PrefetchRand();
      ss >> file_id;
      std::ofstream inf((string("dump/") + file_id +
          string("_info.txt")).c_str(), std::ofstream::out);
      inf << image.first << std::endl
          << window[WindowDataLayer<Dtype>::X1]+1 << std::endl
          << window[WindowDataLayer<Dtype>::Y1]+1 << std::endl
          << window[WindowDataLayer<Dtype>::X2]+1 << std::endl
          << window[WindowDataLayer<Dtype>::Y2]+1 << std::endl
          << do_mirror << std::endl
          << top_label[item_id] << std::endl
          << is_fg << std::endl;
      inf.close();
      std::ofstream top_data_file((string("dump/") + file_id +
          string("_data.txt")).c_str(),
          std::ofstream::out | std::ofstream::binary);
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            top_data_file.write(reinterpret_cast<char*>(
                &top_data[((item_id * channels + c) * crop_size + h)
                          * crop_size + w]),
                sizeof(Dtype));
          }
        }
      }
      top_data_file.close();
      #endif

      item_id++;
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(WindowDataLayer);
REGISTER_LAYER_CLASS(WindowData);

}  // namespace caffe
#endif  // USE_OPENCV
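To make the context_pad expansion concrete, here is a small standalone sketch with invented numbers (crop_size = 227 and context_pad = 16 are typical R-CNN-style settings; the window coordinates are made up):

// Standalone sketch of the expansion step in load_batch above.
#include <cmath>
#include <cstdio>

int main() {
  const int crop_size = 227;
  const int context_pad = 16;
  // After warping to crop_size x crop_size, each side should contain
  // exactly context_pad pixels of context:
  const double context_scale =
      static_cast<double>(crop_size) / (crop_size - 2.0 * context_pad);
  // 227 / 195 ~= 1.164

  // A made-up 100x50 window with top-left corner (200, 120):
  int x1 = 200, y1 = 120, x2 = 299, y2 = 169;
  const double half_w = (x2 - x1 + 1) / 2.0;  // 50.0
  const double half_h = (y2 - y1 + 1) / 2.0;  // 25.0
  const double cx = x1 + half_w;              // 250.0
  const double cy = y1 + half_h;              // 145.0

  x1 = static_cast<int>(round(cx - half_w * context_scale));  // 192
  x2 = static_cast<int>(round(cx + half_w * context_scale));  // 308
  y1 = static_cast<int>(round(cy - half_h * context_scale));  // 116
  y2 = static_cast<int>(round(cy + half_h * context_scale));  // 174

  printf("expanded region: (%d,%d)-(%d,%d)\n", x1, y1, x2, y2);
  return 0;
}

Warping the expanded 117x59 region to 227x227 maps the original 100x50 window to roughly 194x192 pixels, leaving about 16 pixels of context on each side, as intended (up to rounding).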
Finally, back in BasePrefetchingDataLayer, Forward_cpu consumes the batches produced by the prefetch thread:

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // pop a loaded batch from the full queue, blocking if none is ready yet
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
  // Reshape top[0] to match the loaded data.
  top[0]->ReshapeLike(batch->data_);
  // Copy the data into top[0]
  caffe_copy(batch->data_.count(), batch->data_.cpu_data(),
             top[0]->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    // Reshape top[1] to match the loaded labels.
    top[1]->ReshapeLike(batch->label_);
    // Copy the labels into top[1]
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
        top[1]->mutable_cpu_data());
  }
  // recycle the batch by pushing it back onto the free queue
  prefetch_free_.push(batch);
}
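The prefetch_free_/prefetch_full_ pair is a classic bounded producer-consumer exchange: the prefetch thread pops an empty Batch from the free queue, fills it in load_batch, and pushes it onto the full queue, while Forward_cpu does the reverse. Here is a minimal self-contained sketch of the same pattern, using std::thread and a toy BlockingQueue in place of Caffe's classes:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

// Toy stand-ins for caffe::Batch and caffe::BlockingQueue.
struct Batch { int data; };

template <typename T>
class BlockingQueue {
 public:
  void push(const T& t) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(t); }
    cond_.notify_one();
  }
  T pop() {  // blocks until an element is available
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = queue_.front();
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

int main() {
  const int PREFETCH_COUNT = 3;
  Batch prefetch[PREFETCH_COUNT];
  BlockingQueue<Batch*> free_q, full_q;
  for (int i = 0; i < PREFETCH_COUNT; ++i) free_q.push(&prefetch[i]);

  // Producer: plays the role of InternalThreadEntry + load_batch.
  std::thread producer([&] {
    for (int i = 0; i < 10; ++i) {
      Batch* b = free_q.pop();  // wait for a free slot
      b->data = i;              // "load_batch": fill the batch
      full_q.push(b);           // hand it to the consumer
    }
  });

  // Consumer: plays the role of Forward_cpu.
  for (int i = 0; i < 10; ++i) {
    Batch* b = full_q.pop();                 // wait for a loaded batch
    printf("consumed batch %d\n", b->data);  // "caffe_copy to top"
    free_q.push(b);                          // recycle the slot
  }
  producer.join();
  return 0;
}

Because only PREFETCH_COUNT batches circulate, the producer can run at most PREFETCH_COUNT batches ahead of the consumer, which bounds memory use while still hiding disk and preprocessing latency.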
Caffe code reading 8: implementation details of Data_layers (the individual data-reading layers), 2016.3.25-28
Original article: http://blog.csdn.net/xizero00/article/details/50999630