您的位置：首页 > Web前端

梳理caffe代码image_data_layer、data_layer、window_data_layer(七)

2016-04-05 10:13 561 查看

第七篇现在才更新，因为有诸多头文件需要先梳理一下（见第八到第十三篇）。不管是新版还是旧版的caffe，数据层这一块的继承关系是不会变的，可能只是其中一些文件的名字和路径修改过了，变得更加层次化，便于以后的开发应用。

先看一张继承图就明白了：

然后按照这个图layer所派生出的这几个类，以新版caffe为例，梳理一遍（每层从上向下梳理）：

首先是后面梳理的internal_thread，这里就不多说了。

先看一下上面几个layer的头文件（它们原本分散在多个头文件中，这里把内容合并到了一起）：

#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_

#include <string>
#include <utility>
#include <vector>
#include "hdf5.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/data_reader.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"
#include "caffe/util/db.hpp"

namespace caffe {

/**
* @brief Provides base for data layers that feed blobs to the Net.
*
* Base class of all data layers.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
public:
// Explicit constructor taking the layer parameters.
explicit BaseDataLayer(const LayerParameter& param);
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden except by the BasePrefetchingDataLayer.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Data layers should be shared by multiple solvers in parallel
virtual inline bool ShareInParallel() const { return true; }

// Layer-type-specific setup hook. It is virtual so that subclasses can
// override it to initialize themselves; the default does nothing.
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}

// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}

// Backward passes are no-ops for data layers (empty bodies).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

protected:
// Parameters of the transformation applied to the input data (per the
// original author's note: mirroring, cropping, mean subtraction, scaling).
TransformationParameter transform_param_;
// The object that actually performs the data transformation.
shared_ptr<DataTransformer<Dtype> > data_transformer_;
// Whether the layer also outputs labels (decided in LayerSetUp from the
// number of top blobs).
bool output_labels_;
};

// A Batch is simply a pair of blobs: the data (data_) and its labels (label_).
template <typename Dtype>
class Batch {
public:
Blob<Dtype> data_, label_;
};

// BasePrefetchingDataLayer derives from BaseDataLayer (and InternalThread)
// and is the base class of the prefetching data layers.
template <typename Dtype>
class BasePrefetchingDataLayer :
public BaseDataLayer<Dtype>, public InternalThread {
public:
explicit BasePrefetchingDataLayer(const LayerParameter& param);
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden.
void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

// Prefetches batches (asynchronously if to GPU memory)
static const int PREFETCH_COUNT = 3;

protected:
// Body of the prefetch thread.
virtual void InternalThreadEntry();
// Fills one batch. Pure virtual: every concrete subclass must implement it.
virtual void load_batch(Batch<Dtype>* batch) = 0;
// Storage for the prefetched batches, plus two queues of pointers into it:
// prefetch_free_ holds batches available for filling, prefetch_full_ holds
// batches that have been loaded and are waiting to be consumed.
Batch<Dtype> prefetch_[PREFETCH_COUNT];
BlockingQueue<Batch<Dtype>*> prefetch_free_;
BlockingQueue<Batch<Dtype>*> prefetch_full_;

Blob<Dtype> transformed_data_;
};

// DataLayer is the main concrete data layer; it derives from
// BasePrefetchingDataLayer.
template <typename Dtype>
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
explicit DataLayer(const LayerParameter& param);
virtual ~DataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// DataLayer uses DataReader instead for sharing for parallelism
virtual inline bool ShareInParallel() const { return false; }
// Layer type name and blob-count constraints: no bottom blobs and one or
// two top blobs.
virtual inline const char* type() const { return "Data"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline int MaxTopBlobs() const { return 2; }

protected:
// Implements the pure virtual load_batch of BasePrefetchingDataLayer.
virtual void load_batch(Batch<Dtype>* batch);

DataReader reader_;
};

/**
* @brief Provides data to the Net generated by a Filler.
*
* Derives directly from Layer; the data is produced by Filler objects.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
public:
explicit DummyDataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Data layers should be shared by multiple solvers in parallel
virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}

virtual inline const char* type() const { return "DummyData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Backward passes are no-ops (empty bodies).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

// Fillers used to generate the top blobs (one shared filler, or one per top).
vector<shared_ptr<Filler<Dtype> > > fillers_;
// refill_[i] tells Forward whether filler i should fill its blob again.
vector<bool> refill_;
};

/**
* @brief Provides data to the Net from HDF5 files.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class HDF5DataLayer : public Layer<Dtype> {
public:
explicit HDF5DataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual ~HDF5DataLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Data layers should be shared by multiple solvers in parallel
virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}

virtual inline const char* type() const { return "HDF5Data"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Backward passes are no-ops (empty bodies).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
// Reads the data of one HDF5 file into hdf_blobs_.
virtual void LoadHDF5FileData(const char* filename);

// Names of the HDF5 files listed in the source file.
std::vector<std::string> hdf_filenames_;
// Number of listed files and index (into file_permutation_) of the current one.
unsigned int num_files_;
unsigned int current_file_;
// Next row to read within the currently loaded file.
hsize_t current_row_;
// One blob per top, holding the contents of the currently loaded file.
std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
// Index permutations used to visit rows / files (shuffled when requested).
std::vector<unsigned int> data_permutation_;
std::vector<unsigned int> file_permutation_;
};

/**
* @brief Write blobs to disk as HDF5 files.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class HDF5OutputLayer : public Layer<Dtype> {
public:
explicit HDF5OutputLayer(const LayerParameter& param)
: Layer<Dtype>(param), file_opened_(false) {}
virtual ~HDF5OutputLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Data layers should be shared by multiple solvers in parallel
virtual inline bool ShareInParallel() const { return true; }
// This layer produces no top blobs (ExactNumTopBlobs() is 0), so there is
// nothing to reshape.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}

virtual inline const char* type() const { return "HDF5Output"; }
// TODO: no limit on the number of blobs
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 0; }

// Returns a copy of the output file name.
inline std::string file_name() const { return file_name_; }

protected:
// The HDF5 output layer has no top blobs to feed forward; Forward only
// writes the data passed in by the previous layer to the HDF5 file.
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// Stores the buffered bottom data (data_blob_ / label_blob_) to the file.
virtual void SaveBlobs();

// True once the file has been opened; the constructor sets it to false.
bool file_opened_;
std::string file_name_;
hid_t file_id_;
// Internal copies of the two bottom blobs that get written to the file.
Blob<Dtype> data_blob_;
Blob<Dtype> label_blob_;
};

/**
* @brief Provides data to the Net from image files.
*
* Per the original author's note: image paths and labels are read from a
* list file whose path is given in the layer's parameters.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
explicit ImageDataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param) {}
virtual ~ImageDataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "ImageData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }

protected:
shared_ptr<Caffe::RNG> prefetch_rng_;
// Shuffles the order of the images.
virtual void ShuffleImages();
virtual void load_batch(Batch<Dtype>* batch);

// (image path, label) pairs.
vector<std::pair<std::string, int> > lines_;
// Index into lines_ of the current image; presumably starts after any
// randomly skipped images -- TODO confirm against the implementation.
int lines_id_;
};

/**
* @brief Provides data to the Net from memory.
*
* Per the original author's note: the data has already been read (from a
* data file or image files) into memory and is then handed to this layer.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class MemoryDataLayer : public BaseDataLayer<Dtype> {
public:
explicit MemoryDataLayer(const LayerParameter& param)
: BaseDataLayer<Dtype>(param), has_new_data_(false) {}
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "MemoryData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }

// Adds in-memory Datum objects; presumably they end up in added_data_ and
// added_label_ (data and labels) -- see the implementation.
virtual void AddDatumVector(const vector<Datum>& datum_vector);
#ifdef USE_OPENCV
// Only with OpenCV: adds cv::Mat images together with their labels;
// presumably they end up in added_data_ and added_label_.
virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<int>& labels);
#endif  // USE_OPENCV

// Reset should accept const pointers, but can't, because the memory
//  will be given to Blob, which is mutable
// Presumably stores data, label and the count n in the internal members.
void Reset(Dtype* data, Dtype* label, int n);
void set_batch_size(int new_size);

// Accessors for the configured batch geometry.
int batch_size() { return batch_size_; }
int channels() { return channels_; }
int height() { return height_; }
int width() { return width_; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

int batch_size_, channels_, height_, width_, size_;
Dtype* data_;
Dtype* labels_;
// Presumably the number of items passed to Reset -- TODO confirm.
int n_;
size_t pos_;
// Internally held data and labels.
Blob<Dtype> added_data_;
Blob<Dtype> added_label_;
// Whether new data has been added; the constructor sets it to false.
bool has_new_data_;
};

/**
* @brief Provides data to the Net from windows of images files, specified
*        by a window data file.
*
* TODO(dox): thorough documentation for Forward and proto params.
*/
template <typename Dtype>
class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
explicit WindowDataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param) {}
virtual ~WindowDataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "WindowData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }

protected:
virtual unsigned int PrefetchRand();
virtual void load_batch(Batch<Dtype>* batch);

shared_ptr<Caffe::RNG> prefetch_rng_;
vector<std::pair<std::string, vector<int> > > image_database_;
// Field indices of one window record. Per the original author's note, each
// window is a vector<float> storing these fields in this order.
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
// Window records; presumably foreground and background windows respectively.
vector<vector<float> > fg_windows_;
vector<vector<float> > bg_windows_;
Blob<Dtype> data_mean_;
vector<Dtype> mean_values_;
bool has_mean_file_;
bool has_mean_values_;
bool cache_images_;
vector<std::pair<std::string, Datum > > image_database_cache_;
};

}  // namespace caffe

#endif  // CAFFE_DATA_LAYERS_HPP_

然后接着就是base_data_layer.cpp:

#include <boost/thread.hpp>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"

namespace caffe {
// Forwards param to the Layer<Dtype> base-class constructor and initializes
// the transform_param_ member from param.transform_param().
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param), transform_param_(param.transform_param()) {}

// Common setup shared by all data layers: decides whether labels are output,
// creates the data transformer, then delegates to the layer-specific
// DataLayerSetUp hook.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Exactly one top blob means data only; any other count also outputs labels.
  output_labels_ = (top.size() != 1);

  // Create the transformer for this layer's phase and initialize its
  // random state.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();

  // The subclasses should setup the size of bottom and top
  DataLayerSetUp(bottom, top);
}

// Constructs the base data layer and marks every prefetch slot as free.
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param), prefetch_free_(), prefetch_full_() {
  // The queue stores the addresses of the batches, not copies of them.
  Batch<Dtype>* const slots_end = prefetch_ + PREFETCH_COUNT;
  for (Batch<Dtype>* slot = prefetch_; slot != slots_end; ++slot) {
    prefetch_free_.push(slot);
  }
}

// Sets up a prefetching data layer: runs the common base setup, touches the
// memory of every prefetch batch, then starts the prefetch thread.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// 1. Run the base-class setup. The scope qualifier bypasses virtual dispatch
// and selects BaseDataLayer's LayerSetUp explicitly. That function in turn
// calls the virtual DataLayerSetUp, which dispatches to the concrete
// subclass; presumably the subclass shapes the data_/label_ blobs of the
// prefetch_ batches there (not shown in this file).
BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
// Before starting the prefetch thread, we make cpu_data and gpu_data
// calls so that the prefetch thread does not accidentally make simultaneous
// cudaMalloc calls when the main thread is running. In some GPUs this
// seems to cause failures if we do not so.
for (int i = 0; i < PREFETCH_COUNT; ++i) {
// 2. Touch the prefetch buffers so their storage is allocated up front.
prefetch_[i].data_.mutable_cpu_data();
if (this->output_labels_) {
prefetch_[i].label_.mutable_cpu_data();
}
}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
for (int i = 0; i < PREFETCH_COUNT; ++i) {
prefetch_[i].data_.mutable_gpu_data();
if (this->output_labels_) {
prefetch_[i].label_.mutable_gpu_data();
}
}
}
#endif
// 3. Start the thread that prefetches the data.
DLOG(INFO) << "Initializing prefetch";
// Initialize the transformer's random state via DataTransformer::InitRand.
this->data_transformer_->InitRand();
// Starts the internal thread; presumably it runs InternalThreadEntry(),
// i.e. BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() unless a
// subclass overrides it.
StartInternalThread();
DLOG(INFO) << "Prefetch initialized.";
}

// Body of the prefetch thread: repeatedly takes a free batch, fills it with
// load_batch and hands it over through the full queue until asked to stop.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
// In GPU mode a dedicated non-blocking CUDA stream is used for the pushes.
cudaStream_t stream;
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
}
#endif

try {
while (!must_stop()) {
// batch is a pointer to one of the prefetch slots taken from the free queue.
Batch<Dtype>* batch = prefetch_free_.pop();
// load_batch is pure virtual, so this dispatches to the concrete
// subclass (e.g. DataLayer), which fills the batch in place. Because
// the queues only store pointers, nothing needs to be copied back.
load_batch(batch);
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
// Push the data blob to the GPU on our stream and wait for completion.
batch->data_.data().get()->async_gpu_push(stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
}
#endif
// Publish the filled batch to the full queue.
prefetch_full_.push(batch);
}
} catch (boost::thread_interrupted&) {
// Interrupted exception is expected on shutdown
}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamDestroy(stream));
}
#endif
}

// Data layers sit at the very bottom of the net, so Forward only has to copy
// an already loaded batch into top[0] (data) and, when labels are output,
// top[1].
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Take the next loaded batch from the full queue.
  Batch<Dtype>* loaded = prefetch_full_.pop("Data layer prefetch queue empty");

  // Data: give top[0] the shape of the loaded blob, then copy the contents.
  Blob<Dtype>* data_top = top[0];
  data_top->ReshapeLike(loaded->data_);
  caffe_copy(loaded->data_.count(), loaded->data_.cpu_data(),
             data_top->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  // Labels: same procedure into top[1], only when labels are output.
  if (this->output_labels_) {
    Blob<Dtype>* label_top = top[1];
    label_top->ReshapeLike(loaded->label_);
    caffe_copy(loaded->label_.count(), loaded->label_.cpu_data(),
               label_top->mutable_cpu_data());
  }

  // Hand the slot back to the free queue so it can be filled again.
  prefetch_free_.push(loaded);
}
// 如果没有GPU的话则在BasePrefetchingDataLayer类中生成一个Forward函数
// 该函数并不前传，而是直接报错
#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif

INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);

// 最后这两个宏在后面的几篇博客中有介绍，可以看看它们的定义：INSTANTIATE_CLASS(BaseDataLayer)被用来实例化BaseDataLayer的
// 类模板（float和double两个版本）；REGISTER_LAYER_CLASS(type)被用来向layer_factory注册type##Layer的构造方法，方便直接通过层的
// 名称直接获取层的对象。Caffe中内置的层在实现代码的最后都会加上这两个宏。
// 注意：BaseDataLayer和BasePrefetchingDataLayer是基类，本文件中只对它们使用了INSTANTIATE_CLASS，并没有用REGISTER_LAYER_CLASS注册。
/*
// ------ in common.hpp ------
// Instantiate a class with float and double specifications.
#define INSTANTIATE_CLASS(classname) \
char gInstantiationGuard##classname; \
template class classname<float>; \
template class classname<double>
// ------ in common.hpp ------

// ------ in layer_factory.hpp ------
#define REGISTER_LAYER_CREATOR(type, creator)                                  \
static LayerRegisterer<float> g_creator_f_##type(#type, creator<float>);     \
static LayerRegisterer<double> g_creator_d_##type(#type, creator<double>)    \

#define REGISTER_LAYER_CLASS(type)                                             \
template <typename Dtype>                                                    \
shared_ptr<Layer<Dtype> > Creator_##type##Layer(const LayerParameter& param) \
{                                                                            \
return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param));           \
}                                                                            \
REGISTER_LAYER_CREATOR(type, Creator_##type##Layer)
// ------ in layer_factory.hpp ------
*/
} // namespace caffe

第二个就是hdf5_output_layer的实现：

#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/hdf5.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

// Reads the output file name from the layer parameters and creates the HDF5
// file (H5F_ACC_TRUNC: an existing file is truncated). Aborts through CHECK
// when the file cannot be created.
template <typename Dtype>
void HDF5OutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // File name configured in hdf5_output_param.
  file_name_ = this->layer_param_.hdf5_output_param().file_name();
  // Create/open the file for writing.
  file_id_ = H5Fcreate(file_name_.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT,
                       H5P_DEFAULT);
  // Trailing space keeps the file name from running into the message text.
  CHECK_GE(file_id_, 0) << "Failed to open HDF5 file " << file_name_;
  // Remember that the file is open so the destructor closes it.
  file_opened_ = true;
}

// Closes the HDF5 file if LayerSetUp opened it.
// The destructor is named without a template argument list: the form
// "~HDF5OutputLayer<Dtype>()" is not a valid destructor declarator in C++20.
template <typename Dtype>
HDF5OutputLayer<Dtype>::~HDF5OutputLayer() {
  if (file_opened_) {
    herr_t status = H5Fclose(file_id_);
    CHECK_GE(status, 0) << "Failed to close HDF5 file " << file_name_;
  }
}

// Writes the buffered blobs (data and labels) to the open HDF5 file as two
// datasets.
template <typename Dtype>
void HDF5OutputLayer<Dtype>::SaveBlobs() {
  // TODO: no limit on the number of blobs
  LOG(INFO) << "Saving HDF5 file " << file_name_;

  // Data and labels must describe the same number of items.
  CHECK_EQ(data_blob_.num(), label_blob_.num()) <<
      "data blob and label blob must have the same batch size";

  hdf5_save_nd_dataset(file_id_, HDF5_DATA_DATASET_NAME, data_blob_);
  hdf5_save_nd_dataset(file_id_, HDF5_DATA_LABEL_NAME, label_blob_);

  const int saved_rows = data_blob_.num();
  LOG(INFO) << "Successfully saved " << saved_rows << " rows";
}

// Copies the two bottom blobs (data, labels) into the internal blobs and
// stores them in the HDF5 file.
template <typename Dtype>
void HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom.size(), 2);
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());

  Blob<Dtype>* const data_in = bottom[0];
  Blob<Dtype>* const label_in = bottom[1];

  // Give the internal blobs the shapes of their sources.
  data_blob_.Reshape(data_in->num(), data_in->channels(),
                     data_in->height(), data_in->width());
  label_blob_.Reshape(label_in->num(), label_in->channels(),
                      label_in->height(), label_in->width());

  // Number of elements per item in each blob.
  const int data_stride = data_in->count() / data_in->num();
  const int label_stride = label_in->count() / label_in->num();

  // Copy item by item from the bottoms into the internal blobs.
  const int num_items = data_in->num();
  int item = 0;
  while (item < num_items) {
    const int data_offset = item * data_stride;
    const int label_offset = item * label_stride;
    caffe_copy(data_stride, data_in->cpu_data() + data_offset,
               data_blob_.mutable_cpu_data() + data_offset);
    caffe_copy(label_stride, label_in->cpu_data() + label_offset,
               label_blob_.mutable_cpu_data() + label_offset);
    ++item;
  }

  // Write both blobs to the file.
  SaveBlobs();
}

// Nothing is propagated backwards by this layer.
template <typename Dtype>
void HDF5OutputLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Intentionally empty.
}

#ifdef CPU_ONLY
STUB_GPU(HDF5OutputLayer);
#endif

INSTANTIATE_CLASS(HDF5OutputLayer);
REGISTER_LAYER_CLASS(HDF5Output);

}  // namespace caffe

第三个hdf5_data_layer的实现：

/*
TODO:
- load file in a separate thread ("prefetch")
- can be smarter about the memcpy call instead of doing it row-by-row
:: use util functions caffe_copy, and Blob->offset()
:: don't forget to update hdf5_data_layer.cu accordingly
- add ability to shuffle filenames if flag is set
*/
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"
#include "stdint.h"

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/hdf5.hpp"

namespace caffe {

// Nothing to release explicitly. The destructor is named without a template
// argument list: "~HDF5DataLayer<Dtype>()" is not a valid destructor
// declarator in C++20.
template <typename Dtype>
HDF5DataLayer<Dtype>::~HDF5DataLayer() { }

// Load data and label from HDF5 filename into the class property blobs.
// One dataset is read per top blob (the dataset name is the top's name) into
// hdf_blobs_, and data_permutation_ is rebuilt for the new row count
// (shuffled when hdf5_data_param.shuffle is set).
// Aborts (LOG(FATAL) / CHECK) when the file cannot be opened or closed, or
// when the datasets do not all have the same number of rows.
template <typename Dtype>
void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {
  DLOG(INFO) << "Loading HDF5 file: " << filename;
  // Open the file read-only.
  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) {
    LOG(FATAL) << "Failed opening HDF5 file: " << filename;
  }

  int top_size = this->layer_param_.top_size();
  hdf_blobs_.resize(top_size);

  const int MIN_DATA_DIM = 1;
  const int MAX_DATA_DIM = INT_MAX;

  for (int i = 0; i < top_size; ++i) {
    hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
    // layer_param_.top(i) is the name of the i-th top blob (LayerParameter's
    // repeated string field "top"); it doubles as the dataset name.
    hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(),
        MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get());
  }

  herr_t status = H5Fclose(file_id);
  CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename;

  // MinTopBlobs==1 guarantees at least one top blob
  CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis.";
  const int num = hdf_blobs_[0]->shape(0);
  for (int i = 1; i < top_size; ++i) {
    CHECK_EQ(hdf_blobs_[i]->shape(0), num);
  }
  // Default to identity permutation.
  data_permutation_.clear();
  data_permutation_.resize(num);
  for (int i = 0; i < num; i++)
    data_permutation_[i] = i;

  // Shuffle the row-index mapping if needed.
  if (this->layer_param_.hdf5_data_param().shuffle()) {
    std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
    DLOG(INFO) << "Successfully loaded " << num << " rows (shuffled)";
  } else {
    DLOG(INFO) << "Successfully loaded " << num << " rows";
  }
}

// Reads the list of HDF5 file names from hdf5_data_param.source, loads the
// first file and shapes every top blob as (batch_size, <remaining axes of the
// corresponding loaded blob>).
// Aborts (CHECK / LOG(FATAL)) when transform parameters are given, the source
// file cannot be opened, or it lists no file names.
template <typename Dtype>
void HDF5DataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Refuse transformation parameters since HDF5 is totally generic.
  CHECK(!this->layer_param_.has_transform_param()) <<
      this->type() << " does not transform data.";
  // Read the source to parse the filenames.
  const string& source = this->layer_param_.hdf5_data_param().source();
  LOG(INFO) << "Loading list of HDF5 filenames from: " << source;
  hdf_filenames_.clear();
  std::ifstream source_file(source.c_str());
  if (source_file.is_open()) {
    // One whitespace-separated token per file name.
    std::string line;
    while (source_file >> line) {
      hdf_filenames_.push_back(line);
    }
  } else {
    LOG(FATAL) << "Failed to open source file: " << source;
  }
  source_file.close();
  num_files_ = hdf_filenames_.size();
  current_file_ = 0;
  LOG(INFO) << "Number of HDF5 files: " << num_files_;
  CHECK_GE(num_files_, 1) << "Must have at least 1 HDF5 filename listed in "
    << source;

  file_permutation_.clear();
  file_permutation_.resize(num_files_);
  // Default to identity permutation. The index is unsigned to match
  // num_files_ and avoid a signed/unsigned comparison.
  for (unsigned int i = 0; i < num_files_; ++i) {
    file_permutation_[i] = i;
  }

  // Shuffle the file order if needed.
  if (this->layer_param_.hdf5_data_param().shuffle()) {
    std::random_shuffle(file_permutation_.begin(), file_permutation_.end());
  }

  // Load the first HDF5 file and initialize the line counter.
  LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].c_str());
  current_row_ = 0;

  // Reshape blobs: each top takes the shape of its loaded blob with the
  // first axis replaced by the batch size.
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
  const int top_size = this->layer_param_.top_size();
  vector<int> top_shape;
  for (int i = 0; i < top_size; ++i) {
    // Keep the axis count as int so the inner loop compares int with int.
    const int num_axes = hdf_blobs_[i]->num_axes();
    top_shape.resize(num_axes);
    top_shape[0] = batch_size;
    for (int j = 1; j < num_axes; ++j) {
      top_shape[j] = hdf_blobs_[i]->shape(j);
    }
    top[i]->Reshape(top_shape);
  }
}

// Fills the top blobs with batch_size rows taken from the currently loaded
// HDF5 file, moving on to the next file (and wrapping around to the first)
// whenever the current one is exhausted.
template <typename Dtype>
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
  const bool shuffle = this->layer_param_.hdf5_data_param().shuffle();
  const int num_tops = this->layer_param_.top_size();

  for (int item = 0; item < batch_size; ++item, ++current_row_) {
    // LayerSetUp already loaded the first file, so a reload is only needed
    // once every row of the current file has been consumed.
    if (current_row_ == hdf_blobs_[0]->shape(0)) {
      if (num_files_ > 1) {
        ++current_file_;
        // Past the last file: wrap around and reshuffle the file order.
        if (current_file_ == num_files_) {
          current_file_ = 0;
          if (shuffle) {
            std::random_shuffle(file_permutation_.begin(),
                                file_permutation_.end());
          }
          DLOG(INFO) << "Looping around to first file.";
        }
        // Read the next file into hdf_blobs_.
        LoadHDF5FileData(
            hdf_filenames_[file_permutation_[current_file_]].c_str());
      }
      // Restart at the first row and reshuffle the row order.
      current_row_ = 0;
      if (shuffle) {
        std::random_shuffle(data_permutation_.begin(),
                            data_permutation_.end());
      }
    }

    // Copy the selected row of every loaded blob into slot `item` of its top.
    for (int t = 0; t < num_tops; ++t) {
      const int data_dim = top[t]->count() / top[t]->shape(0);
      const Dtype* src = hdf_blobs_[t]->cpu_data() +
          data_permutation_[current_row_] * data_dim;
      Dtype* dst = top[t]->mutable_cpu_data() + item * data_dim;
      caffe_copy(data_dim, src, dst);
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU_FORWARD(HDF5DataLayer, Forward);
#endif

INSTANTIATE_CLASS(HDF5DataLayer);
REGISTER_LAYER_CLASS(HDF5Data);

}  // namespace caffe

第四个dummy_data_layer的实现：

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

// Validates dummy_data_param, creates the fillers, shapes the top blobs and
// runs Forward once so that constant blobs are filled exactly one time.
template <typename Dtype>
void DummyDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Number of outputs.
const int num_top = top.size();
// Parameters of this layer.
const DummyDataParameter& param = this->layer_param_.dummy_data_param();
// Number of fillers specified.
const int num_data_filler = param.data_filler_size();
// The filler count must be 0, 1, or equal to the number of outputs.
CHECK(num_data_filler == 0 || num_data_filler == 1 ||
num_data_filler == num_top)
<< "Number of data fillers must be 0, 1 or equal to the number of tops: "
<< num_top << "; you specified " << num_data_filler << " data fillers.";

// True when any of the legacy num/channels/height/width fields is given.
const bool legacy_dims = param.num_size() || param.channels_size() ||
param.height_size() || param.width_size();
// Check that every dimension field is specified either once or once per top.
if (legacy_dims) {
// Legacy fields and 'shape' are mutually exclusive.
CHECK_EQ(0, param.shape_size())
<< "Both shape and legacy fields were specified";
// Using deprecated 4D output dim specifiers.
CHECK(param.num_size() == 1 || param.num_size() == num_top)
<< "Must specify 'num' once, or once per top blob "
<< "(" << num_top << "); specified " << param.num_size() << ".";
CHECK(param.channels_size() == 1 || param.channels_size() == num_top)
<< "Must specify 'channels' once, or once per top blob "
<< "(" << num_top << "); specified " << param.channels_size() << ".";
CHECK(param.height_size() == 1 || param.height_size() == num_top)
<< "Must specify 'height' once, or once per top blob "
<< "(" << num_top << "); specified " << param.height_size() << ".";
CHECK(param.width_size() == 1 || param.width_size() == num_top)
<< "Must specify 'width' once, or once per top blob "
<< "(" << num_top << "); specified " << param.width_size() << ".";
} else {
CHECK(param.shape_size() == 1 || param.shape_size() == num_top)
<< "Must specify 'shape' once, or once per top blob "
<< "(" << num_top << "); specified " << param.shape_size() << ".";
}
// refill_[i] tells Forward i whether or not to actually refill top Blob i.
// If refill_[i] is false, Forward does nothing for Blob i. We use this to
// avoid wastefully refilling "constant" Blobs in every forward pass.
// We first fill refill_ in with the INVERSE of its final values.
// The first time we run Forward from the LayerSetUp method, we'll fill only
// Blobs for which refill_ is normally false.  These Blobs will never be
// filled again.
refill_.clear();
fillers_.clear();
// Zero or one filler specified: a single filler is shared by all tops.
if (num_data_filler <= 1) {
// Parameters describing how the data is generated.
FillerParameter filler_param;
if (num_data_filler == 0) {
// No filler specified: fill with the constant 0.
filler_param.set_type("constant");
filler_param.set_value(0);
} else {
// Otherwise copy the single specified filler.
filler_param.CopyFrom(param.data_filler(0));
}
// Refill on each iteration iff not using a constant filler,
// but use the inverse of this rule for the first run.
refill_.resize(1);
refill_[0] = (strcmp(filler_param.type().c_str(), "constant") == 0);
fillers_.resize(1);
// Instantiate the filler.
fillers_[0].reset(GetFiller<Dtype>(filler_param));
} else {
// One filler per top blob (num_data_filler == num_top).
refill_.resize(num_top);
fillers_.resize(num_top);
for (int i = 0; i < num_top; ++i) {
fillers_[i].reset(GetFiller<Dtype>(param.data_filler(i)));
// Refill on each iteration iff not using a constant filler,
// but use the inverse of this rule for the first run.
refill_[i] =
(strcmp(param.data_filler(i).type().c_str(), "constant") == 0);
}
}

// Shape the top blobs; a dimension specified once applies to every top.
for (int i = 0; i < num_top; ++i) {
if (legacy_dims) {
const int num = (param.num_size() == 1) ? param.num(0) : param.num(i);
const int channels =
(param.channels_size() == 1) ? param.channels(0) : param.channels(i);
const int height =
(param.height_size() == 1) ? param.height(0) : param.height(i);
const int width =
(param.width_size() == 1) ? param.width(0) : param.width(i);
top[i]->Reshape(num, channels, height, width);
} else {
const int shape_index = (param.shape_size() == 1) ? 0 : i;
top[i]->Reshape(param.shape(shape_index));
}
}
// Run Forward once, with refill_ inverted, to fill the constant Blobs.
this->Forward(bottom, top);
// Invert the inverted refill_ values to refill the desired (non-constant)
// Blobs in every usual forward pass.
for (int i = 0; i < refill_.size(); ++i) {
refill_[i] = !refill_[i];
}
}

// CPU forward pass (per the original note, invoked through Forward()).
// Fills each top blob with its filler.  When only one filler is configured it
// is shared by all top blobs; otherwise top blob i uses filler i.  A blob is
// filled only while the matching refill_ flag is set.
template <typename Dtype>
void DummyDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const bool per_top_filler = (fillers_.size() > 1);
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    // Slot 0 is the shared filler when there is no per-top filler.
    const int slot = per_top_filler ? top_id : 0;
    if (!refill_[slot]) {
      continue;
    }
    fillers_[slot]->Fill(top[top_id]);
  }
}

// Instantiate the DummyDataLayer class template.
// Register the DummyData layer class (both macros are defined outside this
// file).
INSTANTIATE_CLASS(DummyDataLayer);
REGISTER_LAYER_CLASS(DummyData);

}  // namespace caffe

第五个memory_data_layer的实现：

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV

#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"

namespace caffe {

// Reads the batch geometry from memory_data_param, shapes the two top blobs
// and the internal staging blobs accordingly, and clears the user buffer
// pointers.
template <typename Dtype>
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Geometry comes straight from the layer parameters.
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  // Number of values in a single item.
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";

  // No user-provided buffers yet; Forward_cpu refuses to run until Reset().
  data_ = NULL;
  labels_ = NULL;

  // Data blobs are 4-D; label blobs are 1-D with one entry per item.
  const vector<int> labels_dims(1, batch_size_);
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  top[1]->Reshape(labels_dims);
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(labels_dims);

  // Touch the CPU side of the staging blobs
  // (presumably to force their allocation now -- TODO confirm).
  added_data_.cpu_data();
  added_label_.cpu_data();
}

// Stages a vector of Datum as the layer's pending data: the datums are run
// through the data transformer into added_data_, their labels are copied into
// added_label_, and the result is installed via Reset().
// The number of datums must be a positive multiple of batch_size_, and any
// previously added data must already have been consumed.
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
  CHECK(!has_new_data_) <<
      "Can't add data until current data has been consumed.";
  const size_t datum_count = datum_vector.size();
  CHECK_GT(datum_count, 0) << "There is no datum to add.";
  CHECK_EQ(datum_count % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";

  // Make room for every datum, not just one batch.
  added_data_.Reshape(datum_count, channels_, height_, width_);
  added_label_.Reshape(datum_count, 1, 1, 1);

  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(datum_vector, &added_data_);

  // Copy the labels next to the transformed data.
  Dtype* const staged_labels = added_label_.mutable_cpu_data();
  for (size_t k = 0; k < datum_count; ++k) {
    staged_labels[k] = datum_vector[k].label();
  }

  // Publish the staged buffers as the current data source.
  Reset(added_data_.mutable_cpu_data(), staged_labels, datum_count);
  has_new_data_ = true;
}

// Only compiled with OpenCV support.  Stages a vector of cv::Mat images and
// their labels as the layer's pending data: the images are run through the
// data transformer into added_data_, labels are copied into added_label_, and
// the result is installed via Reset().
// The number of images must be a positive multiple of batch_size_, there must
// be a label for every image, and previously added data must already have
// been consumed.
#ifdef USE_OPENCV
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<int>& labels) {
  const size_t num = mat_vector.size();
  CHECK(!has_new_data_) <<
      "Can't add mat until current data has been consumed.";
  CHECK_GT(num, 0) << "There is no mat to add";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  // labels[i] is read for every image below; reject a short label vector
  // instead of reading past its end.
  CHECK_GE(labels.size(), num) <<
      "There must be at least one label per mat.";
  // Make room for every image, not just one batch.
  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, 1, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(mat_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (size_t item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = labels[item_id];
  }
  // Publish the staged buffers (num items, a multiple of batch_size_).
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
#endif  // USE_OPENCV

// Installs caller-provided buffers as the layer's data source.
//   data   - item data; must be non-null
//   labels - one label per item; must be non-null
//   n      - number of items in the buffers; must be a multiple of
//            batch_size_
// The read position is rewound to the first item.  No transformation is
// applied to the buffers; a warning is logged if transform_param is set.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
  CHECK(data);
  CHECK(labels);
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
  // Warn with transformation parameters since a memory array is meant to
  // be generic and no transformations are done with Reset().
  const bool transform_requested = this->layer_param_.has_transform_param();
  if (transform_requested) {
    LOG(WARNING) << this->type() << " does not transform array data on Reset()";
  }
  // Start reading from the first item of the new buffers.
  pos_ = 0;
  n_ = n;
  labels_ = labels;
  data_ = data;
}

// Changes the batch size and reshapes the internal staging blobs
// (added_data_ / added_label_) to hold exactly one batch of the new size.
// Not allowed while previously added data is still waiting to be consumed.
template <typename Dtype>
void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {
  CHECK(!has_new_data_) <<
      "Can't change batch_size until current data has been consumed.";
  batch_size_ = new_size;
  const int items = batch_size_;
  added_data_.Reshape(items, channels_, height_, width_);
  added_label_.Reshape(items, 1, 1, 1);
}

// Hands the current batch to the top blobs: top[0] receives the data and
// top[1] the labels, both via set_cpu_data() on pointers into the buffers
// installed by Reset().  The read position then advances by one batch and
// wraps around at n_; once it wraps, the data is marked as consumed.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset";

  Blob<Dtype>* const data_top = top[0];
  Blob<Dtype>* const label_top = top[1];
  data_top->Reshape(batch_size_, channels_, height_, width_);
  label_top->Reshape(batch_size_, 1, 1, 1);

  // Item pos_ starts pos_ * size_ values into the data buffer.
  data_top->set_cpu_data(data_ + pos_ * size_);
  label_top->set_cpu_data(labels_ + pos_);

  pos_ = (pos_ + batch_size_) % n_;
  // Back at the start: every staged batch has been served once.
  if (pos_ == 0) {
    has_new_data_ = false;
  }
}

// Instantiate the MemoryDataLayer class template and register the MemoryData
// layer class (both macros are defined outside this file).
INSTANTIATE_CLASS(MemoryDataLayer);
REGISTER_LAYER_CLASS(MemoryData);

}  // namespace caffe

第六个image_data_layer的实现：

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

// Destructor: stops the layer's internal thread.
// The destructor is named without a template argument list
// (~ImageDataLayer(), not ~ImageDataLayer<Dtype>()): the latter form is no
// longer accepted as a destructor declarator in C++20, and the plain form
// matches DataLayer<Dtype>::~DataLayer().
template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}

// Loads the "<filename> <label>" list named by image_data_param.source,
// optionally shuffles it and skips a random number of leading entries, then
// reads one image to infer the blob shape and reshapes the top blobs and the
// prefetch buffers for batch_size items.
// Aborts if the list is empty or unreadable, or if the first image cannot
// be loaded.
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Resize target, colour flag and image root directory from the parameters.
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  // new_height and new_width must be given together: both zero (no resize
  // requested) or both positive.
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";

  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label;
  // lines_ holds one (filename, label) pair per list entry.
  while (infile >> filename >> label) {
    lines_.push_back(std::make_pair(filename, label));
  }
  // A missing, unreadable or empty list leaves lines_ empty; everything
  // below indexes lines_, so fail loudly here instead.
  CHECK(!lines_.empty()) << "File is empty or could not be opened: " << source;

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points:
  // when rand_skip is non-zero, start at a random index in [0, rand_skip-1].
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label: one entry per item, for top[1] and every prefetch buffer.
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}

// Shuffles the (filename, label) list in place, driven by the layer's
// prefetch random number generator.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* const engine =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), engine);
}

// This function is called on prefetch thread.
// Fills one batch: the image at the current list position fixes the blob
// shape for the whole batch, then batch_size images are read, transformed
// directly into the batch's data blob and their labels recorded.  The list
// position wraps around at the end (reshuffling if shuffle is enabled).
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_us = 0;
  double transform_us = 0;
  CPUTimer step_timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  const ImageDataParameter& params = this->layer_param_.image_data_param();
  const int batch_size = params.batch_size();
  const int new_height = params.new_height();
  const int new_width = params.new_width();
  const bool is_color = params.is_color();
  const string root_folder = params.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat shape_probe = ReadImageToCVMat(
      root_folder + lines_[lines_id_].first, new_height, new_width, is_color);
  CHECK(shape_probe.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from the image.
  vector<int> batch_shape =
      this->data_transformer_->InferBlobShape(shape_probe);
  this->transformed_data_.Reshape(batch_shape);
  // Reshape batch according to the batch_size.
  batch_shape[0] = batch_size;
  batch->data_.Reshape(batch_shape);

  Dtype* const data_out = batch->data_.mutable_cpu_data();
  Dtype* const label_out = batch->label_.mutable_cpu_data();

  const int total_lines = lines_.size();
  for (int slot = 0; slot < batch_size; ++slot) {
    // Read the next image from disk.
    step_timer.Start();
    CHECK_GT(total_lines, lines_id_);
    cv::Mat image = ReadImageToCVMat(
        root_folder + lines_[lines_id_].first, new_height, new_width, is_color);
    CHECK(image.data) << "Could not load " << lines_[lines_id_].first;
    read_us += step_timer.MicroSeconds();

    // Apply transformations (mirror, crop...) to the image, writing straight
    // into this item's slice of the batch data.
    step_timer.Start();
    this->transformed_data_.set_cpu_data(data_out + batch->data_.offset(slot));
    this->data_transformer_->Transform(image, &(this->transformed_data_));
    transform_us += step_timer.MicroSeconds();

    label_out[slot] = lines_[lines_id_].second;

    // go to the next iter
    ++lines_id_;
    if (lines_id_ < total_lines) {
      continue;
    }
    // We have reached the end. Restart from the first.
    DLOG(INFO) << "Restarting data prefetching from start.";
    lines_id_ = 0;
    if (params.shuffle()) {
      ShuffleImages();
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_us / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << transform_us / 1000 << " ms.";
}

// Instantiate the ImageDataLayer class template and register the ImageData
// layer class (both macros are defined outside this file).
INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV

第七个重点data_layer的实现：

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <string>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"

namespace caffe {

// Constructor: forwards the layer parameters to the
// BasePrefetchingDataLayer base and constructs the data reader (reader_)
// from the same parameters.
template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param),
reader_(param) {
}

// Destructor: stops the layer's internal thread.
template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
this->StopInternalThread();
}

// Data layer setup: peeks at the next datum from the reader to infer the
// blob shape, then reshapes top[0], the transform scratch blob and all
// prefetch buffers for batch_size items.  Label blobs are shaped too when
// the layer outputs labels.
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();

  // Read a data point, and use it to initialize the top blob.
  Datum& sample = *(reader_.full().peek());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> data_dims = this->data_transformer_->InferBlobShape(sample);
  this->transformed_data_.Reshape(data_dims);

  // Reshape top[0] and prefetch_data according to the batch_size: the
  // leading dimension becomes the batch size, the rest stay as inferred
  // (presumably channels, height, width -- see the log line below).
  data_dims[0] = batch_size;
  top[0]->Reshape(data_dims);
  for (int buf = 0; buf < this->PREFETCH_COUNT; ++buf) {
    this->prefetch_[buf].data_.Reshape(data_dims);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();

  // label
  if (!this->output_labels_) {
    return;
  }
  // One label per item, for top[1] and every prefetch buffer.
  const vector<int> label_dims(1, batch_size);
  top[1]->Reshape(label_dims);
  for (int buf = 0; buf < this->PREFETCH_COUNT; ++buf) {
    this->prefetch_[buf].label_.Reshape(label_dims);
  }
}

// This function is called on prefetch thread.
// Fills one batch: the next datum in the reader's full queue fixes the blob
// shape for the batch, then batch_size datums are popped, transformed
// directly into the batch's data blob, their labels recorded (when labels are
// output), and each datum is handed back to the reader's free queue.
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_us = 0;
  double transform_us = 0;
  CPUTimer step_timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  const int batch_size = this->layer_param_.data_param().batch_size();
  Datum& first_datum = *(reader_.full().peek());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> batch_shape =
      this->data_transformer_->InferBlobShape(first_datum);
  this->transformed_data_.Reshape(batch_shape);
  // Reshape batch according to the batch_size.
  batch_shape[0] = batch_size;
  batch->data_.Reshape(batch_shape);

  Dtype* const data_out = batch->data_.mutable_cpu_data();
  // The label buffer is only touched when the layer outputs labels.
  const bool with_labels = this->output_labels_;
  Dtype* const label_out =
      with_labels ? batch->label_.mutable_cpu_data() : NULL;

  for (int slot = 0; slot < batch_size; ++slot) {
    // get a datum
    step_timer.Start();
    Datum* const item = reader_.full().pop("Waiting for data");
    read_us += step_timer.MicroSeconds();

    // Apply data transformations (mirror, scale, crop...), writing straight
    // into this item's slice of the batch data.
    step_timer.Start();
    this->transformed_data_.set_cpu_data(data_out + batch->data_.offset(slot));
    this->data_transformer_->Transform(*item, &(this->transformed_data_));
    // Copy label.
    if (with_labels) {
      label_out[slot] = item->label();
    }
    // Time spent transforming (including the label copy).
    transform_us += step_timer.MicroSeconds();

    // Return the datum to the reader's free queue.
    reader_.free().push(item);
  }
  step_timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_us / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << transform_us / 1000 << " ms.";
}

// Instantiate the DataLayer class template and register the Data layer class
// (both macros are defined outside this file).
INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe

第八个window_data_layer的实现：

#ifdef USE_OPENCV
#include <opencv2/highgui/highgui_c.h>
#include <stdint.h>

#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"

#include "caffe/common.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

// caffe.proto > LayerParameter > WindowDataParameter
//   'source' field specifies the window_file
//   'crop_size' indicates the desired warped size

namespace caffe {

// Destructor: stops the layer's internal thread.
// The destructor is named without a template argument list
// (~WindowDataLayer(), not ~WindowDataLayer<Dtype>()): the latter form is no
// longer accepted as a destructor declarator in C++20, and the plain form
// matches DataLayer<Dtype>::~DataLayer().
template <typename Dtype>
WindowDataLayer<Dtype>::~WindowDataLayer() {
  this->StopInternalThread();
}

// Parses the window file named by window_data_param.source into the image
// database and the foreground / background window lists, optionally caches
// the image files in memory, then shapes the top blobs and prefetch buffers
// and loads the mean (file or per-channel values).
template <typename Dtype>
void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// LayerSetUp runs through the window_file and creates two structures
// that hold windows: one for foreground (object) windows and one
// for background (non-object) windows. We use an overlap threshold
// to decide which is which.

// window_file format
// repeated:
//    # image_index
//    img_path (abs path)
//    channels
//    height
//    width
//    num_windows
//    class_index overlap x1 y1 x2 y2

// Notes on the format above:
// - the record starts with "#" followed by the image index; the two are read
//   as separate whitespace-delimited tokens, so "# 1" needs the space
// - img_path is appended to root_folder below
// - the "class_index overlap x1 y1 x2 y2" line is read num_windows times
// - (x1, y1) / (x2, y2) are the top-left / bottom-right window corners
//   (per the original author's note)
// Example record for image 1 with a single window:
// # 1 /1.jpg 3 720 480 1 1 1 0 0 100 100
// i.e. path /1.jpg, 3 channels, height 720, width 480, one window with
// class 1, overlap 1, corners (0,0) and (100,100).

LOG(INFO) << "Window data layer:" << std::endl
<< "  foreground (object) overlap threshold: "
<< this->layer_param_.window_data_param().fg_threshold() << std::endl
<< "  background (non-object) overlap threshold: "
<< this->layer_param_.window_data_param().bg_threshold() << std::endl
<< "  foreground sampling fraction: "
<< this->layer_param_.window_data_param().fg_fraction() << std::endl
<< "  cache_images: "
<< this->layer_param_.window_data_param().cache_images() << std::endl
<< "  root_folder: "
<< this->layer_param_.window_data_param().root_folder();

cache_images_ = this->layer_param_.window_data_param().cache_images();
string root_folder = this->layer_param_.window_data_param().root_folder();

// A random generator is created only when mirroring or cropping is
// requested in transform_param; otherwise prefetch_rng_ is cleared.
const bool prefetch_needs_rand =
this->transform_param_.mirror() ||
this->transform_param_.crop_size();
if (prefetch_needs_rand) {
const unsigned int prefetch_rng_seed = caffe_rng_rand();
prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
} else {
prefetch_rng_.reset();
}

// Open the window file.
std::ifstream infile(this->layer_param_.window_data_param().source().c_str());
CHECK(infile.good()) << "Failed to open window file "
<< this->layer_param_.window_data_param().source() << std::endl;

// Label histogram: label -> number of windows kept with that label.
// Label 0 (background) is always present.
map<int, int> label_hist;
label_hist.insert(std::make_pair(0, 0));

string hashtag;
int image_index, channels;
// Read the first record header up front so an empty file is reported.
if (!(infile >> hashtag >> image_index)) {
LOG(FATAL) << "Window file is empty";
}
do {
// Every record must start with "#".
CHECK_EQ(hashtag, "#");
// read image path
string image_path;
// The path from the file is appended to root_folder.
infile >> image_path;
image_path = root_folder + image_path;
// read image dimensions
vector<int> image_size(3);
// Stored in file order: channels, height, width.
infile >> image_size[0] >> image_size[1] >> image_size[2];
channels = image_size[0];
// Record (path, size) for this image.
image_database_.push_back(std::make_pair(image_path, image_size));

// When caching is enabled, keep the image file contents in memory too.
if (cache_images_) {
Datum datum;
// Read the image file into a Datum.
// NOTE(review): on failure this only logs and returns, leaving the top
// blobs and prefetch buffers unshaped -- confirm this is intended.
if (!ReadFileToDatum(image_path, &datum)) {
LOG(ERROR) << "Could not open or find file " << image_path;
return;
}
// Cache the Datum alongside its path.
image_database_cache_.push_back(std::make_pair(image_path, datum));
}
// read each box
int num_windows;
// Number of window lines that follow.
infile >> num_windows;
// Foreground / background overlap thresholds from the parameters.
const float fg_threshold =
this->layer_param_.window_data_param().fg_threshold();
const float bg_threshold =
this->layer_param_.window_data_param().bg_threshold();
for (int i = 0; i < num_windows; ++i) {
int label, x1, y1, x2, y2;
float overlap;
// One window: label, overlap, then the two corners.
infile >> label >> overlap >> x1 >> y1 >> x2 >> y2;

// Pack the window into a flat float vector indexed by the layer's
// window-field enum.
vector<float> window(WindowDataLayer::NUM);
window[WindowDataLayer::IMAGE_INDEX] = image_index;
window[WindowDataLayer::LABEL] = label;
window[WindowDataLayer::OVERLAP] = overlap;
window[WindowDataLayer::X1] = x1;
window[WindowDataLayer::Y1] = y1;
window[WindowDataLayer::X2] = x2;
window[WindowDataLayer::Y2] = y2;

// add window to foreground list or background list
// overlap >= fg_threshold: foreground; overlap < bg_threshold: background;
// anything in between is dropped.
if (overlap >= fg_threshold) {
int label = window[WindowDataLayer::LABEL];
// A foreground window must carry a positive class label; anything else
// means the window file is inconsistent.
CHECK_GT(label, 0);
fg_windows_.push_back(window);
// Count this window under its label.
label_hist.insert(std::make_pair(label, 0));
label_hist[label]++;
} else if (overlap < bg_threshold) {
// Below the background threshold.
// background window, force label and overlap to 0
window[WindowDataLayer::LABEL] = 0;
window[WindowDataLayer::OVERLAP] = 0;
bg_windows_.push_back(window);
// Count this window under label 0 (background).
label_hist[0]++;
}
}

// Log progress for every image whose index is a multiple of 100.
if (image_index % 100 == 0) {
LOG(INFO) << "num: " << image_index << " "
<< image_path << " "
<< image_size[0] << " "
<< image_size[1] << " "
<< image_size[2] << " "
<< "windows to process: " << num_windows;
}
} while (infile >> hashtag >> image_index);

// Report the image count.
// NOTE(review): this is the last image_index read plus one, so it is only
// the true count if indices are 0-based and consecutive.
LOG(INFO) << "Number of images: " << image_index+1;

// Report the number of windows kept per class.
for (map<int, int>::iterator it = label_hist.begin();
it != label_hist.end(); ++it) {
LOG(INFO) << "class " << it->first << " has " << label_hist[it->first]
<< " samples";
}

LOG(INFO) << "Amount of context padding: "
<< this->layer_param_.window_data_param().context_pad();

LOG(INFO) << "Crop mode: "
<< this->layer_param_.window_data_param().crop_mode();

// image
// crop_size is mandatory for this layer.
const int crop_size = this->transform_param_.crop_size();
CHECK_GT(crop_size, 0);
// Batch size from the parameters.
const int batch_size = this->layer_param_.window_data_param().batch_size();
// top[0] is batch_size x channels x crop_size x crop_size, where channels
// is the value read for the last image in the window file.
top[0]->Reshape(batch_size, channels, crop_size, crop_size);
// The prefetch data buffers get the same shape.
for (int i = 0; i < this->PREFETCH_COUNT; ++i)
this->prefetch_[i].data_.Reshape(
batch_size, channels, crop_size, crop_size);

LOG(INFO) << "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
// label
// top[1] holds one label per item.
vector<int> label_shape(1, batch_size);
top[1]->Reshape(label_shape);
// The prefetch label buffers get the same shape.
for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
this->prefetch_[i].label_.Reshape(label_shape);
}

// data mean
// The mean comes either from a mean file or from explicit mean values.
has_mean_file_ = this->transform_param_.has_mean_file();
has_mean_values_ = this->transform_param_.mean_value_size() > 0;
if (has_mean_file_) {// load the mean blob from the given file
const string& mean_file =
this->transform_param_.mean_file();
LOG(INFO) << "Loading mean file from: " << mean_file;
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
data_mean_.FromProto(blob_proto);
}
if (has_mean_values_) {// take the mean values from the parameters
CHECK(has_mean_file_ == false) <<
"Cannot specify mean_file and mean_value at the same time";
for (int c = 0; c < this->transform_param_.mean_value_size(); ++c) {
mean_values_.push_back(this->transform_param_.mean_value(c));
}

// Either a single mean value shared by all channels, or exactly one
// value per channel.
CHECK(mean_values_.size() == 1 || mean_values_.size() == channels) <<
"Specify either 1 mean_value or as many as channels: " << channels;
if (channels > 1 && mean_values_.size() == 1) {
// Replicate the mean_value for simplicity
for (int c = 1; c < channels; ++c) {
mean_values_.push_back(mean_values_[0]);
}
}
}
}

// Draws the next value from the layer's prefetch random number generator.
// The generator must already exist (it is created during setup).
template <typename Dtype>
unsigned int WindowDataLayer<Dtype>::PrefetchRand() {
  CHECK(prefetch_rng_);
  caffe::rng_t* const engine =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  return (*engine)();
}

// load_batch implementation for WindowDataLayer (per the original author's
// note, required by the BasePrefetchingDataLayer base so its thread can call
// it).
// Samples background then foreground windows at random, crops (and optionally
// pads, squares and mirrors) each window out of its image, warps it to
// crop_size x crop_size, subtracts the mean, scales it and writes it with its
// label into the batch.
// This function is called on prefetch thread
template <typename Dtype>
void WindowDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
// At each iteration, sample N windows where N*p are foreground (object)
// windows and N*(1-p) are background (non-object) windows
CPUTimer batch_timer;
batch_timer.Start();
double read_time = 0;
double trans_time = 0;
CPUTimer timer;
// Output buffers for data and labels.
Dtype* top_data = batch->data_.mutable_cpu_data();
Dtype* top_label = batch->label_.mutable_cpu_data();
// Multiplier applied to every (mean-subtracted) pixel.
const Dtype scale = this->layer_param_.window_data_param().scale();
// Number of windows per batch.
const int batch_size = this->layer_param_.window_data_param().batch_size();
// Amount of context padding around each window.
const int context_pad = this->layer_param_.window_data_param().context_pad();
// Side length of the warped output window.
const int crop_size = this->transform_param_.crop_size();
// Whether random horizontal mirroring is enabled.
const bool mirror = this->transform_param_.mirror();
// Fraction of the batch drawn from the foreground windows.
const float fg_fraction =
this->layer_param_.window_data_param().fg_fraction();
Dtype* mean = NULL;
int mean_off = 0;
int mean_width = 0;
int mean_height = 0;
// When a mean file was loaded, use it.
if (this->has_mean_file_) {
mean = this->data_mean_.mutable_cpu_data();
// Offset that centres a crop_size window inside the mean image; the same
// offset (computed from the width) is applied to rows and columns.
mean_off = (this->data_mean_.width() - crop_size) / 2;
mean_width = this->data_mean_.width();
mean_height = this->data_mean_.height();
}
cv::Size cv_crop_size(crop_size, crop_size);
// Crop mode from the parameters; only the value "square" is treated
// specially below.
const string& crop_mode = this->layer_param_.window_data_param().crop_mode();

bool use_square = (crop_mode == "square") ? true : false;

// zero out batch
caffe_set(batch->data_.count(), Dtype(0), top_data);

// Number of foreground windows in this batch (truncated toward zero).
const int num_fg = static_cast<int>(static_cast<float>(batch_size)
* fg_fraction);
// Samples to draw per kind: [0] background, [1] foreground.
const int num_samples[2] = { batch_size - num_fg, num_fg };

int item_id = 0;
// sample from bg set then fg set
for (int is_fg = 0; is_fg < 2; ++is_fg) {
for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
// sample a window
timer.Start();
// Draw a random number to pick the window.
const unsigned int rand_index = PrefetchRand();
// fg_windows_ / bg_windows_ are indexed modulo their size.
// NOTE(review): if the selected list is empty this is a modulo by zero --
// confirm setup guarantees both lists are non-empty when sampled.
vector<float> window = (is_fg) ?
fg_windows_[rand_index % fg_windows_.size()] :
bg_windows_[rand_index % bg_windows_.size()];

// Mirror this window with probability 1/2 when mirroring is enabled.
bool do_mirror = mirror && PrefetchRand() % 2;

// load the image containing the window
// Entry of image_database_ for this window's image index (a pair of a path
// string and an int vector).
pair<std::string, vector<int> > image =
image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];

// Obtain the image pixels.
cv::Mat cv_img;
if (this->cache_images_) {
// Cached: fetch the stored Datum for this image index.
pair<std::string, Datum> image_cached =
image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
// Decode the cached Datum into an OpenCV Mat.
cv_img = DecodeDatumToCVMat(image_cached.second, true);
} else {
// Not cached: read the image from disk.
// NOTE(review): on failure this logs and returns mid-batch, leaving the
// remaining items zeroed and their labels unset.
cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
if (!cv_img.data) {
LOG(ERROR) << "Could not open or find file " << image.first;
return;
}
}
read_time += timer.MicroSeconds();
timer.Start();
const int channels = cv_img.channels();

// crop window out of image and warp it
// Window corners as stored in the window list.
int x1 = window[WindowDataLayer<Dtype>::X1];
int y1 = window[WindowDataLayer<Dtype>::Y1];
int x2 = window[WindowDataLayer<Dtype>::X2];
int y2 = window[WindowDataLayer<Dtype>::Y2];

int pad_w = 0;
int pad_h = 0;
// The window is expanded when context padding is requested or the crop
// must be square:
// context_scale = crop_size / (crop_size - 2*context_pad)
if (context_pad > 0 || use_square) {
// scale factor by which to expand the original region
// such that after warping the expanded region to crop_size x crop_size
// there's exactly context_pad amount of padding on each side
Dtype context_scale = static_cast<Dtype>(crop_size) /
static_cast<Dtype>(crop_size - 2*context_pad);

// compute the expanded region
// Half of the window height.
Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;
// Half of the window width.
Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;
// Window centre, x coordinate.
Dtype center_x = static_cast<Dtype>(x1) + half_width;
// Window centre, y coordinate.
Dtype center_y = static_cast<Dtype>(y1) + half_height;
if (use_square) {// square mode: grow the shorter half-extent to the longer
if (half_height > half_width) {
half_width = half_height;
} else {
half_height = half_width;
}
}

// Corners of the expanded region.
x1 = static_cast<int>(round(center_x - half_width*context_scale));
x2 = static_cast<int>(round(center_x + half_width*context_scale));
y1 = static_cast<int>(round(center_y - half_height*context_scale));
y2 = static_cast<int>(round(center_y + half_height*context_scale));

// the expanded region may go outside of the image
// so we compute the clipped (expanded) region and keep track of
// the extent beyond the image
// pad_x1 / pad_y1 / pad_x2 / pad_y2 are how far the region sticks out past
// the left / top / right / bottom image edge.
int unclipped_height = y2-y1+1;
int unclipped_width = x2-x1+1;
int pad_x1 = std::max(0, -x1);
int pad_y1 = std::max(0, -y1);
int pad_x2 = std::max(0, x2 - cv_img.cols + 1);
int pad_y2 = std::max(0, y2 - cv_img.rows + 1);
// clip bounds
x1 = x1 + pad_x1;
x2 = x2 - pad_x2;
y1 = y1 + pad_y1;
y2 = y2 - pad_y2;
CHECK_GT(x1, -1);
CHECK_GT(y1, -1);
CHECK_LT(x2, cv_img.cols);
CHECK_LT(y2, cv_img.rows);

// Height and width of the clipped region.
int clipped_height = y2-y1+1;
int clipped_width = x2-x1+1;

// scale factors that would be used to warp the unclipped
// expanded region
// (crop_size divided by the unclipped width / height)
Dtype scale_x =
static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_width);
Dtype scale_y =
static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_height);

// size to warp the clipped expanded region to
// (clipped width / height scaled by the same factors)
cv_crop_size.width =
static_cast<int>(round(static_cast<Dtype>(clipped_width)*scale_x));
cv_crop_size.height =
static_cast<int>(round(static_cast<Dtype>(clipped_height)*scale_y));
// Convert the overhang amounts to output (warped) pixels.
pad_x1 = static_cast<int>(round(static_cast<Dtype>(pad_x1)*scale_x));
pad_x2 = static_cast<int>(round(static_cast<Dtype>(pad_x2)*scale_x));
pad_y1 = static_cast<int>(round(static_cast<Dtype>(pad_y1)*scale_y));
pad_y2 = static_cast<int>(round(static_cast<Dtype>(pad_y2)*scale_y));

pad_h = pad_y1;
// if we're mirroring, we mirror the padding too (to be pedantic)
if (do_mirror) {
pad_w = pad_x2;
} else {
pad_w = pad_x1;
}

// ensure that the warped, clipped region plus the padding fits in the
// crop_size x crop_size image (it might not due to rounding)
if (pad_h + cv_crop_size.height > crop_size) {
cv_crop_size.height = crop_size - pad_h;
}
if (pad_w + cv_crop_size.width > crop_size) {
cv_crop_size.width = crop_size - pad_w;
}
}

cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1);
// Crop the region of interest out of the image.
cv::Mat cv_cropped_img = cv_img(roi);
// Resize the crop to cv_crop_size with linear interpolation.
cv::resize(cv_cropped_img, cv_cropped_img,
cv_crop_size, 0, 0, cv::INTER_LINEAR);

// horizontal flip at random
if (do_mirror) {
// Flip the crop horizontally.
cv::flip(cv_cropped_img, cv_cropped_img, 1);
}

// copy the warped window into top_data
for (int h = 0; h < cv_cropped_img.rows; ++h) {
const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
int img_index = 0;
for (int w = 0; w < cv_cropped_img.cols; ++w) {
for (int c = 0; c < channels; ++c) {
int top_index = ((item_id * channels + c) * crop_size + h + pad_h)
* crop_size + w + pad_w;
// int top_index = (c * height + h) * width + w;
Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
if (this->has_mean_file_) {// subtract the matching mean-image value
int mean_index = (c * mean_height + h + mean_off + pad_h)
* mean_width + w + mean_off + pad_w;
top_data[top_index] = (pixel - mean[mean_index]) * scale;
} else {
if (this->has_mean_values_) {// subtract the per-channel mean value
top_data[top_index] = (pixel - this->mean_values_[c]) * scale;
} else {
top_data[top_index] = pixel * scale;// no mean: only scale the pixel
}
}
}
}
}
trans_time += timer.MicroSeconds();
// get window label
top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];

#if 0
// useful debugging code for dumping transformed windows to disk
string file_id;
std::stringstream ss;
ss << PrefetchRand();
ss >> file_id;
std::ofstream inf((string("dump/") + file_id +
string("_info.txt")).c_str(), std::ofstream::out);
inf << image.first << std::endl
<< window[WindowDataLayer<Dtype>::X1]+1 << std::endl
<< window[WindowDataLayer<Dtype>::Y1]+1 << std::endl
<< window[WindowDataLayer<Dtype>::X2]+1 << std::endl
<< window[WindowDataLayer<Dtype>::Y2]+1 << std::endl
<< do_mirror << std::endl
<< top_label[item_id] << std::endl
<< is_fg << std::endl;
inf.close();
std::ofstream top_data_file((string("dump/") + file_id +
string("_data.txt")).c_str(),
std::ofstream::out | std::ofstream::binary);
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < crop_size; ++h) {
for (int w = 0; w < crop_size; ++w) {
top_data_file.write(reinterpret_cast<char*>(
&top_data[((item_id * channels + c) * crop_size + h)
* crop_size + w]),
sizeof(Dtype));
}
}
}
top_data_file.close();
#endif

item_id++;
}
}
batch_timer.Stop();
DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

// Instantiate the WindowDataLayer class template and register the WindowData
// layer class (both macros are defined outside this file).
INSTANTIATE_CLASS(WindowDataLayer);
REGISTER_LAYER_CLASS(WindowData);

}  // namespace caffe
#endif  // USE_OPENCV

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航