Caffe源码中Pooling Layer文件分析

2017-03-09
Caffe源码(caffe version commit: 09868ac,date: 2015.08.15)中有一些重要的头文件,这里介绍include/caffe/vision_layers.hpp文件中的PoolingLayer类。在最新版Caffe中,PoolingLayer类被单独放在include/caffe/layers/pooling_layer.hpp文件中,这两个文件中PoolingLayer类的内容及实现完全一致:
1.  include文件:
2.  类PoolingLayer:池化层,Layer类的子类
Pooling layer的主要作用是降维,即缩小feature map、对图像进行降采样,方法有:Max(最大值采样)、Ave(均值采样)和Stochastic(随机采样)。

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
* @brief Pools the input image by taking the max, average, etc. within regions.
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
// 池化层,Layer类的子类,图像降采样,有三种Pooling方法:Max、Avx、Stochastic
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
// 显示构造函数
explicit PoolingLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
// 参数初始化,通过类PoolingParameter获取成员变量值,包括:
// global_pooling_、kernel_h_、kernel_w_、pad_h_、pad_w_、stride_h_、stride_w_
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 调整top blobs的shape,并有可能会reshape rand_idx_或max_idx_;
// 获取成员变量值,包括:channels_、height_、width_、pooled_height_、pooled_width_
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 获得Pooling layer的类型: Pooling
virtual inline const char* type() const { return "Pooling"; }
// 获得Pooling layer所需的bottom blobs的个数: 1
virtual inline int ExactNumBottomBlobs() const { return 1; }
// 获得Pooling layer所需的bottom blobs的最少个数: 1
virtual inline int MinTopBlobs() const { return 1; }
// MAX POOL layers can output an extra top blob for the mask;
// others can only output the pooled inputs.
// 获得Pooling layer所需的bottom blobs的最多个数: Max为2,其它(Avg, Stochastic)为1
virtual inline int MaxTopBlobs() const {
return (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX) ? 2 : 1;

// CPU实现Pooling layer的前向传播,仅有Max和Ave两种方法实现
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// GPU实现Pooling layer的前向传播,Max、Ave、Stochastic三种方法实现
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// CPU实现Pooling layer的反向传播,仅有Max和Ave两种方法实现
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// GPU实现Pooling layer的反向传播,Max、Ave、Stochastic三种方法实现
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Caffe中类的成员变量名都带有后缀"_",这样就容易区分临时变量和类成员变量
int kernel_h_, kernel_w_; // 滤波器(卷积核)大小
int stride_h_, stride_w_; // 步长大小
int pad_h_, pad_w_; // 图像扩充大小
int channels_; // 图像通道数
int height_, width_; // 图像高、宽
// 池化后图像高、宽
// pooled_height_ = (height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1
// pooled_width_ = (width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1
int pooled_height_, pooled_width_;
bool global_pooling_; // 是否全区域池化(将整幅图像降采样为1*1)
Blob<Dtype> rand_idx_; // 随机采样索引,Pooling方法为STOCHASTIC时用到并会Reshape
Blob<int> max_idx_; // 最大值采样索引,Pooling方法为MAX时用到并会Reshape

} // namespace caffe

#endif // CAFFE_POOLING_LAYER_HPP_

在caffe.proto文件中,有一个message与pooling layer相关,如下:
// Parameters of the Pooling layer.
message PoolingParameter {
  // Pooling method: MAX (max sampling), AVE (average sampling),
  // STOCHASTIC (random sampling).
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X): pixels added at the image border
  optional uint32 pad_h = 9 [default = 0]; // The padding height (Y)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (X)
  optional uint32 kernel_size = 2; // The kernel size (square): pooling window with height == width
  optional uint32 kernel_h = 5; // The kernel height
  optional uint32 kernel_w = 6; // The kernel width
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X): step by which the window moves
  optional uint32 stride_h = 7; // The stride height
  optional uint32 stride_w = 8; // The stride width
  // Implementation used to run the layer.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT];
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false]; // whether to pool over the whole input
}
Pooling layer的测试代码如下:

#include "funset.hpp"
#include <string>
#include <vector>
#include "common.hpp"

int test_caffe_layer_pooling()
caffe::Caffe::set_mode(caffe::Caffe::CPU); // set run caffe mode

// set layer parameter
caffe::LayerParameter layer_param;

// cv::Mat -> caffe::Blob
std::string image_name = "E:/GitCode/Caffe_Test/test_data/images/a.jpg";
cv::Mat mat1 = cv::imread(image_name, 1);
if (!mat1.data) {
fprintf(stderr, "read image fail: %s\n", image_name.c_str());
return -1;
mat1.convertTo(mat1, CV_32FC3);
std::vector<cv::Mat> mat2;
cv::split(mat1, mat2);
std::vector<int> mat_reshape{ 1, (int)mat2.size(), mat2[0].rows, mat2[0].cols };

caffe::Blob<float> blob;
size_t size = mat2[0].rows * mat2[0].cols;
float* data = new float[mat2.size() * size];
memcpy(data, mat2[0].data, size * sizeof(float));
memcpy(data + size, mat2[1].data, size * sizeof(float));
memcpy(data + 2 * size, mat2[2].data, size * sizeof(float));

for (int method = 0; method < 2; ++method) {
// set pooling parameter
caffe::PoolingParameter* pooling_param = layer_param.mutable_pooling_param();
if (method == 0) pooling_param->set_pool(caffe::PoolingParameter::MAX);
else pooling_param->set_pool(caffe::PoolingParameter::AVE);

std::vector<caffe::Blob<float>*> bottom_blob{ &blob }, top_blob{ &caffe::Blob<float>()/*, &caffe::Blob<float>() */ };

// test PoolingLayer function
caffe::PoolingLayer<float> pooling_layer(layer_param);
pooling_layer.SetUp(bottom_blob, top_blob);
fprintf(stderr, "top blob info: channels: %d, height: %d, width: %d\n",
top_blob[0]->channels(), top_blob[0]->height(), top_blob[0]->width());

pooling_layer.Forward(bottom_blob, top_blob);

int height = top_blob[0]->height();
int width = top_blob[0]->width();
const float* p = top_blob[0]->cpu_data();
std::vector<cv::Mat> mat3{ cv::Mat(height, width, CV_32FC1, (float*)p),
cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
cv::Mat mat4;
cv::merge(mat3, mat4);
mat4.convertTo(mat4, CV_8UC3);
if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/forward0.jpg";
else image_name = "E:/GitCode/Caffe_Test/test_data/images/forward1.jpg";
cv::imwrite(image_name, mat4);

for (int i = 0; i < bottom_blob[0]->count(); ++i)
bottom_blob[0]->mutable_cpu_diff()[i] = bottom_blob[0]->cpu_data()[i];
for (int i = 0; i < top_blob[0]->count(); ++i)
top_blob[0]->mutable_cpu_diff()[i] = top_blob[0]->cpu_data()[i];

std::vector<bool> propagate_down{ true };
pooling_layer.Backward(top_blob, propagate_down, bottom_blob);

height = bottom_blob[0]->height();
width = bottom_blob[0]->width();
p = bottom_blob[0]->cpu_diff();
std::vector<cv::Mat> mat5{ cv::Mat(height, width, CV_32FC1, (float*)p),
cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
cv::Mat mat6;
cv::merge(mat5, mat6);
mat6.convertTo(mat6, CV_8UC3);
if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/backward0.jpg";
else image_name = "E:/GitCode/Caffe_Test/test_data/images/backward1.jpg";
cv::imwrite(image_name, mat6);

delete[] data;
return 0;
} 执行结果如下图:


