您的位置：首页 > Web前端
梳理caffe代码pooling_layer（二十）

2016-06-04 12:53 573 查看
pooling层的代码实现：
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/pooling_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

using std::min;
using std::max;

/**
 * @brief Validates the pooling configuration and caches the kernel, padding
 *        and stride extents in the layer's members.
 *
 * @param bottom Input blobs; bottom[0] supplies the kernel extent when global
 *               pooling is enabled.
 * @param top    Output blobs (not read by this function).
 *
 * Every violated constraint aborts through a CHECK macro.
 */
template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Bind by const reference: the message is only read here, so copying it
  // is unnecessary.
  const PoolingParameter& pool_param = this->layer_param_.pooling_param();

  // --- Kernel specification -------------------------------------------------
  if (pool_param.global_pooling()) {
    // With global pooling the kernel is taken from the input, so an explicit
    // kernel size is rejected.
    CHECK(!(pool_param.has_kernel_size() ||
        pool_param.has_kernel_h() || pool_param.has_kernel_w()))
        << "With Global_pooling: true Filter size cannot specified";
  } else {
    // Exactly one of {kernel_size} or {kernel_h AND kernel_w} must be given.
    CHECK(!pool_param.has_kernel_size() !=
        !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
        << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
        (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
        << "For non-square filters both kernel_h and kernel_w are required.";
  }

  // --- Pad / stride specification -------------------------------------------
  // Either the pair (x_h, x_w) is given without the scalar form, or neither
  // member of the pair is given.
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";

  // --- Kernel extent --------------------------------------------------------
  global_pooling_ = pool_param.global_pooling();
  if (global_pooling_) {
    // The kernel spans the whole input plane.
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else {
    // User-specified kernel, square or rectangular.
    if (pool_param.has_kernel_size()) {
      kernel_h_ = kernel_w_ = pool_param.kernel_size();
    } else {
      kernel_h_ = pool_param.kernel_h();
      kernel_w_ = pool_param.kernel_w();
    }
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";

  // --- Padding --------------------------------------------------------------
  if (!pool_param.has_pad_h()) {
    pad_h_ = pad_w_ = pool_param.pad();
  } else {
    pad_h_ = pool_param.pad_h();
    pad_w_ = pool_param.pad_w();
  }

  // --- Stride ---------------------------------------------------------------
  if (!pool_param.has_stride_h()) {
    stride_h_ = stride_w_ = pool_param.stride();
  } else {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  }
  // Reshape() divides by the stride; a zero stride would make the pooled
  // size computation meaningless, so reject it up front.
  CHECK_GT(stride_h_, 0) << "Stride dimensions cannot be zero.";
  CHECK_GT(stride_w_, 0) << "Stride dimensions cannot be zero.";

  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
        << "With Global_pooling: true; only pad = 0 and stride = 1";
  }
  if (pad_h_ != 0 || pad_w_ != 0) {
    CHECK(pool_param.pool() == PoolingParameter_PoolMethod_AVE
        || pool_param.pool() == PoolingParameter_PoolMethod_MAX)
        << "Padding implemented only for average and max pooling.";
    // The padding must be strictly smaller than the kernel.
    CHECK_LT(pad_h_, kernel_h_);
    CHECK_LT(pad_w_, kernel_w_);
  }
}

/**
 * @brief Derives the pooled output extent from the current input shape and
 *        resizes the output blob(s) and the auxiliary index blobs.
 *
 * @param bottom Input blobs; bottom[0] must have exactly 4 axes.
 * @param top    Output blobs; top[0] receives the pooled shape and top[1],
 *               when present, is shaped like top[0].
 */
template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";

  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  if (global_pooling_) {
    // Global pooling: the kernel follows the input plane.
    kernel_h_ = height_;
    kernel_w_ = width_;
  }

  // Output extent: ceil((in + 2 * pad - kernel) / stride) + 1 per dimension.
  const float span_h = static_cast<float>(height_ + 2 * pad_h_ - kernel_h_);
  const float span_w = static_cast<float>(width_ + 2 * pad_w_ - kernel_w_);
  pooled_height_ = 1 + static_cast<int>(ceil(span_h / stride_h_));
  pooled_width_ = 1 + static_cast<int>(ceil(span_w / stride_w_));

  if (pad_h_ || pad_w_) {
    // If we have padding, ensure that the last pooling starts strictly
    // inside the image (instead of at the padding); otherwise clip the last.
    const int last_row_start = (pooled_height_ - 1) * stride_h_;
    if (last_row_start >= height_ + pad_h_) {
      pooled_height_ -= 1;
    }
    const int last_col_start = (pooled_width_ - 1) * stride_w_;
    if (last_col_start >= width_ + pad_w_) {
      pooled_width_ -= 1;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }

  // Shape of the pooled output.
  const int num = bottom[0]->num();
  top[0]->Reshape(num, channels_, pooled_height_, pooled_width_);
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }

  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Max pooling keeps the argmax position of every output element for the
    // backward pass. max_idx_ has the output's shape and is only needed when
    // the indices are not exported through a second top blob.
    if (top.size() == 1) {
      max_idx_.Reshape(num, channels_, pooled_height_, pooled_width_);
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    // Stochastic pooling: size the random index buffer like the output.
    rand_idx_.Reshape(num, channels_, pooled_height_, pooled_width_);
    break;
  default:
    break;
  }
}

// TODO(Yangqing): Is there a faster way to do pooling in the channel-first
// case?
/**
 * @brief CPU forward pass: pools bottom[0] into top[0] one (image, channel)
 *        plane at a time.
 *
 * For max pooling the flat in-plane index of each selected input element is
 * recorded either in top[1] (when a second top blob exists) or in max_idx_.
 *
 * @param bottom Input blobs; only bottom[0] is read.
 * @param top    Output blobs; top[0] receives the pooled values.
 */
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* in_plane = bottom[0]->cpu_data();
  Dtype* out_plane = top[0]->mutable_cpu_data();
  const int out_total = top[0]->count();
  // We'll output the mask to top[1] if it's of size >1.
  const bool mask_in_top = top.size() > 1;
  int* idx_plane = NULL;        // argmax indices stored in max_idx_
  Dtype* top_idx_plane = NULL;  // argmax indices stored in top[1]

  // The pooling method is dispatched once, outside the loops, to keep the
  // inner loops free of per-element branching on the method.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX: {
    // Seed the index storage with -1 and the output with -FLT_MAX (from
    // <cfloat>) so that any larger input value replaces the seed.
    if (mask_in_top) {
      top_idx_plane = top[1]->mutable_cpu_data();
      caffe_set(out_total, Dtype(-1), top_idx_plane);
    } else {
      idx_plane = max_idx_.mutable_cpu_data();
      caffe_set(out_total, -1, idx_plane);
    }
    caffe_set(out_total, Dtype(-FLT_MAX), out_plane);

    const int num = bottom[0]->num();
    for (int img = 0; img < num; ++img) {
      for (int ch = 0; ch < channels_; ++ch) {
        for (int out_r = 0; out_r < pooled_height_; ++out_r) {
          // Row range of the window, clipped to the input plane.
          const int r_origin = out_r * stride_h_ - pad_h_;
          const int r_stop = min(r_origin + kernel_h_, height_);
          const int r_start = max(r_origin, 0);
          for (int out_c = 0; out_c < pooled_width_; ++out_c) {
            // Column range of the window, clipped to the input plane.
            const int c_origin = out_c * stride_w_ - pad_w_;
            const int c_stop = min(c_origin + kernel_w_, width_);
            const int c_start = max(c_origin, 0);
            // Flat position of this output element inside its plane.
            const int out_idx = out_r * pooled_width_ + out_c;
            for (int r = r_start; r < r_stop; ++r) {
              for (int col = c_start; col < c_stop; ++col) {
                // Flat position of the input element inside its plane.
                const int in_idx = r * width_ + col;
                if (!(in_plane[in_idx] > out_plane[out_idx])) {
                  continue;
                }
                // New running maximum: keep the value and where it came from.
                out_plane[out_idx] = in_plane[in_idx];
                if (mask_in_top) {
                  top_idx_plane[out_idx] = static_cast<Dtype>(in_idx);
                } else {
                  idx_plane[out_idx] = in_idx;
                }
              }
            }
          }
        }
        // Step every pointer forward by offset(0, 1), i.e. to the next
        // channel plane.
        in_plane += bottom[0]->offset(0, 1);
        out_plane += top[0]->offset(0, 1);
        if (mask_in_top) {
          top_idx_plane += top[0]->offset(0, 1);
        } else {
          idx_plane += top[0]->offset(0, 1);
        }
      }
    }
    break;
  }
  case PoolingParameter_PoolMethod_AVE: {
    // Start every output element at zero before accumulating.
    caffe_set(out_total, Dtype(0), out_plane);

    const int num = bottom[0]->num();
    for (int img = 0; img < num; ++img) {
      for (int ch = 0; ch < channels_; ++ch) {
        for (int out_r = 0; out_r < pooled_height_; ++out_r) {
          // The divisor counts rows up to the padded border; the summation
          // range is then clipped to rows that exist in the input.
          const int r_origin = out_r * stride_h_ - pad_h_;
          const int r_padded_stop = min(r_origin + kernel_h_,
                                        height_ + pad_h_);
          const int window_rows = r_padded_stop - r_origin;
          const int r_start = max(r_origin, 0);
          const int r_stop = min(r_padded_stop, height_);
          for (int out_c = 0; out_c < pooled_width_; ++out_c) {
            const int c_origin = out_c * stride_w_ - pad_w_;
            const int c_padded_stop = min(c_origin + kernel_w_,
                                          width_ + pad_w_);
            const int window_cols = c_padded_stop - c_origin;
            const int c_start = max(c_origin, 0);
            const int c_stop = min(c_padded_stop, width_);
            // Number of elements in the window, padding included.
            const int pool_size = window_rows * window_cols;

            Dtype& cell = out_plane[out_r * pooled_width_ + out_c];
            for (int r = r_start; r < r_stop; ++r) {
              for (int col = c_start; col < c_stop; ++col) {
                cell += in_plane[r * width_ + col];
              }
            }
            cell /= pool_size;
          }
        }
        // Advance to the next channel plane.
        in_plane += bottom[0]->offset(0, 1);
        out_plane += top[0]->offset(0, 1);
      }
    }
    break;
  }
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

/**
 * @brief CPU backward pass: distributes the gradient of top[0] onto
 *        bottom[0].
 *
 * Max pooling routes each output gradient to the input position recorded
 * during the forward pass (read from top[1] when a second top blob exists,
 * otherwise from max_idx_). Average pooling spreads each output gradient
 * evenly over its window, dividing by the same window size the forward pass
 * divided by.
 *
 * @param top            Output blobs; top[0]'s diff is read.
 * @param propagate_down If propagate_down[0] is false nothing is computed.
 * @param bottom         Input blobs; bottom[0]'s diff is overwritten.
 */
template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // Gradients are accumulated with +=, so start from an all-zero diff.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more codes.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();
    } else {
      mask = max_idx_.cpu_data();
    }
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Flat position of the pooled element inside its plane.
            const int index = ph * pooled_width_ + pw;
            // Cast the top-mask entry explicitly so both operands of the
            // conditional are int; otherwise the integer mask would be
            // converted to Dtype and back.
            const int bottom_index = use_top_mask ?
                static_cast<int>(top_mask[index]) : mask[index];
            // Forward_cpu seeds the mask with -1 and only overwrites it when
            // an input exceeds the running maximum, so an empty or all-NaN
            // window leaves -1 behind. Such an entry has no source element;
            // skip it instead of writing before the start of the plane.
            if (bottom_index < 0) {
              continue;
            }
            bottom_diff[bottom_index] += top_diff[index];
          }
        }
        // Advance every pointer to the next channel plane.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Recompute the window exactly as the forward pass does: the
            // divisor counts the padded window, the loop range is clipped to
            // the input plane.
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Each input in the window receives an equal share of the
                // output gradient.
                bottom_diff[h * width_ + w] +=
                    top_diff[ph * pooled_width_ + pw] / pool_size;
              }
            }
          }
        }
        // Advance to the next channel plane.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

// When CPU_ONLY is defined, invoke STUB_GPU for this layer. Presumably this
// supplies placeholder GPU entry points; confirm against the macro definition.
#ifdef CPU_ONLY
STUB_GPU(PoolingLayer);
#endif

// Presumably emits the explicit template instantiations of PoolingLayer;
// verify against the INSTANTIATE_CLASS macro definition.
INSTANTIATE_CLASS(PoolingLayer);

}  // namespace caffe
内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理
标签：
相关文章推荐
新的分享
章节导航