梳理caffe代码pooling_layer(二十)
2016-06-04 12:53
573 查看
pooling层的代码实现:
#include <algorithm> #include <cfloat> #include <vector> #include "caffe/layers/pooling_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { using std::min; using std::max; template <typename Dtype> void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { PoolingParameter pool_param = this->layer_param_.pooling_param(); if (pool_param.global_pooling()) { CHECK(!(pool_param.has_kernel_size() || pool_param.has_kernel_h() || pool_param.has_kernel_w())) << "With Global_pooling: true Filter size cannot specified";//全局pooling可能是因为有了NIN } else { CHECK(!pool_param.has_kernel_size() != !(pool_param.has_kernel_h() && pool_param.has_kernel_w())) << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; CHECK(pool_param.has_kernel_size() || (pool_param.has_kernel_h() && pool_param.has_kernel_w())) << "For non-square filters both kernel_h and kernel_w are required."; } CHECK((!pool_param.has_pad() && pool_param.has_pad_h() && pool_param.has_pad_w()) || (!pool_param.has_pad_h() && !pool_param.has_pad_w())) << "pad is pad OR pad_h and pad_w are required."; CHECK((!pool_param.has_stride() && pool_param.has_stride_h() && pool_param.has_stride_w()) || (!pool_param.has_stride_h() && !pool_param.has_stride_w())) << "Stride is stride OR stride_h and stride_w are required."; global_pooling_ = pool_param.global_pooling(); if (global_pooling_) { kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); } else { if (pool_param.has_kernel_size()) { kernel_h_ = kernel_w_ = pool_param.kernel_size(); } else { kernel_h_ = pool_param.kernel_h(); kernel_w_ = pool_param.kernel_w(); }//用户自定义的kernel大小 } CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; if (!pool_param.has_pad_h()) { pad_h_ = pad_w_ = pool_param.pad(); } else { pad_h_ = pool_param.pad_h(); pad_w_ = pool_param.pad_w(); }//填充 if (!pool_param.has_stride_h()) { stride_h_ = stride_w_ = pool_param.stride(); } else { stride_h_ = pool_param.stride_h(); stride_w_ = pool_param.stride_w(); }//步长 if (global_pooling_) { CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1) << "With Global_pooling: true; only pad = 0 and stride = 1"; } if (pad_h_ != 0 || pad_w_ != 0) { CHECK(this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_AVE || this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) << "Padding implemented only for average and max pooling."; CHECK_LT(pad_h_, kernel_h_); CHECK_LT(pad_w_, kernel_w_); } }//初始化一些参数 template <typename Dtype> void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); if (global_pooling_) { kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); } pooled_height_ = static_cast<int>(ceil(static_cast<float>( height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; pooled_width_ = static_cast<int>(ceil(static_cast<float>( width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; //pooling之后的height 和 width if (pad_h_ || pad_w_) { // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) { --pooled_height_; } if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) { --pooled_width_; } CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_); CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_); } top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_);//输出top blob 的shape if (top.size() > 1) { top[1]->ReshapeLike(*top[0]); } // If max pooling, we will initialize the vector index part. if (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_MAX && top.size() == 1) { max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_); }//max pooling 反向求导时要用到取最大值的位置,max_idx_就是记录pooling过程中取max value 的index , //它是一个int型的blob 和输出top具有相同的shape // If stochastic pooling(随机pooling), we will initialize the random index part. if (this->layer_param_.pooling_param().pool() == PoolingParameter_PoolMethod_STOCHASTIC) { rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, pooled_width_); } } // TODO(Yangqing): Is there a faster way to do pooling in the channel-first // case? template <typename Dtype> void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); const int top_count = top[0]->count(); // We'll output the mask to top[1] if it's of size >1. const bool use_top_mask = top.size() > 1; int* mask = NULL; // suppress warnings about uninitalized variables Dtype* top_mask = NULL; // Different pooling methods. We explicitly do the switch outside the for // loop to save time, although this results in more code. switch (this->layer_param_.pooling_param().pool()) { case PoolingParameter_PoolMethod_MAX: // Initialize if (use_top_mask) { top_mask = top[1]->mutable_cpu_data(); caffe_set(top_count, Dtype(-1), top_mask); } else { mask = max_idx_.mutable_cpu_data(); //模板类Blob的mutable_cpu_diff()方法中使用了强制类型转换static_cast<Dtype*>() caffe_set(top_count, -1, mask); } //(*1)设为负无穷 caffe_set(top_count, Dtype(-FLT_MAX), top_data);//FLT_MAX在头文件#include <cfloat>中定义 // The main loop for (int n = 0; n < bottom[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_); int wend = min(wstart + kernel_w_, width_); //这四个量给出未pooling矩阵中确定pooling区域的两个顶点。 hstart = max(hstart, 0); wstart = max(wstart, 0);//一般情况下从0开始,而不是从负下标开始 //caffe 数据存储是一维数组的形式 //ph为pooling后输出top的height index,pool_index为对应一维数组index。 const int pool_index = ph * pooled_width_ + pw;//池化后的(输出)特征图中元素的位置索引 for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { const int index = h * width_ + w;//输入特征图中元素的位置索引 //对应一维数组的index if (bottom_data[index] > top_data[pool_index]) { top_data[pool_index] = bottom_data[index]; //由(*1)可知该循环将bottom中pooling区域(kernel的大小)的最大值放到对应top if (use_top_mask) { top_mask[pool_index] = static_cast<Dtype>(index); } else { mask[pool_index] = index;//每次Max_pooling操作最大元素的位置索引 } } } } } } // compute offset bottom_data += bottom[0]->offset(0, 1); //每次通过offset来确定新的bottom_data地址,offset()函数返回的其实仅仅是一个整数,大小为 //一个channel的元素的个数。也就是这样一个channel一个channel得遍历整个Blob。 top_data += top[0]->offset(0, 1); if (use_top_mask) { top_mask += top[0]->offset(0, 1); } else { mask += top[0]->offset(0, 1); //取下一个channel的mask } } } break; case PoolingParameter_PoolMethod_AVE: for (int i = 0; i < top_count; ++i) { top_data[i] = 0;//将top初始化为0 } // The main loop for (int n = 0; n < bottom[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_ + pad_h_); int wend = min(wstart + kernel_w_, width_ + pad_w_); int pool_size = (hend - hstart) * (wend - wstart); //pooling 区域的element 个数 hstart = max(hstart, 0); wstart = max(wstart, 0); hend = min(hend, height_); wend = min(wend, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { top_data[ph * pooled_width_ + pw] += bottom_data[h * width_ + w]; } //将pooling区域的element个数加起来 } top_data[ph * pooled_width_ + pw] /= pool_size; } } // compute offset bottom_data += bottom[0]->offset(0, 1); top_data += top[0]->offset(0, 1); } } break; case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: LOG(FATAL) << "Unknown pooling method."; } } template <typename Dtype> void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { //propagate_down的妙处于此!caffe.proto里面也有一个相同名字的定义 if (!propagate_down[0]) { return; } const Dtype* top_diff = top[0]->cpu_diff(); //模板类Blob的mutable_cpu_diff()方法中使用了强制类型转换 Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();//初始化bottom_diff 为0 // Different pooling methods. We explicitly do the switch outside the for // loop to save time, although this results in more codes. caffe_set(bottom[0]->count(), Dtype(0), bottom_diff); // We'll output the mask to top[1] if it's of size >1. const bool use_top_mask = top.size() > 1; const int* mask = NULL; // suppress warnings about uninitialized variables const Dtype* top_mask = NULL; switch (this->layer_param_.pooling_param().pool()) { case PoolingParameter_PoolMethod_MAX: // The main loop if (use_top_mask) { top_mask = top[1]->cpu_data(); } else { mask = max_idx_.cpu_data();//取数据成员max_idx_的地址 } for (int n = 0; n < top[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { const int index = ph * pooled_width_ + pw;//这里的index是前向传播池化后的特征图中元素的位置索引 const int bottom_index = use_top_mask ? top_mask[index] : mask[index]; bottom_diff[bottom_index] += top_diff[index];//计算“敏感值”分布 } } bottom_diff += bottom[0]->offset(0, 1); top_diff += top[0]->offset(0, 1); //指向下一个channel if (use_top_mask) { top_mask += top[0]->offset(0, 1); } else { mask += top[0]->offset(0, 1); } } } break; case PoolingParameter_PoolMethod_AVE: // The main loop for (int n = 0; n < top[0]->num(); ++n) { for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { int hstart = ph * stride_h_ - pad_h_; int wstart = pw * stride_w_ - pad_w_; int hend = min(hstart + kernel_h_, height_ + pad_h_); int wend = min(wstart + kernel_w_, width_ + pad_w_); int pool_size = (hend - hstart) * (wend - wstart); hstart = max(hstart, 0); wstart = max(wstart, 0); hend = min(hend, height_); wend = min(wend, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) {//遍历pooling区域 //mean_pooling中,bottom的误差值按pooling窗口中的大小计算,从上一层进行填充后,再除窗口大小 bottom_diff[h * width_ + w] += top_diff[ph * pooled_width_ + pw] / pool_size; //反向传播时各层间“误差敏感”总和不变,所以对应每个值需要平摊 } } } } // offset bottom_diff += bottom[0]->offset(0, 1); top_diff += top[0]->offset(0, 1);//指向下一个channel } } break; case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: LOG(FATAL) << "Unknown pooling method."; } } #ifdef CPU_ONLY STUB_GPU(PoolingLayer); #endif INSTANTIATE_CLASS(PoolingLayer); } // namespace caffe
相关文章推荐
- 性能卓越的js模板引擎--artTemplate
- jQuery源码解读二
- 浅谈html/CSS
- 重写alert,confirm 提示框样式
- JS动态添加div、li、img等元素及设置属性(实例)
- jQuery+CSS3实现404背景动画特效
- 让IE6 IE7 IE8 IE9 IE10 IE11支持Bootstrap的解决方法
- springmvc+bootstrap风格的树形地区
- webpack + react 优化:缩小js包体积
- JSP 使用%@include%报Duplicate local variable path 错误 解决方法 .
- html5的Form新特性
- DOM文档对象模型
- 梳理caffe代码conv_layer(十九)
- Jquery使用trigger()方法模拟事件
- 计算 JS 数组中最大最小值
- 修改 Input placeholder 的样式
- jQuery Mobile 触摸事件实例
- JavaScript对象总结一
- JS设计模式书籍、原则
- JavaScript之DOM实践