Caffe Source Code Walkthrough: Forward and Backward Propagation in conv_layer
2017-05-08 19:44
For the principle behind forward propagation, see: http://blog.csdn.net/xg123321123/article/details/53319080
For the principle behind error backpropagation, see: https://zhuanlan.zhihu.com/p/22860936
Below is the code from conv_layer.cpp:
```cpp
// Forward propagation
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // blobs_[0] holds the weights, blobs_[1] holds the bias
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // bottom.size() is the number of blobs in bottom, which equals the number in top
  for (int i = 0; i < bottom.size(); ++i) {
    // Get pointers to the input and output data
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    // n-th image in the batch
    for (int n = 0; n < this->num_; ++n) {
      // Convolution, implemented as a matrix product
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_data + n * this->top_dim_);
      if (this->bias_term_) {
        const Dtype* bias = this->blobs_[1]->cpu_data();
        // Add the bias
        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
      }
    }
  }
}

// Backward propagation
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down /* whether to backpropagate */,
      const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  for (int i = 0; i < top.size(); ++i) {
    // Gradient passed down from the layer above
    const Dtype* top_diff = top[i]->cpu_diff();
    const Dtype* bottom_data = bottom[i]->cpu_data();
    // Gradient to pass to the layer below
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
    // Bias gradient, if necessary.
    // The bias gradient accumulates the residuals directly
    // (the sum of all residuals in the map that each bias corresponds to)
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      for (int n = 0; n < this->num_; ++n) {
        // gradient w.r.t. weight. Note that we will accumulate diffs.
        // Gradient for the weights (used to update the weights):
        // correlate bottom_data with top_diff to get the weight update
        if (this->param_propagate_down_[0]) {
          this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,
              top_diff + n * this->top_dim_, weight_diff);
        }
        // gradient w.r.t. bottom data, if necessary.
        // Gradient for the bottom data (passed to the layer below):
        // correlate top_diff with the weights to get this layer's residual
        if (propagate_down[i]) {
          this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight,
              bottom_diff + n * this->bottom_dim_);
        }
      }
    }
  }
}
```
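To summarize what the three backward helpers compute, here is the math in matrix form via the im2col view (the notation below is mine, not from the original post). Writing $X_{col,n} = \mathrm{im2col}(x_n)$ for image $n$ in the batch, the forward pass is $y_n = W X_{col,n} + b\,\mathbf{1}^{\top}$, and the backward pass computes

$$\frac{\partial L}{\partial W} = \sum_n \frac{\partial L}{\partial y_n}\, X_{col,n}^{\top}, \qquad \frac{\partial L}{\partial b} = \sum_n \frac{\partial L}{\partial y_n}\,\mathbf{1}, \qquad \frac{\partial L}{\partial x_n} = \mathrm{col2im}\!\left(W^{\top}\frac{\partial L}{\partial y_n}\right)$$

The three terms map onto `weight_cpu_gemm`, `backward_cpu_bias`, and `backward_cpu_gemm` respectively, which is why the loops over `n` above accumulate into `weight_diff` and `bias_diff` but write per-image slices of `bottom_diff`.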
```cpp
// Convolution, implemented as a matrix product
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    if (!skip_im2col) {
      // If this is not a 1x1 convolution and im2col is not skipped,
      // conv_im2col_cpu turns each kernel-sized patch visited by the
      // sliding kernel into one column of a matrix, where
      //   height = kernel_dim_
      //   width  = output image height * output image width
      conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
    }
    col_buff = col_buffer_.cpu_data();
  }
  // Use caffe_cpu_gemm (which calls the cblas matrix multiply) for the computation
  for (int g = 0; g < group_; ++g) {
    // Compute each group separately
    // conv_out_channels_ / group_ is the number of output channels per group
    // kernel_dim_ = input channels per-group x kernel height x kernel width
    // This computes output[output_offset_ * g] =
    //   weights[weight_offset_ * g] X col_buff[col_offset_ * g]
    // weights has shape [conv_out_channels_ x kernel_dim_]
    // col_buff has shape [kernel_dim_ x (output height * output width)]
    // so output naturally has shape
    //   [conv_out_channels_ x (output height * output width)]
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
        group_, conv_out_spatial_dim_, kernel_dim_,
        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)0., output + output_offset_ * g);
  }
}
```
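For readers new to im2col, here is a minimal, self-contained sketch of the same idea (stride 1, no padding, single channel; the function name `im2col_simple` and the hand-rolled inner product are mine for illustration, not Caffe's API):

```cpp
// A minimal sketch of the im2col + GEMM idea used by forward_cpu_gemm above.
// Assumptions: single channel, stride 1, no padding, no groups.
#include <cstdio>
#include <vector>

// Unroll each kernel-sized patch of a (height x width) image into one
// column of the output matrix. The result has kernel_h * kernel_w rows
// and out_h * out_w columns.
void im2col_simple(const std::vector<float>& img, int height, int width,
                   int kernel_h, int kernel_w, std::vector<float>& col) {
  const int out_h = height - kernel_h + 1;
  const int out_w = width - kernel_w + 1;
  col.assign(kernel_h * kernel_w * out_h * out_w, 0.f);
  for (int kh = 0; kh < kernel_h; ++kh)
    for (int kw = 0; kw < kernel_w; ++kw)
      for (int oh = 0; oh < out_h; ++oh)
        for (int ow = 0; ow < out_w; ++ow) {
          const int row = kh * kernel_w + kw;  // row in the col matrix
          const int c   = oh * out_w + ow;     // column in the col matrix
          col[row * out_h * out_w + c] = img[(oh + kh) * width + (ow + kw)];
        }
}

int main() {
  // 3x3 image, 2x2 kernel -> col matrix is 4 rows x 4 columns.
  std::vector<float> img = {1, 2, 3,
                            4, 5, 6,
                            7, 8, 9};
  std::vector<float> kernel = {1, 0,
                               0, 1};  // weights as a 1 x kernel_dim_ row
  std::vector<float> col;
  im2col_simple(img, 3, 3, 2, 2, col);

  // The convolution is now a plain matrix product:
  //   output[1 x 4] = kernel[1 x 4] * col[4 x 4],
  // which is the role caffe_cpu_gemm plays above.
  const int out_dim = 4;  // out_h * out_w = 2 * 2
  for (int c = 0; c < out_dim; ++c) {
    float acc = 0.f;
    for (int r = 0; r < 4; ++r) acc += kernel[r] * col[r * out_dim + c];
    std::printf("%g ", acc);  // prints: 6 8 12 14
  }
  std::printf("\n");
  return 0;
}
```

Compiling and running this prints `6 8 12 14`, the 2x2 output of sliding the diagonal kernel over the 3x3 image. Caffe's `conv_im2col_cpu` generalizes the same unrolling to multiple channels, padding, stride, and dilation, and `caffe_cpu_gemm` replaces the two inner loops with an optimized cblas call.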