
Reading the Caffe Source: Forward and Backward Propagation in conv_layer

2017-05-08 19:44
For the theory behind forward propagation, see: http://blog.csdn.net/xg123321123/article/details/53319080

For the theory behind error backpropagation, see: https://zhuanlan.zhihu.com/p/22860936
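
As a quick reference (the notation here is mine, not the Caffe source's): let W be the weight matrix of shape [conv_out_channels_ x kernel_dim_], X_col the im2col-unrolled input of shape [kernel_dim_ x (output height x output width)], and Y the output. The three GEMM calls walked through below then compute

Y = W * X_col
dL/dW += (dL/dY) * X_col^T
dL/dX_col = W^T * (dL/dY)

with col2im folding dL/dX_col back into image layout to give bottom_diff.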

Here is the relevant code from conv_layer.cpp:

// Forward propagation
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // blobs_[0] holds the weights, blobs_[1] holds the biases
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // bottom.size() is the number of blobs in bottom, which equals the number in top
  for (int i = 0; i < bottom.size(); ++i) {
    // get pointers to the input and output data
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    // loop over the n-th image in the batch
    for (int n = 0; n < this->num_; ++n) {
      // the convolution itself, implemented as a matrix product
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_data + n * this->top_dim_);
      if (this->bias_term_) {
        const Dtype* bias = this->blobs_[1]->cpu_data();
        // add the bias
        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
      }
    }
  }
}
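
To make concrete what forward_cpu_gemm produces, here is a minimal naive sketch (my own illustration, not Caffe code; the name naive_conv is hypothetical) of the same cross-correlation for one image, one group, stride 1 and no padding. Caffe arrives at the identical result through im2col plus a single GEMM:

#include <vector>

// Naive valid cross-correlation: one image, stride 1, no padding.
// input:  [C_in  x H x W], row-major
// weight: [C_out x C_in x K x K]
// output: [C_out x (H-K+1) x (W-K+1)]
void naive_conv(const std::vector<float>& input,
                const std::vector<float>& weight,
                const std::vector<float>& bias,
                std::vector<float>& output,
                int C_in, int H, int W, int C_out, int K) {
  const int H_out = H - K + 1, W_out = W - K + 1;
  output.assign(C_out * H_out * W_out, 0.f);
  for (int co = 0; co < C_out; ++co)
    for (int y = 0; y < H_out; ++y)
      for (int x = 0; x < W_out; ++x) {
        float acc = bias[co];  // one bias per output channel
        for (int ci = 0; ci < C_in; ++ci)
          for (int ky = 0; ky < K; ++ky)
            for (int kx = 0; kx < K; ++kx)
              // multiply each kernel weight with the input pixel under it
              acc += input[(ci * H + y + ky) * W + x + kx] *
                     weight[((co * C_in + ci) * K + ky) * K + kx];
        output[(co * H_out + y) * W_out + x] = acc;
      }
}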
// Backward propagation
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down /* whether to backpropagate */,
      const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  for (int i = 0; i < top.size(); ++i) {
    // gradient handed down from the layer above
    const Dtype* top_diff = top[i]->cpu_diff();
    const Dtype* bottom_data = bottom[i]->cpu_data();
    // gradient to pass on to the layer below
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
    // Bias gradient, if necessary.
    // The bias gradient simply accumulates the residual
    // (the sum of all residuals in the feature map that each bias feeds).
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      for (int n = 0; n < this->num_; ++n) {
        // gradient w.r.t. weight. Note that we will accumulate diffs.
        // Correlate bottom_data with top_diff to obtain the weight update.
        if (this->param_propagate_down_[0]) {
          this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,
              top_diff + n * this->top_dim_, weight_diff);
        }
        // gradient w.r.t. bottom data, if necessary.
        // Correlate top_diff with the weights to obtain this layer's residual,
        // i.e. the gradient passed to the layer below.
        if (propagate_down[i]) {
          this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight,
              bottom_diff + n * this->bottom_dim_);
        }
      }
    }
  }
}
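
The same two gradients can be written as naive loops, again as a hand-written illustration under the assumptions of the earlier sketch (one image, one group, stride 1, no padding; naive_conv_backward is a hypothetical name). weight_diff accumulates across images, bottom_diff is rebuilt each time; the bias gradient is just the sum of top_diff over each output channel's spatial positions:

#include <vector>

// Naive gradients for the valid cross-correlation above.
// top_diff is dL/d(output); shapes match naive_conv.
void naive_conv_backward(const std::vector<float>& input,
                         const std::vector<float>& weight,
                         const std::vector<float>& top_diff,
                         std::vector<float>& weight_diff,   // accumulated; caller zero-initializes
                         std::vector<float>& bottom_diff,   // overwritten
                         int C_in, int H, int W, int C_out, int K) {
  const int H_out = H - K + 1, W_out = W - K + 1;
  bottom_diff.assign(C_in * H * W, 0.f);
  for (int co = 0; co < C_out; ++co)
    for (int y = 0; y < H_out; ++y)
      for (int x = 0; x < W_out; ++x) {
        const float g = top_diff[(co * H_out + y) * W_out + x];
        for (int ci = 0; ci < C_in; ++ci)
          for (int ky = 0; ky < K; ++ky)
            for (int kx = 0; kx < K; ++kx) {
              // dL/dW: correlate the input with the output gradient
              weight_diff[((co * C_in + ci) * K + ky) * K + kx] +=
                  g * input[(ci * H + y + ky) * W + x + kx];
              // dL/dX: scatter the output gradient back through the kernel
              bottom_diff[(ci * H + y + ky) * W + x + kx] +=
                  g * weight[((co * C_in + ci) * K + ky) * K + kx];
            }
      }
}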
// The convolution operation itself,
// implemented as a matrix multiplication.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    if (!skip_im2col) {
      // Unless this is a 1x1 convolution or im2col is explicitly skipped,
      // conv_im2col_cpu turns every kernel-sized image patch visited by the
      // sliding window into one column; the resulting buffer has
      // height = kernel_dim_ and
      // width = (output image height) x (output image width).
      conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
    }
    col_buff = col_buffer_.cpu_data();
  }
  // Perform the product with caffe_cpu_gemm (which calls the CBLAS matrix multiply).
  for (int g = 0; g < group_; ++g) {
    // Each group is computed separately.
    // conv_out_channels_ / group_ is the number of output channels per group.
    // kernel_dim_ = input channels per group x kernel height x kernel width.
    // This computes output[output_offset_ * g] =
    //   weights[weight_offset_ * g] X col_buff[col_offset_ * g].
    // weights has shape  [conv_out_channels_ x kernel_dim_];
    // col_buff has shape [kernel_dim_ x (output height x output width)];
    // so output naturally has shape
    //   [conv_out_channels_ x (output height x output width)].
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
        group_, conv_out_spatial_dim_, kernel_dim_,
        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)0., output + output_offset_ * g);
  }
}
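
For completeness, here is a minimal sketch of the im2col idea itself (my own simplification of conv_im2col_cpu, assuming stride 1 and no padding; im2col_simple is a hypothetical name). Each KxK patch becomes one column, so the result has C*K*K rows (kernel_dim_) and H_out*W_out columns:

#include <vector>

// Minimal im2col: stride 1, no padding.
// data_im:  [C x H x W], row-major
// data_col: [C*K*K x H_out*W_out], one column per output position
void im2col_simple(const std::vector<float>& data_im,
                   std::vector<float>& data_col,
                   int C, int H, int W, int K) {
  const int H_out = H - K + 1, W_out = W - K + 1;
  data_col.resize(C * K * K * H_out * W_out);
  for (int c = 0; c < C; ++c)
    for (int ky = 0; ky < K; ++ky)
      for (int kx = 0; kx < K; ++kx) {
        const int row = (c * K + ky) * K + kx;  // row index in data_col
        for (int y = 0; y < H_out; ++y)
          for (int x = 0; x < W_out; ++x)
            // copy the pixel this kernel position sees at output (y, x)
            data_col[row * (H_out * W_out) + y * W_out + x] =
                data_im[(c * H + y + ky) * W + x + kx];
      }
}

After this unrolling, the whole forward pass collapses into one matrix product: weights [C_out x C*K*K] times data_col [C*K*K x H_out*W_out], which is exactly the GEMM call above.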