
Caffe source code: inner_product_layer.cpp

@brief: backward pass of Caffe's fully-connected layers, ip2 -> ip1 (the ReluLayer in between is not covered)



Take the backward pass between LeNet's fully-connected layers, ip2 -> ip1, as an example: ip2 has 500 input neurons and 10 output neurons, and the batch size is 64, so the weight matrix to be updated is 10x500 (Caffe stores it as N_ x K_).

For each output neuron, $y_i = \sum_{j=1}^{d} w_{ij} x_j + \text{bias\_multiplier} \cdot b_i$, so $\frac{\partial y_i}{\partial w_{ij}} = x_j$. The $b_i$ are the bias terms, 10 in total (one per output), so $\frac{\partial y_i}{\partial b_i} = \text{bias\_multiplier} = 1$ (Caffe sets the bias multiplier to 1).
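Stacking the batch into matrices makes the shape bookkeeping below easier to follow. Writing $X$ for the $M \times K$ input batch ($M = 64$, $K = 500$), $W$ for the $N \times K$ weight matrix ($N = 10$), $b$ for the bias vector, and $\mathbf{1}$ for the length-$M$ all-ones bias-multiplier vector, the forward pass and the three gradients are:

$$Y = X W^{\top} + \mathbf{1} b^{\top}, \qquad \frac{\partial L}{\partial W} = \Big(\frac{\partial L}{\partial Y}\Big)^{\top} X, \qquad \frac{\partial L}{\partial b} = \Big(\frac{\partial L}{\partial Y}\Big)^{\top} \mathbf{1}, \qquad \frac{\partial L}{\partial X} = \frac{\partial L}{\partial Y}\, W.$$

These are exactly the three BLAS calls dissected below (with transpose_ left at its default of false).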

Core code for the gradient with respect to the weights:

// weight_diff (N_ x K_) = 1.0 * top_diff^T (N_ x M_) * bottom_data (M_ x K_)
//                         + 1.0 * weight_diff (N_ x K_)
// top_diff is stored as M_ x N_; the CblasTrans flag reads it transposed.
// caffe_cpu_gemm computes C = alpha * op(A) * op(B) + beta * C
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
    N_, K_, M_,
    (Dtype)1., top_diff, bottom_data,
    (Dtype)1., this->blobs_[0]->mutable_cpu_diff());


Here top_diff holds the incoming gradients of the outputs $y_i$, $i \in [1, 10]$ (stored as 64x10 and read transposed as 10x64 via CblasTrans); bottom_data holds the inputs $x_d$, $d \in [1, 500]$ (64x500); and this->blobs_[0]->mutable_cpu_diff() is the gradient buffer being accumulated for $W$ (10x500).

In matrix form, the call computes and accumulates in one step:

this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data + 1.0 * this->blobs_[0]->mutable_cpu_diff()
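As a sanity check on the transpose flags, here is a minimal loop-form sketch of what this particular call computes. The function and variable names are hypothetical (not Caffe's); only the shapes, the row-major storage, and the alpha = beta = 1 accumulation mirror the call above.

// Loop-form equivalent of
// caffe_cpu_gemm(CblasTrans, CblasNoTrans, N, K, M,
//                1.0, top_diff, bottom_data, 1.0, weight_diff);
// All matrices are row-major, as in Caffe blobs.
void weight_gradient(const float* top_diff,     // M x N incoming gradient
                     const float* bottom_data,  // M x K layer input
                     float* weight_diff,        // N x K gradient buffer
                     int M, int N, int K) {
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      float acc = 0.f;
      for (int m = 0; m < M; ++m) {
        // (top_diff^T)(n, m) * bottom_data(m, k)
        acc += top_diff[m * N + n] * bottom_data[m * K + k];
      }
      weight_diff[n * K + k] += acc;  // beta = 1: accumulate, don't overwrite
    }
  }
}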


Core code for the gradient with respect to the bias:

if (bias_term_ && this->param_propagate_down_[1]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bias:
  // bias_diff (N_) = 1.0 * top_diff^T (N_ x M_) * bias_multiplier (M_)
  //                  + 1.0 * bias_diff (N_)
  caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
      bias_multiplier_.cpu_data(), (Dtype)1.,
      this->blobs_[1]->mutable_cpu_diff());
}
// The bias is a learned parameter; its multiplier is fixed to 1,
// one multiplier per sample.
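Because bias_multiplier_ is a vector of ones, the gemv amounts to summing top_diff over the batch dimension. A loop-form sketch under the same assumptions as before (hypothetical names, row-major storage):

// Loop-form equivalent of
// caffe_cpu_gemv(CblasTrans, M, N, 1.0, top_diff,
//                bias_multiplier, 1.0, bias_diff);
// With bias_multiplier all ones, this sums top_diff over the M samples.
void bias_gradient(const float* top_diff,  // M x N incoming gradient
                   float* bias_diff,       // N-element gradient buffer
                   int M, int N) {
  for (int n = 0; n < N; ++n) {
    float acc = 0.f;
    for (int m = 0; m < M; ++m) {
      acc += top_diff[m * N + n];  // * bias_multiplier[m], which is 1
    }
    bias_diff[n] += acc;  // beta = 1: accumulate
  }
}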




Core code for the gradient with respect to the input:

By the chain rule, since each input $x_j$ contributes to every output $y_i$, the incoming gradients of all the $y_i$ must be propagated back onto $x_j$, as the equation below shows.
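In symbols, for a single sample:

$$\frac{\partial L}{\partial x_j} = \sum_{i=1}^{N} \frac{\partial L}{\partial y_i}\,\frac{\partial y_i}{\partial x_j} = \sum_{i=1}^{N} \frac{\partial L}{\partial y_i}\, w_{ij},$$

or, over the whole batch, bottom_diff = top_diff * W (with W of shape N_ x K_), which is the non-transposed gemm below.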

if (propagate_down[0]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bottom data
  if (transpose_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  } else {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  }
}
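Note the beta of 0 here: unlike the parameter gradients, which accumulate into their diff buffers (beta = 1), the bottom gradient simply overwrites its buffer. A self-contained loop-form sketch of the non-transposed branch, with a tiny check using the LeNet ip2 shapes (all names hypothetical):

#include <cstdio>
#include <vector>

// Loop-form equivalent of
// caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M, K, N,
//                1.0, top_diff, weight, 0.0, bottom_diff);
void bottom_gradient(const float* top_diff,  // M x N incoming gradient
                     const float* weight,    // N x K weight matrix
                     float* bottom_diff,     // M x K gradient buffer
                     int M, int N, int K) {
  for (int m = 0; m < M; ++m) {
    for (int k = 0; k < K; ++k) {
      float acc = 0.f;
      for (int n = 0; n < N; ++n) {
        acc += top_diff[m * N + n] * weight[n * K + k];
      }
      bottom_diff[m * K + k] = acc;  // beta = 0: overwrite
    }
  }
}

int main() {
  // LeNet ip2 shapes: batch M = 64, inputs K = 500, outputs N = 10.
  const int M = 64, N = 10, K = 500;
  std::vector<float> top_diff(M * N, 1.f), weight(N * K, 0.1f);
  std::vector<float> bottom_diff(M * K);
  bottom_gradient(top_diff.data(), weight.data(), bottom_diff.data(), M, N, K);
  std::printf("bottom_diff[0] = %f\n", bottom_diff[0]);  // expect N * 1.0 * 0.1 = 1.0
  return 0;
}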




#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();  // number of inner-product outputs
  bias_term_ = this->layer_param_.inner_product_param().bias_term();  // defaults to true
  transpose_ = this->layer_param_.inner_product_param().transpose();  // defaults to false
  N_ = num_output;  // number of output neurons of this fully-connected layer
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);
  // Conv -> FC: the dimensions from axis onward are flattened into one vector,
  // e.g. ip1's bottom is 64x50x4x4, so K_ = 50*4*4 = 800.
  // FC -> FC: K_ is simply the number of input neurons.
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
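    // e.g. LeNet ip2: transpose_ is false by default, so weight_shape = {N_, K_} = {10, 500}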
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());  // initialize the FC layer's weight matrix
    // debug info
    /*Blob<Dtype>* blobip = this->blobs_[0].get();
    Dtype* pointerip = blobip->mutable_cpu_data();
    for (int i = 0; i < 400000; i++)
      std::cout << "The num at " << i << " is " << *(pointerip + i);*/
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());  // initialize the FC layer's bias
      // debug info
      //Blob<Dtype>* blobip = this->blobs_[1].get();
      //Dtype* pointerip = blobip->mutable_cpu_data();
      //for (int i = 0; i < blobip->count(); i++)
      //  std::cout << "The num at " << i << " is " << *(pointerip + i);
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  // Conv -> FC: the number of input feature-map tensors, i.e. the number of
  // samples; FC -> FC: likewise the number of samples.
  M_ = bottom[0]->count(0, axis);
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
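  // e.g. LeNet ip2: bottom is 64 x 500 with axis == 1, so the top is reshaped to 64 x 10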
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());  // set every bias multiplier to 1
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // Compute the output neuron values:
  // top_data (M_ x N_) = bottom_data (M_ x K_) * weight^T
  // (weight is N_ x K_ when transpose_ is false)
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  if (bias_term_) {  // add the bias to the output neuron values
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();        // pointer to the top (output) gradient
    const Dtype* bottom_data = bottom[0]->cpu_data();  // pointer to the input neurons
    // Gradient with respect to weight
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      // weight_diff (N_ x K_) = 1.0 * top_diff^T (N_ x M_) * bottom_data (M_ x K_)
      //                         + 1.0 * weight_diff (N_ x K_)
      // caffe_cpu_gemm computes C = alpha * op(A) * op(B) + beta * C;
      // its arguments follow the M, N, K order used in math_functions.
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());  // accumulates into the weight-gradient blob
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // bias_diff (N_) = 1.0 * top_diff^T (N_ x M_) * bias_multiplier (M_)
    //                  + 1.0 * bias_diff (N_)
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }  // The bias is learned; bias_multiplier_ is a vector of ones, one entry per sample.
  //const Dtype* p = bias_multiplier_.cpu_data();
  //std::cout << *(p + 1);

  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * weight (N_ x K_);
      // arguments strictly follow the parameter order in math_functions
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

} // namespace caffe