2017-12-06 21:13 459 查看
@brief: Caffe全连接层ip2–>ip1反向传播(不包含ReluLayer)


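对每一个输出神经元 $y_i=\sum_{j=1}^{d} w_{ji}\,x_j+\text{bias\_multiplier}\cdot b_i$,$\therefore \dfrac{\partial y_i}{\partial w_{ji}}=x_j$;$b_i$ 为偏置项,共 10 个,$\therefore \dfrac{\partial y_i}{\partial b_i}=\text{bias\_multiplier}=1$(caffe 中设置偏置项乘子为 1)。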


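top_diff 为损失对输出 $y_i,\ i\in\{1,\dots,10\}$ 的梯度(在 GEMM 中经 CblasTrans 转置后为 $10\times 64$),bottom_data 为输入 $x_d,\ d\in\{1,\dots,500\}$($64\times 500$),this->blobs_[0]->mutable_cpu_diff() 为要更新(累加)的权重梯度矩阵 $\partial L/\partial W$($10\times 500$)。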




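根据链式法则:$\because$ 每个输入 $x_j$ 对所有输出 $y_i$ 都有贡献,$\therefore$ 所有 $y_i$ 的梯度都要回传并累加到 $x_j$ 上,即 $\dfrac{\partial L}{\partial x_j}=\sum_{i}\dfrac{\partial L}{\partial y_i}\,w_{ji}$。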

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/**
 * @brief One-time setup: reads the inner-product parameters and, unless the
 *        parameter blobs already exist, allocates and fills weights and bias.
 *
 * Sets N_ (number of outputs), K_ (flattened input length per sample),
 * bias_term_ and transpose_, then marks every parameter blob as needing
 * gradient propagation.
 *
 * NOTE(review): this copy of the function had lost several lines (the axis
 * argument, the blobs_ resize calls, the filler arguments / Fill() calls and
 * the closing braces). They are restored here to match upstream BVLC Caffe;
 * confirm against the project's own version.
 *
 * @param bottom input blobs; only bottom[0] is inspected.
 * @param top    output blobs (unused here).
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Number of inner-product outputs.
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  transpose_ = this->layer_param_.inner_product_param().transpose();
  // N_ is the number of output neurons of the fully connected layer.
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  // E.g. a 64 x 50 x 4 x 4 bottom with axis == 1 gives K_ = 50 * 4 * 4 = 800.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    // blobs_[0] holds the weights; blobs_[1] (optional) holds the bias.
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

/**
 * @brief Validates the input size against K_, computes the number of
 *        independent inner products M_, reshapes the top blob and (re)builds
 *        the all-ones bias multiplier.
 *
 * NOTE(review): this copy had lost the axis argument, the top[0]->Reshape()
 * call, the bias_multiplier_.Reshape() call and the closing braces. They are
 * restored to match upstream BVLC Caffe; confirm against the project's own
 * version.
 *
 * @param bottom input blobs; only bottom[0] is inspected.
 * @param top    output blobs; top[0] is reshaped.
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  // With axis == 1 this is the number of samples in the batch, whether the
  // bottom comes from a convolutional layer or another inner-product layer.
  M_ = bottom[0]->count(0, axis);
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    // Size the multiplier before writing M_ elements into it.
    bias_multiplier_.Reshape(bias_shape);
    // Every entry of the bias multiplier is 1.
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
  }
}

/**
 * @brief CPU forward pass: top = bottom * op(W) (+ bias broadcast per row).
 *
 * NOTE(review): in this copy the bias GEMM had lost its A operand
 * (bias_multiplier_.cpu_data()) and the closing braces were missing. They are
 * restored to match upstream BVLC Caffe; confirm against the project's own
 * version.
 *
 * @param bottom input blobs; bottom[0] data is read.
 * @param top    output blobs; top[0] data is overwritten.
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // Compute the output neuron values: (M_ x K_) * op(W) -> (M_ x N_).
  // beta == 0, so top_data is overwritten. W is transposed on the fly unless
  // transpose_ is set.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  if (bias_term_) {
    // Add the bias to every output row: (M_ x 1 ones) * (1 x N_ bias),
    // accumulated into top_data (beta == 1).
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

/**
 * @brief CPU backward pass: accumulates the weight and bias gradients and
 *        writes the gradient with respect to the bottom data.
 *
 * All GEMM/GEMV calls follow C = alpha * op(A) * op(B) + beta * C, with the
 * dimension arguments given in the (M, N, K) order used by math_functions.
 *
 * NOTE(review): in this copy the bias-gradient caffe_cpu_gemv call had been
 * collapsed into a trailing "//" comment (so it never ran) and the closing
 * braces were missing. The structure is restored to match upstream BVLC
 * Caffe; confirm against the project's own version.
 *
 * @param top            output blobs; top[0] diff is read.
 * @param propagate_down propagate_down[0] enables the bottom-data gradient.
 * @param bottom         input blobs; bottom[0] data is read, diff is written.
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();        // gradient from the top
    const Dtype* bottom_data = bottom[0]->cpu_data();  // layer input
    // Gradient with respect to weight
    if (transpose_) {
      // (K_ x M_) * (M_ x N_) accumulated into the K_ x N_ weight diff.
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      // op(top_diff) is N_ x M_, bottom_data is M_ x K_; the product is
      // accumulated (beta == 1) into the N_ x K_ weight diff. Note this is
      // the weight *gradient* buffer, not the weights themselves.
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // bias_diff (N_) = op(top_diff) (N_ x M_) * bias_multiplier (M_)
    //                  + bias_diff (N_).
    // The bias is a learned parameter; the multiplier is a vector of ones
    // (one entry per sample), so this sums top_diff over the samples.
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      // (M_ x N_) * op(W) (N_ x K_), overwriting the M_ x K_ bottom diff.
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * W (N_ x K_);
      // beta == 0, so the bottom diff is overwritten.
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
}

#ifdef CPU_ONLY


} // namespace caffe
