您的位置:首页 > Web前端

修改MTCNN中caffe源码,使计算loss时以对应标签是否为“-1”来判别是否忽略该样本

2017-07-31 19:55 405 查看
MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn

我们的训练数据标签格式:
wider face:
pos/001.jpg  1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg  0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

作者要求的训练数据标签格式:

pos/001.jpg  1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

neg/001.jpg  0 -1 -1 -1 -1 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

在“pts_loss”层(type:
"MTCNNEuclideanLoss")中,以"label"(分类的标签)来判断是否ignore。对于我们的训练数据标签格式:

class: ignore_label=-1, 可以正常分类;

bbox regression: ignore_label=0,
landmark样本中取值为-1的bbox标签也会参加计算,导致loss无法收敛;

landmark: ignore_label=0,
part样本中取值为-1的landmark标签也会参加计算,导致loss无法收敛;

解决思路:

在做class、bbox regression、landmark任务时,判断标签值是否全部为-1,以此作为ignore条件(下面的代码实现中,只要某个样本的回归标签有任一分量等于ignore_label,该样本就会被忽略)。

修改后"MTCNNEuclideanLoss.cpp"如下:

#include <vector>

#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

#include <iostream>
using namespace std;

namespace caffe {

/**
 * Validates the bottom blobs and sizes the cached difference buffer.
 *
 * bottom[0] and bottom[1] are the two regression inputs and must have the
 * same per-sample size. When loss_param.ignore_label is set, a third bottom
 * (bottom[2]) is mandatory; otherwise exactly two bottoms are expected.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";

  const bool has_ignore_label =
      this->layer_param().loss_param().has_ignore_label();
  if (has_ignore_label) {
    CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
    // The masked forward pass iterates bottom[2]->num() rows of
    // bottom[0]/bottom[1]; a larger label batch would index past their end.
    CHECK_EQ(bottom[2]->num(), bottom[0]->num())
        << "Label blob must have the same batch size as the inputs.";
  } else {
    CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
  }

  // diff_ caches (bottom[0] - bottom[1]) between the forward and backward pass.
  diff_.ReshapeLike(*bottom[0]);
}

/**
 * CPU forward pass of the (optionally masked) Euclidean loss.
 *
 * Without loss_param.ignore_label:
 *   loss = sum((bottom[0] - bottom[1])^2) / num / 2
 *
 * With loss_param.ignore_label the regression target itself (bottom[1]) acts
 * as the mask: a sample is skipped as soon as ANY component of its target
 * row equals ignore_label. Skipped rows keep a zero entry in diff_ and add
 * nothing to the loss. The masked loss is the plain sum over the kept rows of
 * ||row difference||^2 / 2 (it is not divided by the batch size).
 *
 * The upstream implementation masked on the class label held in bottom[2];
 * here bottom[2] only supplies the number of rows to visit.
 *
 * The row test works for any per-sample size, not only the 4 (bbox) and
 * 10 (landmark) values that used to be hard-coded; other sizes previously
 * produced a silent zero loss.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* prediction = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  Dtype* diff = diff_.mutable_cpu_data();

  if (!this->layer_param().loss_param().has_ignore_label()) {
    // Unmasked path: every element contributes, normalised by batch size.
    caffe_sub(count, prediction, target, diff);
    const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
    top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
    return;
  }

  const int ignore_label = this->layer_param().loss_param().ignore_label();
  const int num_rows = bottom[2]->num();
  // Per-sample element count; this is the quantity Reshape() checks for
  // equality between bottom[0] and bottom[1].
  const int dim = bottom[0]->count(1);

  // Rows that end up skipped must read as zero in the backward pass.
  memset(diff, 0, sizeof(Dtype) * count);

  Dtype loss = 0;
  for (int row = 0; row < num_rows; ++row) {
    const int offset = row * dim;

    // Keep the row only if none of its target components is the sentinel.
    bool keep = true;
    for (int c = 0; c < dim; ++c) {
      if (target[offset + c] == ignore_label) {
        keep = false;
        break;
      }
    }
    if (!keep) {
      continue;
    }

    caffe_sub(dim, prediction + offset, target + offset, diff + offset);
    loss += caffe_cpu_dot(dim, diff + offset, diff + offset) / Dtype(2);
  }

  top[0]->mutable_cpu_data()[0] = loss;
}

/**
 * CPU backward pass of the (optionally masked) Euclidean loss.
 *
 * The gradient w.r.t. bottom[0] is +alpha * diff_ and w.r.t. bottom[1] is
 * -alpha * diff_, where diff_ is the difference cached by the forward pass.
 *
 * Masked path (loss_param.ignore_label set): the forward pass zero-fills
 * diff_ and only writes the rows it keeps, so scaling the whole buffer
 * already yields a zero gradient for every skipped row; the mask does not
 * have to be recomputed here. The masked forward loss is an un-normalised
 * sum, therefore alpha is NOT divided by the batch size (dividing made the
 * CPU gradient num-times smaller than the true derivative of the reported
 * loss and different from the GPU backward pass).
 *
 * Unmasked path: the forward loss is divided by num, and so is alpha.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const bool has_ignore_label =
      this->layer_param().loss_param().has_ignore_label();

  for (int i = 0; i < 2; ++i) {
    if (!propagate_down[i]) {
      continue;
    }

    const Dtype sign = (i == 0) ? 1 : -1;
    Dtype alpha = sign * top[0]->cpu_diff()[0];
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();

    if (has_ignore_label) {
      // Start from a clean gradient buffer, as the masked path always did.
      memset(bottom_diff, 0, sizeof(Dtype) * bottom[i]->count());
    } else {
      alpha /= bottom[i]->num();
    }

    caffe_cpu_axpby(
        bottom[i]->count(),   // count
        alpha,                // alpha
        diff_.cpu_data(),     // a
        Dtype(0),             // beta
        bottom_diff);         // b
  }
}

// In CPU_ONLY builds the GPU methods are not compiled from the .cu file;
// STUB_GPU (a Caffe macro defined outside this file) is presumably what
// supplies placeholder Forward_gpu/Backward_gpu definitions - confirm against
// the Caffe headers.
#ifdef CPU_ONLY
STUB_GPU(MTCNNEuclideanLossLayer);
#endif

// Caffe macros (defined outside this file): instantiate the layer template and
// register it under the type name "MTCNNEuclideanLoss" used in the prototxt.
INSTANTIATE_CLASS(MTCNNEuclideanLossLayer);
REGISTER_LAYER_CLASS(MTCNNEuclideanLoss);

}  // namespace caffe


相应的"MTCNNEuclideanLoss.cu"如下:

#include <vector>

#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

#include <iostream>
using namespace std;

namespace caffe {

/**
 * Validates the bottom blobs and sizes the cached difference buffer.
 *
 * bottom[0] and bottom[1] are the two regression inputs and must have the
 * same per-sample size. When loss_param.ignore_label is set, a third bottom
 * (bottom[2]) is mandatory; otherwise exactly two bottoms are expected.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";

  const bool has_ignore_label =
      this->layer_param().loss_param().has_ignore_label();
  if (has_ignore_label) {
    CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
    // The masked forward pass iterates bottom[2]->num() rows of
    // bottom[0]/bottom[1]; a larger label batch would index past their end.
    CHECK_EQ(bottom[2]->num(), bottom[0]->num())
        << "Label blob must have the same batch size as the inputs.";
  } else {
    CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
  }

  // diff_ caches (bottom[0] - bottom[1]) between the forward and backward pass.
  diff_.ReshapeLike(*bottom[0]);
}

/**
 * GPU forward pass of the (optionally masked) Euclidean loss.
 *
 * Without loss_param.ignore_label:
 *   loss = sum((bottom[0] - bottom[1])^2) / num / 2
 *
 * With loss_param.ignore_label the regression target itself (bottom[1]) acts
 * as the mask: a sample is skipped as soon as ANY component of its target
 * row equals ignore_label. Skipped rows keep a zero entry in diff_ and add
 * nothing to the loss. The masked loss is the plain sum over the kept rows of
 * ||row difference||^2 / 2 (it is not divided by the batch size).
 *
 * The upstream implementation masked on the class label held in bottom[2];
 * here bottom[2] only supplies the number of rows to visit.
 *
 * The row test works for any per-sample size, not only the 4 (bbox) and
 * 10 (landmark) values that used to be hard-coded; other sizes previously
 * produced a silent zero loss.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  Dtype dot;

  if (!this->layer_param().loss_param().has_ignore_label()) {
    // Unmasked path: every element contributes, normalised by batch size.
    caffe_gpu_sub(
        count,
        bottom[0]->gpu_data(),
        bottom[1]->gpu_data(),
        diff_.mutable_gpu_data());
    caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
    top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
    return;
  }

  const int ignore_label = this->layer_param().loss_param().ignore_label();
  const int num_rows = bottom[2]->num();
  // Per-sample element count; this is the quantity Reshape() checks for
  // equality between bottom[0] and bottom[1].
  const int dim = bottom[0]->count(1);

  // Rows that end up skipped must read as zero in the backward pass.
  Dtype* diff = diff_.mutable_gpu_data();
  caffe_gpu_memset(sizeof(Dtype) * count, 0, diff);

  const Dtype* prediction = bottom[0]->gpu_data();
  const Dtype* target = bottom[1]->gpu_data();
  // The mask is evaluated in host code, so the targets are read through the
  // host pointer; the device pointer above is only handed to GPU routines.
  const Dtype* target_host = bottom[1]->cpu_data();

  for (int row = 0; row < num_rows; ++row) {
    const int offset = row * dim;

    // Keep the row only if none of its target components is the sentinel.
    bool keep = true;
    for (int c = 0; c < dim; ++c) {
      if (target_host[offset + c] == ignore_label) {
        keep = false;
        break;
      }
    }
    if (!keep) {
      continue;
    }

    caffe_gpu_sub(dim, prediction + offset, target + offset, diff + offset);
  }

  // Skipped rows are zero in diff, so one dot product over the whole buffer
  // equals the sum of the per-row dot products of the kept rows and avoids a
  // blocking device-to-host transfer per sample.
  caffe_gpu_dot(count, diff, diff, &dot);
  top[0]->mutable_cpu_data()[0] = dot / Dtype(2);
}

/**
 * GPU backward pass of the (optionally masked) Euclidean loss.
 *
 * The gradient w.r.t. bottom[0] is +alpha * diff_ and w.r.t. bottom[1] is
 * -alpha * diff_, where diff_ is the difference cached by the forward pass.
 *
 * Masked path (loss_param.ignore_label set): the forward pass zero-fills
 * diff_ and only writes the rows it keeps, so scaling the whole buffer
 * already yields a zero gradient for every skipped row. This replaces the
 * former per-sample loop, which re-read bottom[1] on the host and issued one
 * axpby per kept row, and which did nothing for per-sample sizes other than
 * 4 and 10. As before, alpha is not divided by the batch size on this path,
 * matching the un-normalised masked forward loss.
 *
 * Unmasked path: the forward loss is divided by num, and so is alpha.
 */
template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const bool has_ignore_label =
      this->layer_param().loss_param().has_ignore_label();

  for (int i = 0; i < 2; ++i) {
    if (!propagate_down[i]) {
      continue;
    }

    const Dtype sign = (i == 0) ? 1 : -1;
    Dtype alpha = sign * top[0]->cpu_diff()[0];
    Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();

    if (has_ignore_label) {
      // Start from a clean gradient buffer, as the masked path always did.
      caffe_gpu_memset(sizeof(Dtype) * bottom[i]->count(), 0, bottom_diff);
    } else {
      alpha /= bottom[i]->num();
    }

    caffe_gpu_axpby(
        bottom[i]->count(),   // count
        alpha,                // alpha
        diff_.gpu_data(),     // a
        Dtype(0),             // beta
        bottom_diff);         // b
  }
}

// Caffe macro (defined outside this file); presumably emits the explicit
// instantiations of Forward_gpu/Backward_gpu for the supported Dtype values -
// confirm against the Caffe headers.
INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer);

}  // namespace caffe

小结:完成mtcnn_euclidean_loss_layer.cu的修改时发现,const Dtype* b1 = bottom[1]->gpu_data(); 得到的是GPU(显存)上的指针,其指向的数据不能在主机端代码中直接打印或取值;改用cpu模式的指针 const Dtype* b1_cpu = bottom[1]->cpu_data() 就行了。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: