您的位置：首页 > 产品设计 > 产品经理

DPM(Deformable Parts Model) 源码分析-训练（三）

2017-08-13 19:12 477 查看

原文转载自：http://blog.csdn.net/ttransposition/article/details/12954631

DPM(Deformable Parts Model)原理

首先调用格式：

example:

pascal('person', 2); % train and evaluate a 2 component person model

pascal_train.m

[cpp] view
plaincopy

function model = pascal_train(cls, n)

% model = pascal_train(cls, n)
% Train a model with n components using the PASCAL dataset.
%
%   cls  class name; also used as the prefix of the cache file names
%   n    number of components (e.g. 2)
%
% Training runs in stages. Each stage first tries to load its result from
% [cachedir cls '_<stage>'] and only trains (and then saves) when the load
% fails, so an interrupted run resumes at the first stage without a cache file.
% NOTE(review): cachedir is presumably defined by the globals script - confirm.

globals;

% ---------- load positive and negative examples ----------
% Per the original annotation: pos.im / neg.im hold image paths and
% pos.x1..pos.y2 hold the bounding box; negatives carry no box.
[pos, neg] = pascal_data(cls);

% Split the positives into n groups; spos{i} is the positive set used to
% initialize and train component i below, so the component label is fixed
% during phase 1.
% NOTE(review): the original annotation says the split is by aspect ratio into
% equal-sized groups and that the component becomes a latent variable from
% phase 2 on - confirm against split.m / train.m.
spos = split(pos, n);

% The later phases mine hard negatives from a subset of at most 200 negative
% images; clamp the range so that smaller negative sets do not raise an
% index-out-of-bounds error.
nsub = min(200, numel(neg));

% ---------- phase 1: train root filters using warped positives & random negatives ----------
try
  load([cachedir cls '_random']);
catch
  % initialize and train one root-filter-only model per component
  for i=1:n
    models{i} = initmodel(spos{i});
    % train fills in models{i}.rootfilters{1}.w and models{i}.offsets{1}.w
    models{i} = train(cls, models{i}, spos{i}, neg, 1, 1, 1, 1, 2^28);
  end
  save([cachedir cls '_random'], 'models');
end

% ---------- phase 2: merge models and train using latent detections & hard negatives ----------
try
  load([cachedir cls '_hard']);
catch
  model = mergemodels(models);
  model = train(cls, model, pos, neg(1:nsub), 0, 0, 2, 2, 2^28, true, 0.7);
  save([cachedir cls '_hard'], 'model');
end

% ---------- phase 3: add parts and update models using latent detections & hard negatives ----------
try
  load([cachedir cls '_parts']);
catch
  for i=1:n
    model = addparts(model, i, 6);
  end
  % use more data mining iterations in the beginning
  model = train(cls, model, pos, neg(1:nsub), 0, 0, 1, 4, 2^30, true, 0.7);
  model = train(cls, model, pos, neg(1:nsub), 0, 0, 6, 2, 2^30, true, 0.7, true);
  save([cachedir cls '_parts'], 'model');
end

% ---------- update models using full set of negatives ----------
try
  load([cachedir cls '_mine']);
catch
  model = train(cls, model, pos, neg, 0, 0, 1, 3, 2^30, true, 0.7, true, ...
                0.003*model.numcomponents, 2);
  save([cachedir cls '_mine'], 'model');
end

% ---------- train bounding box prediction ----------
try
  load([cachedir cls '_final']);
catch
  % NOTE(review): the original annotator asks why trainbox solves the
  % regression by direct matrix division without guarding against a singular
  % system, although the paper describes least squares - check trainbox.m.
  model = trainbox(cls, model, pos, 0.7);
  save([cachedir cls '_final'], 'model');
end

initmodel.m

[cpp] view
plaincopy

function model = initmodel(pos, sbin, size)

% model = initmodel(pos, sbin, size)
% Initialize a one-component model structure holding an offset block and an
% all-zero root filter.
%
%   pos   struct array of positive examples with fields x1, y1, x2, y2
%   sbin  (optional) HOG cell size in pixels; defaults to 8
%   size  (optional) root filter size [height width] in HOG cells; when not
%         supplied it is computed from statistics of the positive examples
%
% Fields set here:
%   model.sbin                     HOG cell size
%   model.interval                 10
%   model.numblocks                2 (offset block + root filter block)
%   model.numcomponents            1
%   model.blocksizes               (1) = 1, (2) = height * ceil(width/2) * 31
%   model.regmult                  [0 1]
%   model.learnmult                [20 1]
%   model.lowerbounds              -100 for every parameter
%   model.maxsize, model.minsize   root filter size
%   model.rootfilters{1}           .size (in HOG cells), .w (zeros), .blocklabel = 2
%   model.offsets{1}               .w = 0, .blocklabel = 1
%   model.components{1}            .rootindex = 1, .offsetindex = 1, .parts = {},
%                                  .dim = 2 + blocksizes(1) + blocksizes(2),
%                                  .numblocks = 2
%   model.partfilters, model.defs  empty cell arrays
% NOTE(review): per the original annotation, later phases append part-filter
% and deformation blocks that share the same blocklabel numbering - confirm
% against mergemodels.m / addparts.m.

% pick mode of aspect ratios
h = [pos(:).y2]' - [pos(:).y1]' + 1;
w = [pos(:).x2]' - [pos(:).x1]' + 1;
xx = -2:.02:2;
% smoothing window: weights fall from 1 at the center to exp(-25) at the ends
filter = exp(-[-100:100].^2/400);
% histogram of log(h/w) over the bin centers xx, smoothed by the window
aspects = hist(log(h./w), xx);
aspects = convn(aspects, filter, 'same');
[peak, I] = max(aspects);
% most common height/width ratio after smoothing
aspect = exp(xx(I));

% pick 20 percentile area
areas = sort(h.*w);
% with fewer than 5 positives floor(...) is 0, which is not a valid index;
% fall back to the smallest area in that case
idx = max(1, floor(length(areas) * 0.2));
area = areas(idx);
% clamp the template area to [3000, 5000] pixels
area = max(min(area, 5000), 3000);

% pick dimensions
w = sqrt(area/aspect);
h = w*aspect;

% size of HOG features
% (the thresholds were 4 and 5, which is always true for a 3-argument
% function, so a supplied sbin/size used to be ignored)
if nargin < 2
  model.sbin = 8;
else
  model.sbin = sbin;
end

% size of root filter
if nargin < 3
  model.rootfilters{1}.size = [round(h/model.sbin) round(w/model.sbin)];
else
  model.rootfilters{1}.size = size;
end

% set up offset
model.offsets{1}.w = 0;
model.offsets{1}.blocklabel = 1;
model.blocksizes(1) = 1;
model.regmult(1) = 0;
model.learnmult(1) = 20;
model.lowerbounds{1} = -100;

% set up root filter
model.rootfilters{1}.w = zeros([model.rootfilters{1}.size 31]);
height = model.rootfilters{1}.size(1);
% root filter is symmetric, so only ceil(width/2) columns are counted as
% parameters in the block size
width = ceil(model.rootfilters{1}.size(2)/2);
model.rootfilters{1}.blocklabel = 2;
model.blocksizes(2) = width * height * 31;
model.regmult(2) = 1;
model.learnmult(2) = 1;
model.lowerbounds{2} = -100*ones(model.blocksizes(2),1);

% set up one component model
model.components{1}.rootindex = 1;
model.components{1}.offsetindex = 1;
model.components{1}.parts = {};
model.components{1}.dim = 2 + model.blocksizes(1) + model.blocksizes(2);
model.components{1}.numblocks = 2;

% initialize the rest of the model structure
model.interval = 10;
model.numcomponents = 1;
model.numblocks = 2;
model.partfilters = {};
model.defs = {};
model.maxsize = model.rootfilters{1}.size;
model.minsize = model.rootfilters{1}.size;

learn.cc

[cpp] view
plaincopy

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <math.h>

#include <sys/time.h>

#include <errno.h>

/*

* Optimize LSVM objective function via gradient descent.

*

* We use an adaptive cache mechanism. After a negative example

* scores beyond the margin multiple times it is removed from the

* training set for a fixed number of iterations.

*/

// Data File Format

// EXAMPLE*

//

// EXAMPLE:

// long label ints

// blocks int

// dim int

// DATA{blocks}

//

// DATA:

// block label float

// block data floats

//

// Internal Binary Format

// len int (byte length of EXAMPLE)

// EXAMPLE <see above>

// unique flag byte

// number of iterations: gd() keeps making passes over the data until this
// many example updates have been performed
#define ITER 5000000

// small cache parameters
// An example takes part in updates while its counter is <= INCACHE; a
// negative example whose counter reaches INCACHE and which is again scored
// outside the margin gets its counter set to WAIT and is then skipped until
// the counter has been decremented back to INCACHE.
#define INCACHE 3
#define WAIT 10

// error checking: when e is false, print file, line, the failed expression
// and strerror(errno), then exit(1).
// The continuation backslash must be immediately followed by the macro body
// (no blank line), otherwise the macro expands to nothing.
#define check(e) \
  ((e) ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, __LINE__, #e, strerror(errno)), exit(1)))

// Layout of an in-memory example (char *ex), in ints from the start:
//   [0]                 byte length of the example
//   [1 .. labelsize]    extended label, [1] being the class label
//   [labelsize+1]       number of non-zero blocks
//   [labelsize+2]       dim
//   [labelsize+3 ...]   float data segment
// Macro arguments are parenthesized so that expression arguments
// (e.g. ptr + offset) expand correctly.

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)(ex))[labelsize+1])

// float pointer to data segment of example ex
#define EX_DATA(ex) ((float *)((ex) + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)(ex))[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)(data)[0])-1)

// number of ints in an example's extended label (read from the header file)
int labelsize;

// sum of all block sizes (computed in main)
int dim;

// comparison function for sorting examples (qsort callback)
//
// a and b each point at a char* example. Examples are ordered by the raw
// bytes of their extended label (labelsize ints that follow the leading
// length int); ties are broken by byte length (shorter first) and, for equal
// lengths, by a bytewise comparison of the whole example.
// NOTE(review): the original annotation states the label is
// [label id level x y], i.e. labelsize == 5 - confirm against the writer.
int comp(const void *a, const void *b) {
  char *lhs = *((char **)a);
  char *rhs = *((char **)b);

  // sort by extended label first...
  int diff = memcmp(lhs + sizeof(int), rhs + sizeof(int),
                    labelsize*sizeof(int));
  if (diff != 0)
    return diff;

  // ...labels are the same: fall back to length, then to the whole example
  int lhslen = *((int *)lhs);
  int rhslen = *((int *)rhs);
  if (lhslen != rhslen)
    return (lhslen < rhslen) ? -1 : 1;

  return memcmp(lhs + sizeof(int), rhs + sizeof(int), lhslen);
}

// a collapsed example is a sequence of examples
// (collapse() builds one of these per run of consecutive examples that share
// the same extended label)
struct collapsed {

char **seq; // points at the first example pointer of the run inside the sorted array

int num; // number of consecutive examples in the run

};

// set of collapsed examples
// plus the per-block parameters that main() reads from the header file
struct data {

collapsed *x; // array of collapsed examples (allocated by collapse())

int num; // number of entries in x

int numblocks; // number of parameter blocks in the model

int *blocksizes; // number of parameters in each block

float *regmult; // per-block multiplier applied to the regularization step

float *learnmult; // per-block multiplier applied to the learning rate

};

// seed the random number generator with the current time
// (only the microsecond field of gettimeofday() is used as the seed)
void seed_time() {
  struct timeval now;
  check(gettimeofday(&now, NULL) == 0);
  long seed = (long)now.tv_usec;
  srand48(seed);
}

// smaller of x and y; returns x when they compare equal
static inline double min(double x, double y) {
  if (x <= y)
    return x;
  return y;
}

// larger of x and y; returns y when they compare equal
static inline double max(double x, double y) {
  if (x <= y)
    return y;
  return x;
}

// gradient descent

// Stochastic gradient descent over the collapsed examples in X; the weights w are updated in place.
// C scales the example learning rate, J additionally weights updates from positive examples,
// w[b][k] is parameter k of block b, lb[b][k] is its lower bound.
// Per the original annotation this follows the steps listed after equation 17 of the paper.
void gd(double C, double J, data X, double **w, double **lb) {

// NOTE(review): the original annotation gives typical call values C=0.0002, J=1, w all zero, lb all -100 - confirm against the caller

//

int num = X.num; // number of collapsed examples (groups)

// state for random permutations

int *perm = (int *)malloc(sizeof(int)*X.num);

check(perm != NULL);

// state for small cache

int *W = (int *)malloc(sizeof(int)*num);

check(W != NULL);

for (int j = 0; j < num; j++)

W[j] = 0;

int t = 0;

while (t < ITER) { // one pass over the permuted data per loop; t counts example updates, not passes

// pick random permutation

for (int i = 0; i < num; i++) // identity permutation over the groups

perm[i] = i;

// shuffle: position swapi is swapped with a random position in [swapi, num)

// Annotator's note: the paper picks one random example per step; here every pass visits

// a fresh random ordering, so each group is visited once per pass.

for (int swapi = 0; swapi < num; swapi++) {

int swapj = (int)(drand48()*(num-swapi)) + swapi; // drand48() is uniform in [0, 1)

int tmp = perm[swapi];

perm[swapi] = perm[swapj];

perm[swapj] = tmp;

}

// count number of examples in the small cache

int cnum = 0; // number of groups with W[i] <= INCACHE at the start of this pass; scales rateX below

for (int i = 0; i < num; i++) {

if (W[i] <= INCACHE) // INCACHE is 3

cnum++;

}

// ---- one pass over the permuted groups ----

for (int swapi = 0; swapi < num; swapi++) {

// select example

int i = perm[swapi];

collapsed x = X.x[i];

// skip if example is not in small cache

// W[i] is reset to 0 whenever the group violates the margin. For a negative group every

// non-violating visit increments W[i] until it equals INCACHE; the next non-violating visit

// sets it to WAIT. While W[i] > INCACHE the group is skipped and W[i] is decremented once per pass,

// so with INCACHE=3 and WAIT=10 an evicted group sits out 7 passes before it is evaluated again.

if (W[i] > INCACHE) { // out of cache: count down and skip

W[i]--;

continue;

}

// learning rate

double T = t + 1000.0; // offset of 1000 keeps the first steps from being as large as 1/t

double rateX = cnum * C / T;

double rateR = 1.0 / T;

if (t % 10000 == 0) {

printf(".");

fflush(stdout); // make the progress dot visible immediately

}

t++;

// compute max over latent placements

// (step 3 in the annotator's numbering of the paper's procedure)

int M = -1;

double V = 0;

// loop over the members of the group and keep the highest-scoring one (M) and its score (V)

// NOTE(review): the annotator states x.num is 1 while training root filters because random negatives have distinct ids - confirm

for (int m = 0; m < x.num; m++) {

double val = 0;

char *ptr = x.seq[m];

float *data = EX_DATA(ptr); // start of the float data segment, (labelsize+3) ints into the example

// the data segment is a sequence of blocks, each stored as: block label | block data

// NOTE(review): per the annotator, in phase 1 this is label 1 | 1 float | label 2 | h*w/2*31 floats - confirm

int blocks = NUM_NONZERO(ptr); // number of blocks stored for this example

for (int j = 0; j < blocks; j++) {

int b = BLOCK_IDX(data); // 0-based block index taken from the block label

data++;

for (int k = 0; k < X.blocksizes[b]; k++) // dot product of block b's weights with its features

val += w[b][k] * data[k];

data += X.blocksizes[b];

}

if (M < 0 || val > V) {

M = m;

V = val;

}

}

// update model

// (step 4 and the regularization half of step 5 in the annotator's numbering)

// regularization step: shrink every weight towards zero

for (int j = 0; j < X.numblocks; j++) { // all blocks of the model, not only those present in the example

double mult = rateR * X.regmult[j] * X.learnmult[j]; // zero for blocks whose regmult is 0, so those are not shrunk

for (int k = 0; k < X.blocksizes[j]; k++) {

w[j][k] -= mult * w[j][k]; // applied whether or not the example violates the margin

}

}

char *ptr = x.seq[M];

int label = LABEL(ptr);

// (step 5) margin violated: move the weights along label * features of the best placement

if (label * V < 1.0)

{

W[i] = 0;

float *data = EX_DATA(ptr);

int blocks = NUM_NONZERO(ptr);

for (int j = 0; j < blocks; j++) {

int b = BLOCK_IDX(data);

// step size: +J*rateX for positives, -rateX for negatives, times the block's learnmult

double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];

data++;

for (int k = 0; k < X.blocksizes[b]; k++)

w[b][k] += mult * data[k];

data += X.blocksizes[b];

}

} else if (label == -1)

{

if (W[i] == INCACHE) // counter already at the cache limit: evict

W[i] = WAIT; // WAIT is 10

else

W[i]++;

}

}

// apply lowerbounds

// (done once per pass, after all groups have been visited)

for (int j = 0; j < X.numblocks; j++) {

for (int k = 0; k < X.blocksizes[j]; k++) {

w[j][k] = max(w[j][k], lb[j][k]);

}

}

}

free(perm);

free(W);

}

// score examples
//
// For each of the num examples, sums w[b][k] * feature[k] over every block b
// stored in the example. Returns a malloc'ed array of num scores that the
// caller must free. The class label is not applied to the score.
double *score(data X, char **examples, int num, double **w) {
  double *scores = (double *)malloc(sizeof(double)*num);
  check(scores != NULL);

  for (int e = 0; e < num; e++) {
    char *ex = examples[e];
    float *feat = EX_DATA(ex);
    int nblocks = NUM_NONZERO(ex);
    double total = 0.0;

    for (int j = 0; j < nblocks; j++) {
      int b = BLOCK_IDX(feat);
      int bsize = X.blocksizes[b];
      double *wb = w[b];
      feat++;  // step over the block label to the block's features
      for (int k = 0; k < bsize; k++)
        total += wb[k] * feat[k];
      feat += bsize;
    }

    scores[e] = total;
  }

  return scores;
}

// merge examples with identical labels
//
// examples must already be sorted so that examples with equal extended labels
// are adjacent. Each run of equal labels becomes one collapsed entry whose
// seq points at the run's first slot in examples. Stores the allocated array
// in X->x and the number of runs in X->num.
void collapse(data *X, char **examples, int num) {
  collapsed *groups = (collapsed *)malloc(sizeof(collapsed)*num);
  check(groups != NULL);

  int last = 0;  // index of the run currently being extended
  groups[last].seq = examples;
  groups[last].num = 1;

  for (int j = 1; j < num; j++) {
    char *head = groups[last].seq[0];
    int differs = memcmp(head+sizeof(int), examples[j]+sizeof(int),
                         labelsize*sizeof(int));
    if (differs == 0) {
      // same extended label as the head of the current run
      groups[last].num++;
      continue;
    }
    // label changed: start a new run at position j
    last++;
    groups[last].seq = examples + j;
    groups[last].num = 1;
  }

  X->x = groups;
  X->num = last+1;
}

//调用参数 C=0.0002, J=1, hdrfile, datfile, modfile, inffile, lobfile

int main(int argc, char **argv) {

seed_time();

int count;

data X;

// command line arguments

check(argc == 8);

double C = atof(argv[1]);

double J = atof(argv[2]);

char *hdrfile = argv[3];

char *datfile = argv[4];

char *modfile = argv[5];

char *inffile = argv[6];

char *lobfile = argv[7];

// read header file

FILE *f = fopen(hdrfile, "rb");

check(f != NULL);

int header[3];

count = fread(header, sizeof(int), 3, f);

check(count == 3);

int num = header[0]; //正负样本总数

labelsize = header[1]; // labelsize = 5; [label id level x y]

X.numblocks = header[2]; // 2

X.blocksizes = (int *)malloc(X.numblocks*sizeof(int)); //（1）=1，（2）= root.h*root.w/2*31

count = fread(X.blocksizes, sizeof(int), X.numblocks, f);

check(count == X.numblocks);
2a9b0

X.regmult = (float *)malloc(sizeof(float)*X.numblocks); //0 ，1

check(X.regmult != NULL);

count = fread(X.regmult, sizeof(float), X.numblocks, f);

check(count == X.numblocks);

X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);//20， 1

check(X.learnmult != NULL);

count = fread(X.learnmult, sizeof(float), X.numblocks, f);

check(count == X.numblocks);

check(num != 0);

fclose(f);

printf("%d examples with label size %d and %d blocks\n",

num, labelsize, X.numblocks);

printf("block size, regularization multiplier, learning rate multiplier\n");

dim = 0;

for (int i = 0; i < X.numblocks; i++) {

dim += X.blocksizes[i];

printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);

}

// ---------------从 datfile 读取正负 examples----------------

// examples [i] 存储了第i个样本的信息长度为 1 int + 7 int +dim 个float + 1 byte

// 1 int legth 样本包括信息头在内的总字节长度

// 7 int [1/-1 id 0 0 0 2 dim] ,id为样本编号，[label id level centry_x centry_y]，2是block个数

// dim float feature,dim=2+1+root.h*root.w/2*31,意义如下

// block1 label | block2 data|block2 lable | block2 data

// 1 | 1 | 2 | h*w/2*31个float

// 1 byte unique=0

f = fopen(datfile, "rb");

check(f != NULL);

printf("Reading examples\n");

//+,-example数据

char **examples = (char **)malloc(num*sizeof(char *));

check(examples != NULL);

for (int i = 0; i < num; i++) {

// we use an extra byte in the end of each example to mark unique

// we use an extra int at the start of each example to store the

// example's byte length (excluding unique flag and this int)

//[legth label id level x y unique] unique=0

int buf[labelsize+2];

//写入时的值为[1/-1 i 0 0 0 2 dim]

count = fread(buf, sizeof(int), labelsize+2, f);

check(count == labelsize+2);

// byte length of an example's data segment

//---前面七个是头，后面dim个float是样本特征数据，dim=2+1+root.h*root.w/2*31

int len = sizeof(int)*(labelsize+2) + sizeof(float)*buf[labelsize+1];

// memory for data, an initial integer, and a final byte

examples[i] = (char *)malloc(sizeof(int)+len+1);

check(examples[i] != NULL);

// set data segment's byte length

((int *)examples[i])[0] = len;

// set the unique flag to zero

examples[i][sizeof(int)+len] = 0;

// copy label data into example

for (int j = 0; j < labelsize+2; j++)

((int *)examples[i])[j+1] = buf[j];

// read the rest of the data segment into the example

count = fread(examples[i]+sizeof(int)*(labelsize+3), 1,

len-sizeof(int)*(labelsize+2), f);

check(count == len-sizeof(int)*(labelsize+2));

}

fclose(f);

printf("done\n");

// sort

printf("Sorting examples\n");

char **sorted = (char **)malloc(num*sizeof(char *));

check(sorted != NULL);

memcpy(sorted, examples, num*sizeof(char *));

//qsort 库函数，真正的比较函数为 comp

//从小到大，快速排序

//依次按照样本类别->id->level->cx->cy 排序样本

//如果前面五个量都一样……

//1.等长度，比较所有字节；

//2.谁长谁小，长度不同是因为不同的component的尺寸不一致

qsort(sorted, num, sizeof(char *), comp);

printf("done\n");

// find unique examples

// 唯一的样本，unique flag=1,

// 相同的样本第一个样本的unique flag为1，其余为0 ，有的样本的位置被，unique替代了，但是并没有完全删除掉

int i = 0;

int len = *((int *)sorted[0]); //负样本的第一个

sorted[0][sizeof(int)+len] = 1; // unique flag 置 1

for (int j = 1; j < num; j++) {

int alen = *((int *)sorted[i]);

int blen = *((int *)sorted[j]);

if (alen != blen || memcmp(sorted[i] + sizeof(int), sorted[j] + sizeof(int), alen)) //component不同 || 不同样本

{

i++;

sorted[i] = sorted[j];

sorted[i][sizeof(int)+blen] = 1; //标记为 unique

}

}

int num_unique = i+1;

printf("%d unique examples\n", num_unique);

// -------------------collapse examples----------------

// 前面是找完全不一样的样本，这里是分组

// label 的五个量 [label id level centry_x centry_y] 相同的分为一组，在detect时，写入了datfile

// 负样本的 cx,cy都是相对于整张图片的，正样本是相对于剪切后的图像

// 前面五个全相同，

// 对于phase1 不可能，因为正负样本的id都不相同

// 对于phase2 正样本只保留了最有可能是正样本的样本，只有一种情况,

// rootfilter1,rootfilter2在同一张图片(id相同)，检测出来的 Hard负样本的cx,cy相同，因此一组最多应该只能出现2个（待验证）

// 原因是此时的latent variable 为（cx,cy,component），上述情况相下，我们只能保留component1或者component2

// 后续训练时，这两个量是连续使用的，为什么呢？？

// collapse.seq(char **) 记录了每一组的第一个样本

// collapse.num 每组的个数

// X.num 组数

// X.x=&collapse[0]，也就是第一个 collapse的地址

collapse(&X, sorted, num_unique);

printf("%d collapsed examples\n", X.num);

// initial model

// 读modfile文件，得到w的初始值。phase 1 初始化为全 0，phase 2 为上一次训练的结果……

double **w = (double **)malloc(sizeof(double *)*X.numblocks);//2

check(w != NULL);

f = fopen(modfile, "rb");

for (int i = 0; i < X.numblocks; i++) {

w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]); //（1）=1，（2）= root.h*root.w/2*31

check(w[i] != NULL);

count = fread(w[i], sizeof(double), X.blocksizes[i], f);

check(count == X.blocksizes[i]);

}

fclose(f);

// lower bounds

// 读lobfile文件，初始化为全滤波器参数下线-100 ……

double **lb = (double **)malloc(sizeof(double *)*X.numblocks);

check(lb != NULL);

f = fopen(lobfile, "rb");

for (int i = 0; i < X.numblocks; i++) {

lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);

check(lb[i] != NULL);

count = fread(lb[i], sizeof(double), X.blocksizes[i], f);

check(count == X.blocksizes[i]);

}

fclose(f);

printf("Training");

//-------------------------------- train -------------------------------

//-----梯度下降发训练参数 w，参见论文公式17 后面的步骤

gd(C, J, X, w, lb);

printf("done\n");

// save model

printf("Saving model\n");

f = fopen(modfile, "wb");

check(f != NULL);

// 存储 block1,block2的训练结果，w

for (int i = 0; i < X.numblocks; i++) {

count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);

check(count == X.blocksizes[i]);

}

fclose(f);

// score examples

// ---所有的样本都的得分,没有乘以 label y

printf("Scoring\n");

double *s = score(X, examples, num, w);

// ---------Write info file-------------

printf("Writing info file\n");

f = fopen(inffile, "w");

check(f != NULL);

for (int i = 0; i < num; i++) {

int len = ((int *)examples[i])[0];

// label, score, unique flag

count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i],

(int)examples[i][sizeof(int)+len]);

check(count > 0);

}

fclose(f);

printf("Freeing memory\n");

for (int i = 0; i < X.numblocks; i++) {

free(w[i]);

free(lb[i]);

}

free(w);

free(lb);

free(s);

for (int i = 0; i < num; i++)

free(examples[i]);

free(examples);

free(sorted);

free(X.x);

free(X.blocksizes);

free(X.regmult);

free(X.learnmult);

return 0;

}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航