DPM(Deformable Parts Model) 源码分析-训练(三)
2017-08-13 19:12
477 查看
原文转载自:http://blog.csdn.NET/ttransposition/article/details/12954631
DPM(Deformable Parts Model)原理
首先调用格式:
example:
pascal('person', 2); % train and evaluate a 2 component person model
pascal_train.m
[cpp] view
plaincopy
function model = pascal_train(cls, n)
% model = pascal_train(cls, n)
% Train a model using the PASCAL dataset.
%
%   cls   object class name, e.g. 'person'; also used to name cache files.
%   n     number of components; one root filter is trained per component
%         (the usage example in this article passes n = 2).
%
% Training is staged.  Every stage first tries to load its cached result
% from cachedir and only recomputes (and saves) it when the load fails.
% The meaning of the positional arguments passed to train() is defined in
% train.m, which is not shown here.

globals;   % script defined elsewhere; expected to define cachedir

% ---------- load positive and negative examples ----------
% Per the original annotator: pos.im / neg.im hold image paths,
% pos.x1..pos.y2 hold the bounding box, negatives carry no box.
[pos, neg] = pascal_data(cls);

% Partition the positives into n groups, one per component.
% Presumably split() groups them by aspect ratio - confirm in split.m.
% The component assignment is fixed during phase 1; per the annotator it
% becomes a latent variable from phase 2 on.
spos = split(pos, n);

% ---------- phase 1 ----------
% train root filters using warped positives & random negatives
try
  load([cachedir cls '_random']);
catch
  for i=1:n
    % initialise a one-component model (offset + root filter) from the
    % statistics of this group of positives ...
    models{i} = initmodel(spos{i});
    % ... and train it on that group only
    models{i} = train(cls, models{i}, spos{i}, neg, 1, 1, 1, 1, 2^28);
  end
  save([cachedir cls '_random'], 'models');
end

% ---------- phase 2 ----------
% merge models and train using latent detections & hard negatives
try
  load([cachedir cls '_hard']);
catch
  model = mergemodels(models);
  % only the first 200 negatives are used at this stage
  model = train(cls, model, pos, neg(1:200), 0, 0, 2, 2, 2^28, true, 0.7);
  save([cachedir cls '_hard'], 'model');
end

% ---------- phase 3 ----------
% add parts and update models using latent detections & hard negatives.
try
  load([cachedir cls '_parts']);
catch
  for i=1:n
    % third argument is presumably the number of parts per component
    % (confirm in addparts.m)
    model = addparts(model, i, 6);
  end
  % use more data mining iterations in the beginning
  model = train(cls, model, pos, neg(1:200), 0, 0, 1, 4, 2^30, true, 0.7);
  model = train(cls, model, pos, neg(1:200), 0, 0, 6, 2, 2^30, true, 0.7, true);
  save([cachedir cls '_parts'], 'model');
end

% ---------- phase 4 ----------
% update models using full set of negatives.
try
  load([cachedir cls '_mine']);
catch
  model = train(cls, model, pos, neg, 0, 0, 1, 3, 2^30, true, 0.7, true, ...
                0.003*model.numcomponents, 2);
  save([cachedir cls '_mine'], 'model');
end

% ---------- phase 5 ----------
% train bounding box prediction
try
  load([cachedir cls '_final']);
catch
  % NOTE(review, from the original annotator): the paper describes a
  % least-squares fit, while trainbox reportedly uses a direct matrix
  % division without guarding against singular matrices - confirm in
  % trainbox.m.
  model = trainbox(cls, model, pos, 0.7);
  save([cachedir cls '_final'], 'model');
end
initmodel.m
[cpp] view
plaincopy
function model = initmodel(pos, sbin, size)
% model = initmodel(pos, sbin, size)
% Initialize model structure.
%
%   pos    struct array of positive examples with fields x1, y1, x2, y2
%          (bounding box corners).
%   sbin   (optional) HOG cell size; defaults to 8 when omitted.
%   size   (optional) root filter size [height width] in HOG cells; when
%          omitted it is derived from statistics of the positive examples.
%
% Fields of the returned model as set by this function (notes marked
% "annotator" come from the article and are not verifiable from this file):
%   model.sbin            8 unless sbin is supplied
%   model.interval        10
%   model.numblocks       2 here (offset + root filter); annotator: 4 in phase 2
%   model.numcomponents   1
%   model.blocksizes      (1) = 1, (2) = height * ceil(width/2) * 31
%   model.regmult         0, 1
%   model.learnmult       20, 1
%   model.lowerbounds     -100 for every weight
%   model.maxsize         root filter size
%   model.minsize         root filter size
%   model.rootfilters{i}
%     .size               in units of sbin cells
%     .w                  zeros(height, width, 31)
%     .blocklabel         2; annotator: offsets, root filters, part filters
%                         and deformations share one block numbering
%   model.partfilters{i}  empty here (.w, .blocklabel are added later)
%   model.defs{i}         empty here (.anchor, .w, .blocklabel are added later)
%   model.offsets{i}
%     .w                  0
%     .blocklabel         1
%   model.components{i}
%     .rootindex          1
%     .parts{j}           empty here (.partindex, .defindex are added later)
%     .offsetindex        1
%     .dim                2 + model.blocksizes(1) + model.blocksizes(2)
%     .numblocks          2

% pick mode of aspect ratios: histogram log(h/w), smooth it with a
% Gaussian-shaped kernel and take the location of the maximum
h = [pos(:).y2]' - [pos(:).y1]' + 1;
w = [pos(:).x2]' - [pos(:).x1]' + 1;
xx = -2:.02:2;
filter = exp(-[-100:100].^2/400);
aspects = hist(log(h./w), xx);
aspects = convn(aspects, filter, 'same');
[peak, I] = max(aspects);
aspect = exp(xx(I));   % most typical h/w after smoothing

% pick 20 percentile area
areas = sort(h.*w);
% floor(...) is 0 when there are fewer than 5 positives, which would be an
% invalid index, so clamp it to the first element
idx = max(1, floor(length(areas) * 0.2));
area = areas(idx);
area = max(min(area, 5000), 3000);   % clamp to [3000, 5000]

% pick dimensions
w = sqrt(area/aspect);
h = w*aspect;

% size of HOG features
% (was "nargin < 4", which is always true for a 3-argument function and
% made a supplied sbin be ignored)
if nargin < 2
  model.sbin = 8;
else
  model.sbin = sbin;
end

% size of root filter
% (was "nargin < 5", which made a supplied size be ignored)
if nargin < 3
  model.rootfilters{1}.size = [round(h/model.sbin) round(w/model.sbin)];
else
  model.rootfilters{1}.size = size;
end

% set up offset
model.offsets{1}.w = 0;
model.offsets{1}.blocklabel = 1;
model.blocksizes(1) = 1;
model.regmult(1) = 0;
model.learnmult(1) = 20;
model.lowerbounds{1} = -100;

% set up root filter
model.rootfilters{1}.w = zeros([model.rootfilters{1}.size 31]);
height = model.rootfilters{1}.size(1);
% root filter is symmetric, so only ceil(width/2) columns are counted in
% the size of its parameter block
width = ceil(model.rootfilters{1}.size(2)/2);
model.rootfilters{1}.blocklabel = 2;
model.blocksizes(2) = width * height * 31;
model.regmult(2) = 1;
model.learnmult(2) = 1;
model.lowerbounds{2} = -100*ones(model.blocksizes(2),1);

% set up one component model
model.components{1}.rootindex = 1;
model.components{1}.offsetindex = 1;
model.components{1}.parts = {};
% the leading 2 presumably accounts for one block-label value per block
% (confirm against the feature writer)
model.components{1}.dim = 2 + model.blocksizes(1) + model.blocksizes(2);
model.components{1}.numblocks = 2;

% initialize the rest of the model structure
model.interval = 10;
model.numcomponents = 1;
model.numblocks = 2;
model.partfilters = {};
model.defs = {};
model.maxsize = model.rootfilters{1}.size;
model.minsize = model.rootfilters{1}.size;
learn.cc
[cpp] view
plaincopy
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <errno.h>
/*
* Optimize LSVM objective function via gradient descent.
*
* We use an adaptive cache mechanism. After a negative example
* scores beyond the margin multiple times it is removed from the
* training set for a fixed number of iterations.
*/
// Data File Format
// EXAMPLE*
//
// EXAMPLE:
// long label ints
// blocks int
// dim int
// DATA{blocks}
//
// DATA:
// block label float
// block data floats
//
// Internal Binary Format
// len int (byte length of EXAMPLE)
// EXAMPLE <see above>
// unique flag byte
// Number of example updates performed by gd(): its outer loop runs while
// the update counter is below ITER.
#define ITER 5000000

// Small cache parameters used by gd(): a per-example counter above INCACHE
// means the example is skipped; WAIT is the value the counter jumps to when
// an example is put to sleep.
#define INCACHE 3
#define WAIT 10

// Error checking: when e is false, print file, line, the failed expression
// and strerror(errno), then exit(1).  The argument is parenthesized so that
// any expression can be passed safely.
#define check(e) \
  ((e) ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, (unsigned)__LINE__, #e, strerror(errno)), exit(1)))

// In-memory example layout (built in main):
//   int   byte length
//   int   label[labelsize]
//   int   number of blocks
//   int   dim
//   float data...
// All macro parameters are parenthesized so pointer expressions such as
// NUM_NONZERO(p + k) are evaluated as a unit before the cast is applied.

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)(ex))[labelsize+1])

// float pointer to data segment of example ex
#define EX_DATA(ex) ((float *)((ex) + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)(ex))[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)((data)[0]))-1)

// number of ints in an example's label; read from the header file in main
int labelsize;
// sum of all block sizes; computed in main
int dim;
// qsort comparator over an array of example pointers (each element is a
// char * to an example whose first int is its byte length).
// Order: label ints first (byte-wise memcmp over labelsize ints); on a tie
// the shorter example sorts first; on equal length the complete example
// bytes decide.
int comp(const void *a, const void *b) {
  char *lhs = *((char **)a);
  char *rhs = *((char **)b);
  char *lhs_body = lhs + sizeof(int);
  char *rhs_body = rhs + sizeof(int);

  // sort by extended label first, and whole example second...
  int order = memcmp(lhs_body, rhs_body, labelsize*sizeof(int));
  if (order != 0)
    return order;

  // labels are the same: fall back to the stored byte lengths
  int lhs_len = *((int *)lhs);
  int rhs_len = *((int *)rhs);
  if (lhs_len != rhs_len)
    return (lhs_len < rhs_len) ? -1 : 1;

  // same length: compare every byte of the example
  return memcmp(lhs_body, rhs_body, lhs_len);
}
// A collapsed example is a sequence of examples: a run of consecutive
// entries in an array of example pointers.
//   seq  pointer to the first entry of the run (collapse() sets it to an
//        address inside the pointer array it is given)
//   num  number of entries in the run
struct collapsed {
  char **seq;
  int num;
};
// Set of collapsed examples plus the per-block metadata that main reads
// from the header file.
//   x           array of collapsed examples (allocated by collapse())
//   num         number of entries in x
//   numblocks   number of parameter blocks
//   blocksizes  number of weights in each block
//   regmult     per-block regularization multiplier
//   learnmult   per-block learning rate multiplier
struct data {
  collapsed *x;
  int num;
  int numblocks;
  int *blocksizes;
  float *regmult;
  float *learnmult;
};
// Seed the drand48() generator from the wall clock: the microsecond field
// of the current time is used as the seed.  Aborts through check() when the
// time cannot be read.
void seed_time() {
  struct timeval tp;
  check(gettimeofday(&tp, NULL) == 0);
  long seed = (long)tp.tv_usec;
  srand48(seed);
}
// smaller of two doubles (x wins when x <= y)
static inline double min(double x, double y) {
  if (x <= y)
    return x;
  return y;
}
// larger of two doubles (y wins when x <= y)
static inline double max(double x, double y) {
  if (x <= y)
    return y;
  return x;
}
// Stochastic gradient descent on the LSVM objective, with a small cache
// that temporarily skips negatives which keep satisfying the margin.
// (The original annotator refers to the steps listed after equation 17 of
// the paper; the step numbers below are quoted from those notes.)
//
//   C   scales the data-term step size (rateX = cnum * C / T)
//   J   extra weight on the update for positive examples
//   X   collapsed examples plus block sizes and per-block multipliers
//   w   per-block weight arrays, updated in place
//   lb  per-block lower bounds, applied to w after every pass
//
// Annotator's note on typical inputs: C=0.0002, J=1, w all zero, lb all -100.
void gd(double C, double J, data X, double **w, double **lb) {
  // number of collapsed examples (groups)
  int num = X.num;
  // state for random permutations
  int *perm = (int *)malloc(sizeof(int)*X.num);
  check(perm != NULL);
  // state for small cache: one counter per collapsed example
  int *W = (int *)malloc(sizeof(int)*num);
  check(W != NULL);
  for (int j = 0; j < num; j++)
    W[j] = 0;
  // t counts processed (non-skipped) examples; the loop condition is only
  // tested between passes, so a pass that crosses ITER still completes
  int t = 0;
  while (t < ITER) {
    // pick random permutation
    for (int i = 0; i < num; i++)
      perm[i] = i;
    // Fisher-Yates shuffle: every example is visited once per pass, in
    // random order (rather than sampling examples with replacement)
    for (int swapi = 0; swapi < num; swapi++) {
      // drand48() is uniform in [0, 1), so swapj lies in [swapi, num)
      int swapj = (int)(drand48()*(num-swapi)) + swapi;
      int tmp = perm[swapi];
      perm[swapi] = perm[swapj];
      perm[swapj] = tmp;
    }
    // count number of examples in the small cache; cnum is fixed for the
    // whole pass and scales the data-term step size
    int cnum = 0;
    for (int i = 0; i < num; i++) {
      if (W[i] <= INCACHE)
        cnum++;
    }
    for (int swapi = 0; swapi < num; swapi++) {
      // select example
      int i = perm[swapi];
      collapsed x = X.x[i];
      // skip if example is not in small cache; each skip counts W[i]
      // down by one, so the example returns once W[i] reaches INCACHE
      if (W[i] > INCACHE) {
        W[i]--;
        continue;
      }
      // learning rate: decays as 1/(t + 1000)
      double T = t + 1000.0;
      double rateX = cnum * C / T;
      double rateR = 1.0 / T;
      if (t % 10000 == 0) {
        printf(".");
        // flush so the progress dot shows up immediately
        fflush(stdout);
      }
      t++;
      // compute max over latent placements (annotator: step 3).
      // M is the index within the group of the best-scoring example,
      // V its score under the current w.
      int M = -1;
      double V = 0;
      for (int m = 0; m < x.num; m++) {
        double val = 0;
        char *ptr = x.seq[m];
        // data segment: for each block, one float holding the block label
        // followed by that block's feature values
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          // step over the block label
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            val += w[b][k] * data[k];
          data += X.blocksizes[b];
        }
        if (M < 0 || val > V) {
          M = m;
          V = val;
        }
      }
      // update model (annotator: step 4 and the shared part of step 5).
      // Regularization shrink applied to every block whether or not the
      // example violates the margin; blocks with regmult 0 are untouched.
      for (int j = 0; j < X.numblocks; j++) {
        double mult = rateR * X.regmult[j] * X.learnmult[j];
        for (int k = 0; k < X.blocksizes[j]; k++) {
          w[j][k] -= mult * w[j][k];
        }
      }
      char *ptr = x.seq[M];
      int label = LABEL(ptr);
      // margin violated: move w along the example's features
      // (annotator: step 5)
      if (label * V < 1.0)
      {
        // a violating example is (re)admitted to the cache
        W[i] = 0;
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          // sign follows the label; positives are additionally weighted by J
          double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            w[b][k] += mult * data[k];
          data += X.blocksizes[b];
        }
      } else if (label == -1)
      {
        // negative that satisfies the margin: count it; once the counter
        // already equals INCACHE, jump to WAIT so the example is skipped
        // until the counter has been decremented back down to INCACHE
        if (W[i] == INCACHE)
          W[i] = WAIT;
        else
          W[i]++;
      }
    }
    // apply lowerbounds (once per pass over the data)
    for (int j = 0; j < X.numblocks; j++) {
      for (int k = 0; k < X.blocksizes[j]; k++) {
        w[j][k] = max(w[j][k], lb[j][k]);
      }
    }
  }
  free(perm);
  free(W);
}
// Score examples: for each of the num examples, the dot product between the
// weights w and the example's block data.  Returns a malloc'ed array of num
// doubles that the caller must free.
double *score(data X, char **examples, int num, double **w) {
  double *s = (double *)malloc(sizeof(double)*num);
  check(s != NULL);

  for (int n = 0; n < num; n++) {
    char *ex = examples[n];
    float *cursor = EX_DATA(ex);
    const int nblocks = NUM_NONZERO(ex);

    s[n] = 0.0;
    for (int seen = 0; seen < nblocks; seen++) {
      // each block starts with its label, followed by its values
      const int b = BLOCK_IDX(cursor);
      const int len = X.blocksizes[b];
      float *values = cursor + 1;
      for (int k = 0; k < len; k++)
        s[n] += w[b][k] * values[k];
      cursor = values + len;
    }
  }
  return s;
}
// Merge examples with identical labels: walk the pointer array (main passes
// the sorted, de-duplicated one) and group consecutive entries whose
// labelsize label ints are byte-identical.  Stores the group array in X->x
// and the group count in X->num.
void collapse(data *X, char **examples, int num) {
  const size_t label_bytes = labelsize*sizeof(int);
  collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num);
  check(x != NULL);

  // the first example always opens the first group
  collapsed *group = x;
  group->seq = examples;
  group->num = 1;

  for (int j = 1; j < num; j++) {
    char **entry = examples + j;
    const bool same_label =
        memcmp(group->seq[0] + sizeof(int), *entry + sizeof(int), label_bytes) == 0;
    if (same_label) {
      group->num += 1;
      continue;
    }
    // label changed: open a new group starting at this entry
    ++group;
    group->seq = entry;
    group->num = 1;
  }

  X->x = x;
  X->num = (int)(group - x) + 1;
}
// Entry point.
// Usage: learn C J hdrfile datfile modfile inffile lobfile
//   C, J     training constants handed to gd()
//            (annotator's example values: C=0.0002, J=1)
//   hdrfile  binary header: example count, label size, block count, then
//            block sizes, regularization multipliers, learning multipliers
//   datfile  binary examples
//   modfile  initial weights (read), overwritten with the trained weights
//   inffile  text output: one "label score unique" line per example
//   lobfile  per-weight lower bounds
// Returns 0 on success; any failed check() prints a message and exits(1).
int main(int argc, char **argv) {
  seed_time();
  int count;
  data X;

  // command line arguments
  check(argc == 8);
  double C = atof(argv[1]);
  double J = atof(argv[2]);
  char *hdrfile = argv[3];
  char *datfile = argv[4];
  char *modfile = argv[5];
  char *inffile = argv[6];
  char *lobfile = argv[7];

  // read header file
  FILE *f = fopen(hdrfile, "rb");
  check(f != NULL);
  int header[3];
  count = fread(header, sizeof(int), 3, f);
  check(count == 3);
  int num = header[0];        // total number of examples
  labelsize = header[1];      // ints per label (annotator: 5, [label id level x y])
  X.numblocks = header[2];
  // reject counts that would make the allocations and indexing below invalid
  check(num > 0);
  check(labelsize > 0);
  check(X.numblocks > 0);
  X.blocksizes = (int *)malloc(X.numblocks*sizeof(int));
  check(X.blocksizes != NULL);
  count = fread(X.blocksizes, sizeof(int), X.numblocks, f);
  check(count == X.numblocks);
  X.regmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.regmult != NULL);
  count = fread(X.regmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.learnmult != NULL);
  count = fread(X.learnmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  fclose(f);
  printf("%d examples with label size %d and %d blocks\n",
         num, labelsize, X.numblocks);
  printf("block size, regularization multiplier, learning rate multiplier\n");
  dim = 0;
  for (int i = 0; i < X.numblocks; i++) {
    dim += X.blocksizes[i];
    printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);
  }

  // read examples
  // On disk each example is labelsize+2 ints (label ints, block count,
  // number of floats) followed by that many floats.  In memory it becomes:
  //   int  byte length of the on-disk example
  //   ...  the on-disk example
  //   byte unique flag (initially 0)
  f = fopen(datfile, "rb");
  check(f != NULL);
  printf("Reading examples\n");
  char **examples = (char **)malloc(num*sizeof(char *));
  check(examples != NULL);
  const int hdrints = labelsize+2;
  // heap scratch buffer for the per-example int header
  // (replaces a variable-length array, which is not standard C++)
  int *buf = (int *)malloc(sizeof(int)*hdrints);
  check(buf != NULL);
  for (int i = 0; i < num; i++) {
    // we use an extra byte in the end of each example to mark unique
    // we use an extra int at the start of each example to store the
    // example's byte length (excluding unique flag and this int)
    count = fread(buf, sizeof(int), hdrints, f);
    check(count == hdrints);
    // a negative float count would corrupt the length computation
    check(buf[labelsize+1] >= 0);
    // byte length of an example's data segment
    size_t datalen = sizeof(float)*buf[labelsize+1];
    int len = sizeof(int)*hdrints + datalen;
    // memory for data, an initial integer, and a final byte
    examples[i] = (char *)malloc(sizeof(int)+len+1);
    check(examples[i] != NULL);
    // set data segment's byte length
    ((int *)examples[i])[0] = len;
    // set the unique flag to zero
    examples[i][sizeof(int)+len] = 0;
    // copy label data into example
    for (int j = 0; j < hdrints; j++)
      ((int *)examples[i])[j+1] = buf[j];
    // read the rest of the data segment into the example
    size_t got = fread(examples[i]+sizeof(int)*(labelsize+3), 1, datalen, f);
    check(got == datalen);
  }
  free(buf);
  fclose(f);
  printf("done\n");

  // sort a copy of the pointer array; examples keeps the file order
  // (ordering is defined by comp: label ints, then length, then all bytes)
  printf("Sorting examples\n");
  char **sorted = (char **)malloc(num*sizeof(char *));
  check(sorted != NULL);
  memcpy(sorted, examples, num*sizeof(char *));
  qsort(sorted, num, sizeof(char *), comp);
  printf("done\n");

  // find unique examples
  // The first occurrence of each distinct example gets unique flag 1 and is
  // compacted to the front of sorted; duplicates keep flag 0 and stay
  // reachable only through examples.
  int last = 0;
  int len = *((int *)sorted[0]);
  sorted[0][sizeof(int)+len] = 1;
  for (int j = 1; j < num; j++) {
    int alen = *((int *)sorted[last]);
    int blen = *((int *)sorted[j]);
    if (alen != blen ||
        memcmp(sorted[last] + sizeof(int), sorted[j] + sizeof(int), alen)) {
      last++;
      sorted[last] = sorted[j];
      sorted[last][sizeof(int)+blen] = 1;
    }
  }
  int num_unique = last+1;
  printf("%d unique examples\n", num_unique);

  // collapse examples: group unique examples that share the same label ints;
  // gd() picks the best-scoring member of each group
  collapse(&X, sorted, num_unique);
  printf("%d collapsed examples\n", X.num);

  // initial model: starting weights are read from modfile
  double **w = (double **)malloc(sizeof(double *)*X.numblocks);
  check(w != NULL);
  f = fopen(modfile, "rb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(w[i] != NULL);
    count = fread(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // lower bounds
  double **lb = (double **)malloc(sizeof(double *)*X.numblocks);
  check(lb != NULL);
  f = fopen(lobfile, "rb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(lb[i] != NULL);
    count = fread(lb[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // train
  printf("Training");
  gd(C, J, X, w, lb);
  printf("done\n");

  // save model
  printf("Saving model\n");
  f = fopen(modfile, "wb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // score examples: raw scores in file order, not multiplied by the label
  printf("Scoring\n");
  double *s = score(X, examples, num, w);

  // Write info file
  printf("Writing info file\n");
  f = fopen(inffile, "w");
  check(f != NULL);
  for (int i = 0; i < num; i++) {
    int len = ((int *)examples[i])[0];
    // label, score, unique flag
    count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i],
                    (int)examples[i][sizeof(int)+len]);
    check(count > 0);
  }
  fclose(f);

  printf("Freeing memory\n");
  for (int i = 0; i < X.numblocks; i++) {
    free(w[i]);
    free(lb[i]);
  }
  free(w);
  free(lb);
  free(s);
  for (int i = 0; i < num; i++)
    free(examples[i]);
  free(examples);
  free(sorted);
  free(X.x);
  free(X.blocksizes);
  free(X.regmult);
  free(X.learnmult);

  return 0;
}
DPM(Deformable Parts Model)原理
首先调用格式:
example:
pascal('person', 2); % train and evaluate a 2 component person model
pascal_train.m
[cpp] view
plaincopy
function model = pascal_train(cls, n)
% model = pascal_train(cls, n)
% Train a model using the PASCAL dataset.
%
%   cls   object class name, e.g. 'person'; also used to name cache files.
%   n     number of components; one root filter is trained per component
%         (the usage example in this article passes n = 2).
%
% Training is staged.  Every stage first tries to load its cached result
% from cachedir and only recomputes (and saves) it when the load fails.
% The meaning of the positional arguments passed to train() is defined in
% train.m, which is not shown here.

globals;   % script defined elsewhere; expected to define cachedir

% ---------- load positive and negative examples ----------
% Per the original annotator: pos.im / neg.im hold image paths,
% pos.x1..pos.y2 hold the bounding box, negatives carry no box.
[pos, neg] = pascal_data(cls);

% Partition the positives into n groups, one per component.
% Presumably split() groups them by aspect ratio - confirm in split.m.
% The component assignment is fixed during phase 1; per the annotator it
% becomes a latent variable from phase 2 on.
spos = split(pos, n);

% ---------- phase 1 ----------
% train root filters using warped positives & random negatives
try
  load([cachedir cls '_random']);
catch
  for i=1:n
    % initialise a one-component model (offset + root filter) from the
    % statistics of this group of positives ...
    models{i} = initmodel(spos{i});
    % ... and train it on that group only
    models{i} = train(cls, models{i}, spos{i}, neg, 1, 1, 1, 1, 2^28);
  end
  save([cachedir cls '_random'], 'models');
end

% ---------- phase 2 ----------
% merge models and train using latent detections & hard negatives
try
  load([cachedir cls '_hard']);
catch
  model = mergemodels(models);
  % only the first 200 negatives are used at this stage
  model = train(cls, model, pos, neg(1:200), 0, 0, 2, 2, 2^28, true, 0.7);
  save([cachedir cls '_hard'], 'model');
end

% ---------- phase 3 ----------
% add parts and update models using latent detections & hard negatives.
try
  load([cachedir cls '_parts']);
catch
  for i=1:n
    % third argument is presumably the number of parts per component
    % (confirm in addparts.m)
    model = addparts(model, i, 6);
  end
  % use more data mining iterations in the beginning
  model = train(cls, model, pos, neg(1:200), 0, 0, 1, 4, 2^30, true, 0.7);
  model = train(cls, model, pos, neg(1:200), 0, 0, 6, 2, 2^30, true, 0.7, true);
  save([cachedir cls '_parts'], 'model');
end

% ---------- phase 4 ----------
% update models using full set of negatives.
try
  load([cachedir cls '_mine']);
catch
  model = train(cls, model, pos, neg, 0, 0, 1, 3, 2^30, true, 0.7, true, ...
                0.003*model.numcomponents, 2);
  save([cachedir cls '_mine'], 'model');
end

% ---------- phase 5 ----------
% train bounding box prediction
try
  load([cachedir cls '_final']);
catch
  % NOTE(review, from the original annotator): the paper describes a
  % least-squares fit, while trainbox reportedly uses a direct matrix
  % division without guarding against singular matrices - confirm in
  % trainbox.m.
  model = trainbox(cls, model, pos, 0.7);
  save([cachedir cls '_final'], 'model');
end
initmodel.m
[cpp] view
plaincopy
function model = initmodel(pos, sbin, size)
% model = initmodel(pos, sbin, size)
% Initialize model structure.
%
%   pos    struct array of positive examples with fields x1, y1, x2, y2
%          (bounding box corners).
%   sbin   (optional) HOG cell size; defaults to 8 when omitted.
%   size   (optional) root filter size [height width] in HOG cells; when
%          omitted it is derived from statistics of the positive examples.
%
% Fields of the returned model as set by this function (notes marked
% "annotator" come from the article and are not verifiable from this file):
%   model.sbin            8 unless sbin is supplied
%   model.interval        10
%   model.numblocks       2 here (offset + root filter); annotator: 4 in phase 2
%   model.numcomponents   1
%   model.blocksizes      (1) = 1, (2) = height * ceil(width/2) * 31
%   model.regmult         0, 1
%   model.learnmult       20, 1
%   model.lowerbounds     -100 for every weight
%   model.maxsize         root filter size
%   model.minsize         root filter size
%   model.rootfilters{i}
%     .size               in units of sbin cells
%     .w                  zeros(height, width, 31)
%     .blocklabel         2; annotator: offsets, root filters, part filters
%                         and deformations share one block numbering
%   model.partfilters{i}  empty here (.w, .blocklabel are added later)
%   model.defs{i}         empty here (.anchor, .w, .blocklabel are added later)
%   model.offsets{i}
%     .w                  0
%     .blocklabel         1
%   model.components{i}
%     .rootindex          1
%     .parts{j}           empty here (.partindex, .defindex are added later)
%     .offsetindex        1
%     .dim                2 + model.blocksizes(1) + model.blocksizes(2)
%     .numblocks          2

% pick mode of aspect ratios: histogram log(h/w), smooth it with a
% Gaussian-shaped kernel and take the location of the maximum
h = [pos(:).y2]' - [pos(:).y1]' + 1;
w = [pos(:).x2]' - [pos(:).x1]' + 1;
xx = -2:.02:2;
filter = exp(-[-100:100].^2/400);
aspects = hist(log(h./w), xx);
aspects = convn(aspects, filter, 'same');
[peak, I] = max(aspects);
aspect = exp(xx(I));   % most typical h/w after smoothing

% pick 20 percentile area
areas = sort(h.*w);
% floor(...) is 0 when there are fewer than 5 positives, which would be an
% invalid index, so clamp it to the first element
idx = max(1, floor(length(areas) * 0.2));
area = areas(idx);
area = max(min(area, 5000), 3000);   % clamp to [3000, 5000]

% pick dimensions
w = sqrt(area/aspect);
h = w*aspect;

% size of HOG features
% (was "nargin < 4", which is always true for a 3-argument function and
% made a supplied sbin be ignored)
if nargin < 2
  model.sbin = 8;
else
  model.sbin = sbin;
end

% size of root filter
% (was "nargin < 5", which made a supplied size be ignored)
if nargin < 3
  model.rootfilters{1}.size = [round(h/model.sbin) round(w/model.sbin)];
else
  model.rootfilters{1}.size = size;
end

% set up offset
model.offsets{1}.w = 0;
model.offsets{1}.blocklabel = 1;
model.blocksizes(1) = 1;
model.regmult(1) = 0;
model.learnmult(1) = 20;
model.lowerbounds{1} = -100;

% set up root filter
model.rootfilters{1}.w = zeros([model.rootfilters{1}.size 31]);
height = model.rootfilters{1}.size(1);
% root filter is symmetric, so only ceil(width/2) columns are counted in
% the size of its parameter block
width = ceil(model.rootfilters{1}.size(2)/2);
model.rootfilters{1}.blocklabel = 2;
model.blocksizes(2) = width * height * 31;
model.regmult(2) = 1;
model.learnmult(2) = 1;
model.lowerbounds{2} = -100*ones(model.blocksizes(2),1);

% set up one component model
model.components{1}.rootindex = 1;
model.components{1}.offsetindex = 1;
model.components{1}.parts = {};
% the leading 2 presumably accounts for one block-label value per block
% (confirm against the feature writer)
model.components{1}.dim = 2 + model.blocksizes(1) + model.blocksizes(2);
model.components{1}.numblocks = 2;

% initialize the rest of the model structure
model.interval = 10;
model.numcomponents = 1;
model.numblocks = 2;
model.partfilters = {};
model.defs = {};
model.maxsize = model.rootfilters{1}.size;
model.minsize = model.rootfilters{1}.size;
learn.cc
[cpp] view
plaincopy
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <errno.h>
/*
* Optimize LSVM objective function via gradient descent.
*
* We use an adaptive cache mechanism. After a negative example
* scores beyond the margin multiple times it is removed from the
* training set for a fixed number of iterations.
*/
// Data File Format
// EXAMPLE*
//
// EXAMPLE:
// long label ints
// blocks int
// dim int
// DATA{blocks}
//
// DATA:
// block label float
// block data floats
//
// Internal Binary Format
// len int (byte length of EXAMPLE)
// EXAMPLE <see above>
// unique flag byte
// Number of example updates performed by gd(): its outer loop runs while
// the update counter is below ITER.
#define ITER 5000000

// Small cache parameters used by gd(): a per-example counter above INCACHE
// means the example is skipped; WAIT is the value the counter jumps to when
// an example is put to sleep.
#define INCACHE 3
#define WAIT 10

// Error checking: when e is false, print file, line, the failed expression
// and strerror(errno), then exit(1).  The argument is parenthesized so that
// any expression can be passed safely.
#define check(e) \
  ((e) ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, (unsigned)__LINE__, #e, strerror(errno)), exit(1)))

// In-memory example layout (built in main):
//   int   byte length
//   int   label[labelsize]
//   int   number of blocks
//   int   dim
//   float data...
// All macro parameters are parenthesized so pointer expressions such as
// NUM_NONZERO(p + k) are evaluated as a unit before the cast is applied.

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)(ex))[labelsize+1])

// float pointer to data segment of example ex
#define EX_DATA(ex) ((float *)((ex) + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)(ex))[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)((data)[0]))-1)

// number of ints in an example's label; read from the header file in main
int labelsize;
// sum of all block sizes; computed in main
int dim;
// qsort comparator over an array of example pointers (each element is a
// char * to an example whose first int is its byte length).
// Order: label ints first (byte-wise memcmp over labelsize ints); on a tie
// the shorter example sorts first; on equal length the complete example
// bytes decide.
int comp(const void *a, const void *b) {
  char *lhs = *((char **)a);
  char *rhs = *((char **)b);
  char *lhs_body = lhs + sizeof(int);
  char *rhs_body = rhs + sizeof(int);

  // sort by extended label first, and whole example second...
  int order = memcmp(lhs_body, rhs_body, labelsize*sizeof(int));
  if (order != 0)
    return order;

  // labels are the same: fall back to the stored byte lengths
  int lhs_len = *((int *)lhs);
  int rhs_len = *((int *)rhs);
  if (lhs_len != rhs_len)
    return (lhs_len < rhs_len) ? -1 : 1;

  // same length: compare every byte of the example
  return memcmp(lhs_body, rhs_body, lhs_len);
}
// A collapsed example is a sequence of examples: a run of consecutive
// entries in an array of example pointers.
//   seq  pointer to the first entry of the run (collapse() sets it to an
//        address inside the pointer array it is given)
//   num  number of entries in the run
struct collapsed {
  char **seq;
  int num;
};
// Set of collapsed examples plus the per-block metadata that main reads
// from the header file.
//   x           array of collapsed examples (allocated by collapse())
//   num         number of entries in x
//   numblocks   number of parameter blocks
//   blocksizes  number of weights in each block
//   regmult     per-block regularization multiplier
//   learnmult   per-block learning rate multiplier
struct data {
  collapsed *x;
  int num;
  int numblocks;
  int *blocksizes;
  float *regmult;
  float *learnmult;
};
// Seed the drand48() generator from the wall clock: the microsecond field
// of the current time is used as the seed.  Aborts through check() when the
// time cannot be read.
void seed_time() {
  struct timeval tp;
  check(gettimeofday(&tp, NULL) == 0);
  long seed = (long)tp.tv_usec;
  srand48(seed);
}
// smaller of two doubles (x wins when x <= y)
static inline double min(double x, double y) {
  if (x <= y)
    return x;
  return y;
}
// larger of two doubles (y wins when x <= y)
static inline double max(double x, double y) {
  if (x <= y)
    return y;
  return x;
}
// Stochastic gradient descent on the LSVM objective, with a small cache
// that temporarily skips negatives which keep satisfying the margin.
// (The original annotator refers to the steps listed after equation 17 of
// the paper; the step numbers below are quoted from those notes.)
//
//   C   scales the data-term step size (rateX = cnum * C / T)
//   J   extra weight on the update for positive examples
//   X   collapsed examples plus block sizes and per-block multipliers
//   w   per-block weight arrays, updated in place
//   lb  per-block lower bounds, applied to w after every pass
//
// Annotator's note on typical inputs: C=0.0002, J=1, w all zero, lb all -100.
void gd(double C, double J, data X, double **w, double **lb) {
  // number of collapsed examples (groups)
  int num = X.num;
  // state for random permutations
  int *perm = (int *)malloc(sizeof(int)*X.num);
  check(perm != NULL);
  // state for small cache: one counter per collapsed example
  int *W = (int *)malloc(sizeof(int)*num);
  check(W != NULL);
  for (int j = 0; j < num; j++)
    W[j] = 0;
  // t counts processed (non-skipped) examples; the loop condition is only
  // tested between passes, so a pass that crosses ITER still completes
  int t = 0;
  while (t < ITER) {
    // pick random permutation
    for (int i = 0; i < num; i++)
      perm[i] = i;
    // Fisher-Yates shuffle: every example is visited once per pass, in
    // random order (rather than sampling examples with replacement)
    for (int swapi = 0; swapi < num; swapi++) {
      // drand48() is uniform in [0, 1), so swapj lies in [swapi, num)
      int swapj = (int)(drand48()*(num-swapi)) + swapi;
      int tmp = perm[swapi];
      perm[swapi] = perm[swapj];
      perm[swapj] = tmp;
    }
    // count number of examples in the small cache; cnum is fixed for the
    // whole pass and scales the data-term step size
    int cnum = 0;
    for (int i = 0; i < num; i++) {
      if (W[i] <= INCACHE)
        cnum++;
    }
    for (int swapi = 0; swapi < num; swapi++) {
      // select example
      int i = perm[swapi];
      collapsed x = X.x[i];
      // skip if example is not in small cache; each skip counts W[i]
      // down by one, so the example returns once W[i] reaches INCACHE
      if (W[i] > INCACHE) {
        W[i]--;
        continue;
      }
      // learning rate: decays as 1/(t + 1000)
      double T = t + 1000.0;
      double rateX = cnum * C / T;
      double rateR = 1.0 / T;
      if (t % 10000 == 0) {
        printf(".");
        // flush so the progress dot shows up immediately
        fflush(stdout);
      }
      t++;
      // compute max over latent placements (annotator: step 3).
      // M is the index within the group of the best-scoring example,
      // V its score under the current w.
      int M = -1;
      double V = 0;
      for (int m = 0; m < x.num; m++) {
        double val = 0;
        char *ptr = x.seq[m];
        // data segment: for each block, one float holding the block label
        // followed by that block's feature values
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          // step over the block label
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            val += w[b][k] * data[k];
          data += X.blocksizes[b];
        }
        if (M < 0 || val > V) {
          M = m;
          V = val;
        }
      }
      // update model (annotator: step 4 and the shared part of step 5).
      // Regularization shrink applied to every block whether or not the
      // example violates the margin; blocks with regmult 0 are untouched.
      for (int j = 0; j < X.numblocks; j++) {
        double mult = rateR * X.regmult[j] * X.learnmult[j];
        for (int k = 0; k < X.blocksizes[j]; k++) {
          w[j][k] -= mult * w[j][k];
        }
      }
      char *ptr = x.seq[M];
      int label = LABEL(ptr);
      // margin violated: move w along the example's features
      // (annotator: step 5)
      if (label * V < 1.0)
      {
        // a violating example is (re)admitted to the cache
        W[i] = 0;
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          // sign follows the label; positives are additionally weighted by J
          double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            w[b][k] += mult * data[k];
          data += X.blocksizes[b];
        }
      } else if (label == -1)
      {
        // negative that satisfies the margin: count it; once the counter
        // already equals INCACHE, jump to WAIT so the example is skipped
        // until the counter has been decremented back down to INCACHE
        if (W[i] == INCACHE)
          W[i] = WAIT;
        else
          W[i]++;
      }
    }
    // apply lowerbounds (once per pass over the data)
    for (int j = 0; j < X.numblocks; j++) {
      for (int k = 0; k < X.blocksizes[j]; k++) {
        w[j][k] = max(w[j][k], lb[j][k]);
      }
    }
  }
  free(perm);
  free(W);
}
// Score examples: for each of the num examples, the dot product between the
// weights w and the example's block data.  Returns a malloc'ed array of num
// doubles that the caller must free.
double *score(data X, char **examples, int num, double **w) {
  double *s = (double *)malloc(sizeof(double)*num);
  check(s != NULL);

  for (int n = 0; n < num; n++) {
    char *ex = examples[n];
    float *cursor = EX_DATA(ex);
    const int nblocks = NUM_NONZERO(ex);

    s[n] = 0.0;
    for (int seen = 0; seen < nblocks; seen++) {
      // each block starts with its label, followed by its values
      const int b = BLOCK_IDX(cursor);
      const int len = X.blocksizes[b];
      float *values = cursor + 1;
      for (int k = 0; k < len; k++)
        s[n] += w[b][k] * values[k];
      cursor = values + len;
    }
  }
  return s;
}
// Merge examples with identical labels: walk the pointer array (main passes
// the sorted, de-duplicated one) and group consecutive entries whose
// labelsize label ints are byte-identical.  Stores the group array in X->x
// and the group count in X->num.
void collapse(data *X, char **examples, int num) {
  const size_t label_bytes = labelsize*sizeof(int);
  collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num);
  check(x != NULL);

  // the first example always opens the first group
  collapsed *group = x;
  group->seq = examples;
  group->num = 1;

  for (int j = 1; j < num; j++) {
    char **entry = examples + j;
    const bool same_label =
        memcmp(group->seq[0] + sizeof(int), *entry + sizeof(int), label_bytes) == 0;
    if (same_label) {
      group->num += 1;
      continue;
    }
    // label changed: open a new group starting at this entry
    ++group;
    group->seq = entry;
    group->num = 1;
  }

  X->x = x;
  X->num = (int)(group - x) + 1;
}
// Trainer entry point.
//
// Usage: <prog> C J hdrfile datfile modfile inffile lobfile
//   C, J     numeric training parameters forwarded unchanged to gd()
//            (the original annotation lists C=0.0002, J=1 as example values)
//   hdrfile  binary header: example count, label size, block count, then the
//            per-block sizes, regularization and learning-rate multipliers
//   datfile  binary examples
//   modfile  initial model on input; overwritten with the trained model
//   inffile  text output: "label<TAB>score<TAB>unique-flag" per example
//   lobfile  per-weight lower bounds (passed to gd())
//
// Every failed precondition (bad argc, unreadable file, short read, failed
// allocation) goes through check(), which is defined elsewhere in this file.
int main(int argc, char **argv) {
  seed_time();
  int count;
  data X;

  // command line arguments
  check(argc == 8);
  double C = atof(argv[1]);
  double J = atof(argv[2]);
  char *hdrfile = argv[3];
  char *datfile = argv[4];
  char *modfile = argv[5];
  char *inffile = argv[6];
  char *lobfile = argv[7];

  // read header file
  FILE *f = fopen(hdrfile, "rb");
  check(f != NULL);
  int header[3];
  count = fread(header, sizeof(int), 3, f);
  check(count == 3);
  int num = header[0];      // number of examples stored in datfile
  labelsize = header[1];    // ints per label; original notes: 5 = [label id level x y]
  X.numblocks = header[2];  // number of parameter blocks
  X.blocksizes = (int *)malloc(X.numblocks*sizeof(int));
  check(X.blocksizes != NULL);
  count = fread(X.blocksizes, sizeof(int), X.numblocks, f);
  check(count == X.numblocks);
  X.regmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.regmult != NULL);
  count = fread(X.regmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.learnmult != NULL);
  count = fread(X.learnmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  check(num != 0);
  fclose(f);

  printf("%d examples with label size %d and %d blocks\n",
         num, labelsize, X.numblocks);
  printf("block size, regularization multiplier, learning rate multiplier\n");
  // dim accumulates the total number of weights over all blocks
  dim = 0;
  for (int i = 0; i < X.numblocks; i++) {
    dim += X.blocksizes[i];
    printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);
  }

  // ---------------- read examples from datfile ----------------
  // In-memory layout of examples[i], as built by the loop below:
  //   1 int                 byte length of the data segment
  //   labelsize+2 ints      header copied verbatim from the file; its last
  //                         int gives the number of floats that follow
  //   that many floats      feature data
  //   1 byte                "unique" flag, initialised to 0
  // NOTE(review): the original annotations describe the header as
  // [label id level x y numblocks dim] -- confirm against the writer.
  f = fopen(datfile, "rb");
  check(f != NULL);
  printf("Reading examples\n");
  char **examples = (char **)malloc(num*sizeof(char *));
  check(examples != NULL);
  // Scratch buffer for one example header. Allocated once on the heap
  // instead of as a runtime-sized stack array, which is not standard C++.
  int *buf = (int *)malloc(sizeof(int)*(labelsize+2));
  check(buf != NULL);
  for (int i = 0; i < num; i++) {
    // we use an extra byte in the end of each example to mark unique
    // we use an extra int at the start of each example to store the
    // example's byte length (excluding unique flag and this int)
    count = fread(buf, sizeof(int), labelsize+2, f);
    check(count == labelsize+2);
    // byte length of an example's data segment: header ints + feature floats
    int len = sizeof(int)*(labelsize+2) + sizeof(float)*buf[labelsize+1];
    // memory for data, an initial integer, and a final byte
    examples[i] = (char *)malloc(sizeof(int)+len+1);
    check(examples[i] != NULL);
    // set data segment's byte length
    ((int *)examples[i])[0] = len;
    // set the unique flag to zero
    examples[i][sizeof(int)+len] = 0;
    // copy label data into example
    for (int j = 0; j < labelsize+2; j++)
      ((int *)examples[i])[j+1] = buf[j];
    // read the rest of the data segment into the example
    count = fread(examples[i]+sizeof(int)*(labelsize+3), 1,
                  len-sizeof(int)*(labelsize+2), f);
    check(count == len-sizeof(int)*(labelsize+2));
  }
  free(buf);
  fclose(f);
  printf("done\n");

  // sort
  // Only a copy of the pointer array is sorted, so examples[] keeps the file
  // order needed when the info file is written. Ordering is defined by
  // comp() (defined elsewhere in this file).
  printf("Sorting examples\n");
  char **sorted = (char **)malloc(num*sizeof(char *));
  check(sorted != NULL);
  memcpy(sorted, examples, num*sizeof(char *));
  qsort(sorted, num, sizeof(char *), comp);
  printf("done\n");

  // find unique examples
  // Compacts sorted[] in place: the first example of every run of
  // byte-identical data segments is kept and gets unique flag 1; later
  // duplicates keep flag 0 and stay reachable only through examples[].
  int last_unique = 0;
  int first_len = *((int *)sorted[0]);
  sorted[0][sizeof(int)+first_len] = 1;
  for (int j = 1; j < num; j++) {
    int alen = *((int *)sorted[last_unique]);
    int blen = *((int *)sorted[j]);
    // different length, or same length but different bytes => new example
    if (alen != blen ||
        memcmp(sorted[last_unique] + sizeof(int), sorted[j] + sizeof(int), alen)) {
      last_unique++;
      sorted[last_unique] = sorted[j];
      sorted[last_unique][sizeof(int)+blen] = 1;
    }
  }
  int num_unique = last_unique+1;
  printf("%d unique examples\n", num_unique);

  // collapse examples
  // collapse() groups consecutive unique examples whose first labelsize
  // label ints are equal; it fills X.x (one entry per group, pointing at the
  // group's first example with a member count) and X.num (number of groups).
  // NOTE(review): the original annotations suggest a group holds detections
  // of different components at the same image/position -- unverified.
  collapse(&X, sorted, num_unique);
  printf("%d collapsed examples\n", X.num);

  // initial model
  // One weight vector per block, read from modfile.
  double **w = (double **)malloc(sizeof(double *)*X.numblocks);
  check(w != NULL);
  f = fopen(modfile, "rb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(w[i] != NULL);
    count = fread(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // lower bounds
  // Same shape as w, read from lobfile.
  double **lb = (double **)malloc(sizeof(double *)*X.numblocks);
  check(lb != NULL);
  f = fopen(lobfile, "rb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(lb[i] != NULL);
    count = fread(lb[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // train
  // gd() (defined elsewhere) updates w in place; the original notes describe
  // it as gradient descent.
  printf("Training");
  gd(C, J, X, w, lb);
  printf("done\n");

  // save model
  // Overwrites modfile with the trained weights, block by block.
  printf("Saving model\n");
  f = fopen(modfile, "wb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // score examples
  // Scores all num examples in original file order (duplicates included).
  printf("Scoring\n");
  double *s = score(X, examples, num, w);

  // Write info file
  printf("Writing info file\n");
  f = fopen(inffile, "w");
  check(f != NULL);
  for (int i = 0; i < num; i++) {
    int len = ((int *)examples[i])[0];
    // label, score, unique flag
    count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i],
                    (int)examples[i][sizeof(int)+len]);
    check(count > 0);
  }
  fclose(f);

  printf("Freeing memory\n");
  for (int i = 0; i < X.numblocks; i++) {
    free(w[i]);
    free(lb[i]);
  }
  free(w);
  free(lb);
  free(s);
  for (int i = 0; i < num; i++)
    free(examples[i]);
  free(examples);
  free(sorted);
  free(X.x);
  free(X.blocksizes);
  free(X.regmult);
  free(X.learnmult);

  return 0;
}
相关文章推荐
- DPM(Defomable Parts Model) 源码分析-训练(三)
- DPM(Defomable Parts Model) 源码分析-训练
- DPM(Defomable Parts Model) 源码分析-训练(三)
- DPM(Defomable Parts Model) 源码分析-训练(三)
- DPM(Defomable Parts Model) 源码分析-训练(三)
- DPM(Defomable Parts Model) 源码分析-检测(二)
- DPM(Defomable Parts Model) 源码分析
- DPM(Defomable Parts Model) 源码分析-检测(二)
- DPM(Defomable Parts Model) 源码分析-检测(二)
- DPM(Defomable Parts Model) 源码分析-检测(二)
- DPM(Defomable Parts Model) 源码分析-检测(二)
- DPM(Defomable Parts Model) 源码分析-检测(二)
- 在windows下运行Felzenszwalb的Deformable Part Model(DPM)源码voc-release3.1来训练自己的模型
- DPM——训练源码分析
- 在windows下运行Felzenszwalb的Deformable Part Model(DPM)源码voc-release3.1来训练自己的模型
- Deformable Parts Model (DPM) 简介
- asp.net mvc源码分析-Action篇 IModelBinder
- DPM(Deformable Parts Model)--原理(一)
- Asp.net MVC源码分析 -- 获取ModelBinder的优先级
- springMVC源码分析--ModelFactory