您的位置:首页 > 其它

基于HOG特征的Adaboost行人检测

2014-03-14 16:24 477 查看
原地址:http://blog.csdn.net/van_ruin/article/details/9166591

1.方向梯度直方图(Histogram of Oriented Gradient, HOG)特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。它通过计算和统计图像局部区域的梯度方向直方图来构成特征。基本知识可以参考博客:http://blog.csdn.net/zouxy09/article/details/7929348

2.Adaboost的基础知识可以参考书籍:统计学习方法,第八章-提升方法adaboost。

这里利用HOG来训练Adaboost行人检测。在Haar-Adaboost算法中,弱分类器仅对一维分类。但是在Hog特征中,特征是每个block的串联。如果仅对一维分类(一个cell的其中一个方向的权值),就不能有效利用block的归一化效果。所以我们使用logistic弱分类器对每个block进行分类(实验中,每个block包含4个cell,每个cell有9个bin,即36维特征)。

本实验需要注意的地方:
1.  adaboost误差率需要计算权重
2.  logistic回归需要使用带权重的logistic分类器
3.  logistic分类可能与数据分布相反。需要计算两次。(相反的情况下,拟合没有意义,需要将数据反转(1->0,0->1))
发现总结与问题
1. 公理1.  对于任何数据的二值分类,能够得到正确率大于等于0.5的线性分类器。
2. 推论. 对于任何带权重数据的二值分类,能够得到(加权)正确率大于等于0.5的线性分类器。
3. 推论?  对于任何带权重数据的n值分类,能够得到(加权)正确率大于等于1/n的线性分类器。
4. 对于与logistic函数分布相反的数据,应该如何处理?(本实验的处理方式如前面所述)。
实验结果后的猜想
猜想1:Adaboost弱分类器所选取的特征仍然要保持一定的颗粒度。像素级的特征是无效的。
实验结果与分析
训练集: 500/500;测试集: 19/22(200个弱分类器)
测试数据较少,但是训练集的高正确率至少证明其能够由弱分类器(错误率普遍在0.25左右)提高样本数据集的精度。
17张图片中,有部分图片较模糊,行人影像较小,可能导致难以分辨。

下面给出代码,希望各位能够指正错误。说明:本代码全部由自己编写,所用函数未调用OpenCV实用库函数,及机器学习库函数(基本数据除外)。

/***********************************************************/
/** Copyright by Weidi Xu, S.C.U.T in Guangzhou, Guangdong**/
/***********************************************************/

#include <opencv2\opencv.hpp>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <ctime>

using std::clock_t;
using std::clock;
using namespace cv;

// ---- Data-set sizes ----
const int NUM_POSIMAGE = 500;                           // positive training images
const int NUM_NEGIMAGE = 1000;                          // negative training images
const int NUM_IMAGE = NUM_POSIMAGE + NUM_NEGIMAGE;      // all training images (1500)
const int NUM_TESTIMAGE = 22;                           // images evaluated after training
const int MAX_DIMENSION = 3781;                         // not referenced by the code visible in this file

// ---- HOG window geometry (all sizes in pixels) ----
const int IMAGE_ROWS = 128;
const int IMAGE_COLS = 64;
const int CELLSIZE = 8;                                 // side of one square cell
const int BLOCKSIZE = 16;                               // side of one square block
const int MOVELENGTH = 8;                               // stride between neighbouring blocks
const int BINSIZE = 9;                                  // orientation bins per cell

// ---- Numeric helpers ----
const double PI = 2*std::acos(0.0);
const double eps = 1e-8;

// ---- Derived quantities ----
// Number of block positions along each axis of the window.
const int NUM_BLOCK_ROWS = (IMAGE_ROWS - BLOCKSIZE)/MOVELENGTH + 1;
const int NUM_BLOCK_COLS = (IMAGE_COLS - BLOCKSIZE)/MOVELENGTH + 1;
// Length of one block's feature vector: every cell of the block contributes
// BINSIZE values, and one extra leading slot pairs with theta[0] (bias term).
const int NUM_BLOCK_FEATURES =
    (BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE + 1;

//Training data extracted from the images.
//features[image][block_row][block_col] is the feature vector of a single block, so a
//weak classifier can address one block directly. With the constants above this is
//1500*15*7*37 doubles of static storage.
double features[NUM_IMAGE][NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];
double type[NUM_IMAGE]; //label used as the logistic-regression target: 1 - pos, 0 - neg
double y[NUM_IMAGE]; //label used in the AdaBoost weight update: 1 - pos, -1 - neg

//number of weak classifiers / boosting rounds (changed between experiments)
const int NUM_WEAKCLASSIFIER = 100;

//AdaBoost sample weights, one per training image; main() initialises them to 1/NUM_IMAGE
//and renormalises them after every round
double weight[NUM_IMAGE];

// Logistic (sigmoid) response 1/(1+exp(-theta.x)).
// Both arrays are read over NUM_BLOCK_FEATURES entries (37 with the current constants).
double logistic(double theta[], double x[])
{
    double dot = 0;
    int k = 0;
    while(k < NUM_BLOCK_FEATURES)
    {
        dot += theta[k]*x[k];
        ++k;
    }
    const double denominator = 1 + std::exp(-dot);
    return 1/denominator;
}

// One AdaBoost weak learner: a logistic classifier over the feature vector of a
// single HOG block, together with its boosting coefficient.
struct WeakClassifier
{
    double _theta[NUM_BLOCK_FEATURES]; // logistic weights; _theta[0] pairs with the leading feature slot
    int _index_row;                    // row of the block this classifier looks at
    int _index_col;                    // column of the block this classifier looks at
    int _isreverse;                    // 0: logistic > 0.5 means pos; non-zero: logistic > 0.5 means neg
    double _alpha;                     // AdaBoost vote weight
    double _error;                     // weighted training error recorded for this classifier

    // Resets the classifier to an "untrained" state (block indices are set to -1).
    void clear()
    {
        memset(_theta, 0, NUM_BLOCK_FEATURES*sizeof(double));
        _alpha = 0.0;
        _error = 1;
        _index_row = -1;
        _index_col = -1;
        _isreverse = 1;
    }

    // Classifies one image's block features.
    // Returns 1 (pos) or -1 (neg).
    int cal(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
    {
        // The response must stay a double: storing it in an int truncates every
        // value in (0,1) to 0, which makes the 0.5 threshold meaningless.
        const double response = logistic(_theta, x[_index_row][_index_col]);
        const bool above = response > 0.5;
        if(_isreverse)
            return above ? -1 : 1;
        return above ? 1 : -1;
    }

    // Writes the classifier to stdout: the theta values on one line, then
    // "_index_row _index_col _isreverse _alpha _error" on the next.
    void print()
    {
        for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
            printf("%lf ", _theta[i]);
        printf("\n");

        printf("%d ", _index_row);
        printf("%d ", _index_col);
        printf("%d ", _isreverse);
        printf("%lf ", _alpha);
        printf("%lf \n", _error);
    }
}weakClassifier[NUM_WEAKCLASSIFIER];

// Utility: converts an angle given in radians to degrees.
double arc2angle(double arc)
{
    const double halfTurns = arc/PI;
    return halfTurns*180.0;
}

// Utility: converts an angle given in degrees to radians.
double angle2arc(double angle)
{
    const double halfTurns = angle/180.0;
    return halfTurns*PI;
}

// Formats the path of the i-th positive training image, "pos/pos (<i>).png",
// into the caller-supplied buffer. The buffer must be large enough for the path.
void posfilename(int i, char* filename)
{
    const char pattern[] = "pos/pos (%d).png";
    std::sprintf(filename, pattern, i);
}

// Formats the path of the i-th negative training image, "neg/neg (<i>).png",
// into the caller-supplied buffer. The buffer must be large enough for the path.
void negfilename(int i, char* filename)
{
    const char pattern[] = "neg/neg (%d).png";
    std::sprintf(filename, pattern, i);
}

// Formats the path of the i-th test image, "test_pos/test (<i>).png",
// into the caller-supplied buffer. The buffer must be large enough for the path.
void testfilename(int i, char* filename)
{
    const char pattern[] = "test_pos/test (%d).png";
    std::sprintf(filename, pattern, i);
}

// In-place square-root compression: I(x,y) = sqrt(I(x,y)) for every byte of the image.
// Only 8-bit unsigned matrices are accepted (any channel count).
void normalizeImage(Mat& inputImage)
{
    // depth() returns a depth code (CV_8U, CV_8S, ...), not a byte size, so it has to be
    // compared with CV_8U. The previous "!= sizeof(uchar)" test let every depth except
    // CV_8S through.
    CV_Assert(inputImage.depth() == CV_8U);

    const int channels = inputImage.channels();
    int nRows = inputImage.rows;
    int nCols = inputImage.cols*channels;

    // A continuous matrix can be walked as one long row.
    if(inputImage.isContinuous())
    {
        nCols *= nRows;
        nRows = 1;
    }

    for(int i = 0; i < nRows; ++i)
    {
        uchar* p = inputImage.ptr<uchar>(i);
        for(int j = 0; j < nCols; ++j)
        {
            // sqrt of a value in [0,255] is in [0,16), so the narrowing is lossless
            // apart from the intended truncation toward zero.
            p[j] = static_cast<uchar>(sqrt(p[j]*1.0));
        }
    }
}

// Computes per-pixel intensity differences of a single-channel 8-bit image.
//   xGradient[i][j] : difference along the first index (rows),    I(i+1,j) - I(i-1,j)
//   yGradient[i][j] : difference along the second index (columns), I(i,j+1) - I(i,j-1)
// On the image border the missing neighbour is replaced by the pixel itself and the
// one-sided difference is doubled so it is on the same scale as the interior values.
// The image must be at least 2x2 and no larger than IMAGE_ROWS x IMAGE_COLS, because
// the output arrays have that fixed size.
void calGredient(const Mat& inputImage, double xGradient[IMAGE_ROWS][IMAGE_COLS], double yGradient[IMAGE_ROWS][IMAGE_COLS])
{
    const int nrows = inputImage.rows;
    const int ncols = inputImage.cols;

    CV_Assert(nrows >= 2 && nrows <= IMAGE_ROWS && ncols >= 2 && ncols <= IMAGE_COLS);

    for(int i = 0 ; i < nrows; i++)
    {
        // Clamp the row neighbours at the top/bottom edge.
        const int prevRow = (i > 0) ? i-1 : i;
        const int nextRow = (i < nrows-1) ? i+1 : i;
        const int rowScale = (prevRow == i || nextRow == i) ? 2 : 1;

        for(int j = 0 ; j < ncols; j++)
        {
            // Clamp the column neighbours at the left/right edge.
            const int prevCol = (j > 0) ? j-1 : j;
            const int nextCol = (j < ncols-1) ? j+1 : j;
            const int colScale = (prevCol == j || nextCol == j) ? 2 : 1;

            // uchar operands are promoted to int, so the differences can be negative.
            xGradient[i][j] = (inputImage.at<uchar>(nextRow,j) - inputImage.at<uchar>(prevRow,j))*rowScale;
            // The left/right border values belong to yGradient; they were previously
            // written into xGradient, leaving yGradient's border columns uninitialised.
            yGradient[i][j] = (inputImage.at<uchar>(i,nextCol) - inputImage.at<uchar>(i,prevCol))*colScale;
        }
    }
}

// Computes the block-wise HOG features of one IMAGE_ROWS x IMAGE_COLS image.
//
// outputFeature[r][c] receives the feature vector of the block at block position (r,c):
//   [0]      constant 1 (pairs with theta[0] of the logistic classifier)
//   [1..]    the BINSIZE-bin orientation histograms of the block's cells, cell after cell
// Histogram entries are sums of gradient magnitudes; no per-block normalisation is
// applied in this function.
//
// The process is aborted if the image does not have the expected size.
void calHogFeatures(Mat& inputImage, double outputFeature[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
{
    const int nrows = inputImage.rows;
    const int ncols = inputImage.cols;

    if(nrows != IMAGE_ROWS || ncols != IMAGE_COLS)
        abort();

    // Per-pixel differences along both image axes.
    double xGradient[IMAGE_ROWS][IMAGE_COLS];
    double yGradient[IMAGE_ROWS][IMAGE_COLS];
    calGredient(inputImage, xGradient, yGradient);

    // Gradient magnitude and direction (degrees, in [-180, 180]) for every pixel.
    double gradient[IMAGE_ROWS][IMAGE_COLS];
    double direction[IMAGE_ROWS][IMAGE_COLS];

    for(int i = 0 ; i < nrows; i++)
    {
        for(int j = 0 ; j < ncols; j++)
        {
            const double gx = xGradient[i][j];
            const double gy = yGradient[i][j];
            gradient[i][j] = sqrt(gx*gx + gy*gy);
            direction[i][j] = arc2angle(atan2(gy, gx));
        }
    }

    // Orientation histogram of every CELLSIZE x CELLSIZE cell.
    double cellinfo[IMAGE_ROWS/CELLSIZE][IMAGE_COLS/CELLSIZE][BINSIZE];
    memset(cellinfo, 0, sizeof(cellinfo));

    for(int i = 0; i < IMAGE_ROWS/CELLSIZE; i++)
    {
        for(int j = 0 ; j < IMAGE_COLS/CELLSIZE; j++)
        {
            double* cell = cellinfo[i][j];

            for(int ci = 0 ; ci < CELLSIZE; ci++)
            {
                for(int cj = 0; cj < CELLSIZE; cj++)
                {
                    // Pixel coordinates inside the whole image.
                    const int px = i*CELLSIZE + ci;
                    const int py = j*CELLSIZE + cj;

                    // Map [-180, 180] onto BINSIZE equal-width bins.
                    int binindex = int((direction[px][py]+180.0)/(360.0/BINSIZE));
                    // The two end points need explicit handling: +180 would otherwise
                    // land in bin BINSIZE, one past the last valid bin.
                    if(fabs(direction[px][py]-180) < eps)
                    {
                        binindex = BINSIZE-1;
                    }
                    if(fabs(direction[px][py]+180) < eps)
                    {
                        binindex = 0;
                    }
                    if(binindex < 0 || binindex >= BINSIZE)
                    {
                        printf("Wrong binindex: %d %lf %lf %lf", binindex, xGradient[px][py], yGradient[px][py], direction[px][py]);
                        abort();
                    }

                    // Each pixel votes with its gradient magnitude.
                    cell[binindex] += gradient[px][py];
                }
            }
        }
    }

    // Blocks are assembled from whole cells, so the block stride must be a
    // multiple of the cell size.
    if(MOVELENGTH%CELLSIZE != 0)
    {
        // "%%" prints a literal percent sign; a bare "%C" would be parsed as a
        // conversion specifier with no matching argument.
        printf("MOVELENGTH%%CELLSIZE != 0");
        abort();
    }

    // Concatenate the histograms of the cells covered by each block.
    for(int i = 0 ; i < NUM_BLOCK_ROWS; i++)
    {
        for(int j = 0 ; j < NUM_BLOCK_COLS; j++)
        {
            int bfindex = 0;
            outputFeature[i][j][bfindex++] = 1; // constant slot for theta[0]

            for(int c1 = 0; c1 < BLOCKSIZE/CELLSIZE; c1++)
            {
                for(int c2 = 0 ; c2 < BLOCKSIZE/CELLSIZE; c2++)
                {
                    // Index of the cell inside the cell grid.
                    const int cx = i*MOVELENGTH/CELLSIZE + c1;
                    const int cy = j*MOVELENGTH/CELLSIZE + c2;

                    for(int binindex = 0 ; binindex < BINSIZE; binindex++)
                    {
                        outputFeature[i][j][bfindex++] = cellinfo[cx][cy][binindex];
                    }
                }
            }
        }
    }
}

//use global variables
void trainLogisticRegression(int block_row,int block_col, double theta[], double& errorrate, int& isreverse)
{
double theta1[NUM_BLOCK_FEATURES], theta2[NUM_BLOCK_FEATURES];
memset(theta1, 0, NUM_BLOCK_FEATURES*sizeof(double));
memset(theta2, 0, NUM_BLOCK_FEATURES*sizeof(double));
double errorrate1 = 0;
double errorrate2 = 0;
double rightnum1 = 0;
double rightnum2 = 0;
isreverse = 0;

//cal parameter thetas
for(int k = 0 ; k < 100000; k++)
{
int i = rand()%NUM_IMAGE;
int j = rand()%NUM_BLOCK_FEATURES;
theta1[j] = theta1[j] + weight[i]*0.01*(type[i] - logistic(theta1, features[i][block_row][block_col]))*features[i][block_row][block_col][j];
}

for(int i = 0 ; i < NUM_IMAGE; i++)
{
double tmp = logistic(theta1, features[i][block_row][block_col]);
if(tmp > 0.5 && fabs(type[i] - 1) < eps)
rightnum1 += 1.0*weight[i];
if(tmp < 0.5 && fabs(type[i] - 0) < eps)
rightnum1 += 1.0*weight[i];
}
errorrate1 = 1 - rightnum1;

//calreverse
for(int k = 0 ; k < 100000; k++)
{
int i = rand()%NUM_IMAGE;
int j = rand()%NUM_BLOCK_FEATURES;
theta2[j] = theta2[j] + weight[i]*0.01*(1- type[i] - logistic(theta2, features[i][block_row][block_col]))*features[i][block_row][block_col][j];
}

for(int i = 0 ; i < NUM_IMAGE; i++)
{
double tmp = logistic(theta2, features[i][block_row][block_col]);
if(tmp > 0.5 && fabs(type[i] - 0) < eps)
rightnum2 += 1.0*weight[i];
if(tmp < 0.5 && fabs(type[i] - 1) < eps)
rightnum2 += 1.0*weight[i];
}
errorrate2 = 1 - rightnum2;

if(errorrate1 < errorrate2)
{
for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
{
theta[i] = theta1[i];
}
isreverse = 0;
errorrate = errorrate1 + eps;
}
else
{
for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
{
theta[i] = theta2[i];
}
isreverse = 1;
errorrate = errorrate2 + eps;
}
return;
}

// Trains one logistic classifier per block position (row by row) under the current
// sample weights and returns the one with the smallest weighted error as an AdaBoost
// weak classifier, with _alpha = 0.5*ln((1-error)/error).
// Aborts if a negative error is reported or if the best error exceeds 0.5.
WeakClassifier trainClassifier()
{
    // Result of the block currently being tried.
    double candidateTheta[NUM_BLOCK_FEATURES];
    double candidateError = 1;
    int candidateReverse = 0;

    // Best result seen so far.
    double winnerTheta[NUM_BLOCK_FEATURES];
    double winnerError = 1;
    int winnerRow = -1;
    int winnerCol = -1;
    int winnerReverse = 0;

    const int totalBlocks = NUM_BLOCK_ROWS*NUM_BLOCK_COLS;
    for(int block = 0; block < totalBlocks; ++block)
    {
        const int row = block/NUM_BLOCK_COLS;
        const int col = block%NUM_BLOCK_COLS;

        trainLogisticRegression(row, col, candidateTheta, candidateError, candidateReverse);

        if(candidateError < 0)
        {
            printf("Wrong errorrate < 0 : %lf", candidateError);
            abort();
        }

        if(!(candidateError < winnerError))
            continue;

        memcpy(winnerTheta, candidateTheta, sizeof(winnerTheta));
        winnerError = candidateError;
        winnerRow = row;
        winnerCol = col;
        winnerReverse = candidateReverse;
    }

    if(winnerError > 0.5)
    {
        printf("The best_errorrate is greater than 0.5.\n");
        abort();
    }

    // Package the winner.
    WeakClassifier result;
    memcpy(result._theta, winnerTheta, sizeof(winnerTheta));
    result._index_row = winnerRow;
    result._index_col = winnerCol;
    result._isreverse = winnerReverse;
    result._error = winnerError;
    result._alpha = 0.5*std::log((1 - winnerError)/winnerError);
    return result;
}

int calByStrongClassifier(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
{
double ans = 0;
for(int i = 0 ; i < NUM_WEAKCLASSIFIER; i++)
{
ans += weakClassifier[i]._alpha * weakClassifier[i].cal(x);
}
if(ans > 0)
return 1;
else
return -1;
}

/*
size: 128*64;
type: CV_8UC1;
Block大小为16*16(BLOCKSIZE);
Cell大小为8*8(CELLSIZE);
Block在检测窗口中上下移动尺寸为8*8(MOVELENGTH);
1个cell的梯度直方图化成9个bin;
滑动窗口在检测图片中滑动的尺寸为6*6;
*/

int main()
{
char filename[100];
IplImage* inputImage = NULL;
clock_t timecount = clock();

//load posimage
for(int i = 0 ; i < NUM_POSIMAGE; i++)
{
posfilename(i+1 ,filename);

//load grey image: set the parameter to 0;
inputImage = cvLoadImage(filename, 0);

//cal features;
Mat inputMat(inputImage);
calHogFeatures(inputMat, features[i]);
type[i] = 1;
y[i] = 1;
//printf("%d \n", inputMat.cols);

//release memory
inputMat.release();
cvReleaseImage(&inputImage);
inputImage = NULL;
}

printf("The feature process of pos-image have done in %d second.\n", (clock()-timecount)/1000);
timecount = clock();

//load neg images
for(int i = 0; i < NUM_NEGIMAGE; i++)
{
negfilename(i+1, filename);

//load grey image: set the parameter to 0;
inputImage = cvLoadImage(filename, 0);
type[NUM_POSIMAGE+i] = 0;
y[NUM_POSIMAGE+i] = -1;

Mat inputMat(inputImage);
calHogFeatures(inputMat, features[NUM_POSIMAGE+i]);

//release memory
inputMat.release();
cvReleaseImage(&inputImage);
inputImage = NULL;
}

printf("The feature process of neg-image have done in %d second.\n", (clock()-timecount)/1000);
timecount = clock();

//init weight array
for(int i = 0 ; i < NUM_IMAGE; i++)
{
weight[i] = 1.0/NUM_IMAGE;
}

//freopen
freopen("HOG_CLASSIFIER.txt", "w", stdout);

//print number of weakclassifiers;
printf("%d\n", NUM_WEAKCLASSIFIER);

//adaboost framework
for(int classifierindex = 0 ; classifierindex < NUM_WEAKCLASSIFIER; classifierindex++)
{
weakClassifier[classifierindex] = trainClassifier();

double error = weakClassifier[classifierindex]._error;
double alpha = weakClassifier[classifierindex]._alpha;

//printf("%d classifier: %lf ====\n",classifierindex, error);
//printf("_index_row %d _index_col %d\n", weakClassifier[classifierindex]._index_row, weakClassifier[classifierindex]._index_col);

double identitysum = 0;
for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)
{
weight[sampleindex] *= std::exp(-alpha*y[sampleindex]*weakClassifier[classifierindex].cal(features[sampleindex]));
identitysum += weight[sampleindex];
}

//reweight
for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)
{
weight[sampleindex] /= identitysum;
}

weakClassifier[classifierindex].print();
}

freopen("CON", "w", stdout);
int rightnum = 0;
for(int testindex = 0 ;testindex < NUM_TESTIMAGE; testindex ++)
{
//posfilename(testindex+1, filename);
testfilename(testindex+1, filename);
inputImage = cvLoadImage(filename, 0);

double testfeatures[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];
memset(testfeatures, 0, sizeof(testfeatures));

Mat inputMat(inputImage);
calHogFeatures(inputMat, testfeatures);

if(calByStrongClassifier(testfeatures) == 1)
{
rightnum++;
//printf("Yes\n");
}
else
//printf("No\n");

inputMat.release();
}
printf("Accuracy: %d\n", rightnum);
}
//测试数据是网上流行的128*64灰度行人图像数据。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: