您的位置:首页 > 其它

纯手工实现adaboost(adaboost源码)

2016-05-30 11:49 363 查看
adaboost --ensemble learning的一种

说明几点 测试集ionosphere.data可以自行下载

检查过几遍 难免疏漏 有问题欢迎提出

感觉大家对adaboost的实现兴趣都不大

[cpp] view
plain copy







#include "stdio.h"

#include "assert.h"

#include "string.h"

#include "stdlib.h"

#include "math.h"

#include "adaboost.h"

#define DATA_NAME "..//dataset//ionosphere.data"

SampleHeader sampleHeader;

IdxHeader idx;

//==================================================================

//函数名: sort

//作者: qiurenbo

//日期: 2014-11-25

//功能: 冒泡排序

//输入参数:double a[]

// n 数组长度

//返回值: 无

//修改记录:

//==================================================================

void sort(double a[], int n)

{

double tmp;

for (int i = 0; i < n - 1; i++)

{

for (int j = 0; j < n - i - 1; j++)

{

if (a[j] > a[j+1])

{

tmp = a[j];

a[j] = a[j+1];

a[j+1] = tmp;

}

}

}

}

//==================================================================

//函数名: countFeature

//作者: qiurenbo

//日期: 2014-11-25

//功能: 计算一行上有多少特征

//输入参数:char* buf 文本的一行

//返回值: 特征个数

//修改记录:

//==================================================================

int countFeature(const char* buf)

{

const char* p = buf;

int cnt = 0;

while(*p != NULL)

{

if (*p == ',')

cnt++;

p++;

}

return cnt;

}

//==================================================================

//函数名: setFeature

//作者: qiurenbo

//日期: 2014-11-25

//功能: 将读取的特征分配到正负样本结构体上

//输入参数:char* buf 文本的一行

//返回值: 无

//修改记录:

//==================================================================

void setFeature(char* buf)

{

int i = 0;

struct Sample sample;

char*p = strtok(buf, ",");

sample.feature[i++] = atof(p);

while(1)

{

if (*p != 'g' && *p != 'b')

sample.feature[i++] = atof(p);

else

break;

p = strtok(NULL, ",");

}

if (*p == 'g')

sample.indicate = 1;

else if (*p == 'b')

sample.indicate = -1;

else

assert(0);

//idx 每行是所有样本的同一特征

for(i = 0; i < sampleHeader.featureNum; i++)

idx.feature[i][idx.samplesNum] = sample.feature[i];

sampleHeader.samples[sampleHeader.samplesNum] = sample;

sampleHeader.samplesNum++;

idx.samplesNum++;

};

//==================================================================

//函数名: loadData

//作者: qiurenbo

//日期: 2014-11-25

//功能: 读取文本数据

//输入参数:char* buf 文本的一行

//返回值: 无

//修改记录:

//==================================================================

void loadData()

{

FILE *fp = NULL;

char buf[1000];

int featureCnt = 0;

double* featrue = NULL;

double* featruePtr = NULL;

int i = 0;

fp = fopen(DATA_NAME, "r");

assert(fp);

fgets(buf, 1000, fp);

idx.featureNum = sampleHeader.featureNum = countFeature(buf);

setFeature(buf);

//统计样本数

while(!feof(fp))

{

fgets(buf, 1000, fp);

setFeature(buf);

}

fclose(fp);

for (i = 0; i < idx.featureNum; i++)

sort(idx.feature[i], idx.samplesNum);

}

//==================================================================

//函数名: CreateStump

//作者: qiurenbo

//日期: 2014-11-26

//功能: 创建一个stump分类器

//输入参数:无

//返回值: stump

//修改记录:

//==================================================================

Stump CreateStump()

{

int i,j,k;

Stump stump;

double min = 0xffffffff;

double err = 0;

double flipErr = 0;

double feature;

int indicate;

double weight;

double pre;

for( i = 0; i < idx.featureNum; i++)

{

pre = 0xffffffff;

for(j = 0; j < idx.samplesNum; j++)

{

err = 0;

double rootFeature = idx.feature[i][j];

//跳过相同的值

if (pre == rootFeature)

continue;

for (k = 0; k < sampleHeader.samplesNum; k++)

{

feature = sampleHeader.samples[k].feature[i];

indicate = sampleHeader.samples[k].indicate;

weight = sampleHeader.samples[k].weight;

if ((feature < rootFeature && indicate != 1) ||\

(feature >= rootFeature && indicate != -1)

)

err += weight;

}

//左边是1,还是右边是1,选取error最小的组合

flipErr = 1 - err;

err = err < flipErr ? err:flipErr;

//选取具有最小err的特征rootFeature

if (err < min)

{

min = err;

stump.fIdx = i;

stump.ft = rootFeature;

if (err < flipErr)

{

stump.left = 1;

stump.right = -1;

}

else

{

stump.left = -1;

stump.right = 1;

}

}

pre = rootFeature;

}

}

stump.alpha = 0.5*log(1.0/min - 1);

return stump;

}

//==================================================================

//函数名: reSetWeight

//作者: qiurenbo

//日期: 2014-11-26

//功能: 每次迭代重新调整权重

//输入参数:stump

//返回值: 无

//修改记录:

//==================================================================

void reSetWeight(struct Stump stump)

{

int i;

double z = 0;

//计算规范化因子z

for(i = 0; i < sampleHeader.samplesNum; i++)

{

double feature = (sampleHeader.samples[i]).feature[stump.fIdx];

double rs = feature < stump.ft ? stump.left:stump.right;

rs = stump.alpha * rs * sampleHeader.samples[i].indicate;

z += sampleHeader.samples[i].weight * exp(-1.0 * rs);

}

//调整各个样本的权值

for(i = 0; i < sampleHeader.samplesNum; i++)

{

double feature = (sampleHeader.samples[i]).feature[stump.fIdx];

double rs = feature < stump.ft ? stump.left:stump.right;

rs = stump.alpha * rs * sampleHeader.samples[i].indicate;

sampleHeader.samples[i].weight= sampleHeader.samples[i].weight * exp(-1.0 * rs) / z;

}

#ifdef DEBUG

//debug

for(i = 0; i < 10; i++)

{

double feature = (sampleHeader.samples[i]).feature[stump.fIdx];

double rs = feature < stump.ft ? stump.left:stump.right;

rs = stump.alpha * rs * sampleHeader.samples[i].indicate;

printf("weight:%lf, rs:%lf\n",sampleHeader.samples[i].weight , rs);

}

//getchar();

#endif

}

//==================================================================

//函数名: AdaBoost

//作者: qiurenbo

//日期: 2014-11-26

//功能: adaboost训练弱分类器

//输入参数:interation 迭代次数

//返回值: 无

//修改记录:

//==================================================================

void AdaBoost(int interation)

{

int i;

struct ClassifierHeader head;

struct Classifier* pCls = NULL;

struct Classifier* tmp = NULL;

head.classifierNum = interation;

loadData();

//设置初始样本权重

for(i = 0; i < sampleHeader.samplesNum; i++)

sampleHeader.samples[i].weight = 1.0 / sampleHeader.samplesNum;

head.classifier = (struct Classifier*)malloc(sizeof(struct Classifier));

pCls = head.classifier;

pCls->stump = CreateStump();

reSetWeight(pCls->stump);

//printf("completed:%lf%%\r", 1.0/head.classifierNum*100);

printf("+-----------+--+-------+\n" );

printf("| alpha |id| ft |\n");

printf("+-----------+--+-------+\n" );

printf("|%.9lf|%2d|%+.4lf|\n", pCls->stump.alpha, pCls->stump.fIdx, pCls->stump.ft);

printf( "+-----------+--+-------+\n" );

for (i = 1; i < head.classifierNum; i++)

{

pCls = pCls->next = (struct Classifier*)malloc(sizeof(struct Classifier));

pCls->stump = CreateStump();

reSetWeight(pCls->stump);

printf("|%.9lf|%2d|%+.4lf|\n", pCls->stump.alpha, pCls->stump.fIdx, pCls->stump.ft);

printf( "+-----------+--+-------+\n" );

//printf("completed:%lf%%\r", 1.0*(i+1)/head.classifierNum*100);

}

printf("\n");

for(i = 0, pCls = head.classifier; i < head.classifierNum; i++)

{

tmp = pCls;

pCls = tmp->next;

free(tmp);

}

}

void main()

{

AdaBoost(100);

}

adaboost.h

[cpp] view
plain copy







#ifndef _ADABOOST_H_

#define _ADABOOST_H_

#define MAX_FEATURE 100

#define MAX_SAMPLES 500

//#define DEBUG

struct Sample

{

double weight;

double feature[MAX_FEATURE];

int indicate;

};

struct SampleHeader

{

int samplesNum;

int featureNum;

//double feature[MAX_SAMPLES][MAX_FEATURE];

struct Sample samples[MAX_SAMPLES];

};

struct Stump

{

int left;

int right;

double alpha;

int fIdx;

double ft;

};

struct Classifier

{

struct Stump stump;

struct Classifier* next;

};

struct ClassifierHeader

{

int classifierNum;

struct Classifier* classifier;

};

struct IdxHeader

{

int samplesNum;

int featureNum;

double feature[MAX_FEATURE][MAX_SAMPLES];

};

#endif
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  adaboost