Machine Learning: Classifying Wine with a Decision Tree
2017-11-15 21:37
1. Preparing the dataset
I use the Wine Data Set from the UCI repository. Download link: http://download.csdn.net/download/tiankong_/10120450

Data description: the first column is the class label, encoded as 1, 2, or 3. The remaining 13 columns are the feature attributes: Alcohol, Malic acid, Ash, Alcalinity of ash, Magnesium, Total phenols, Flavanoids, Nonflavanoid phenols, Proanthocyanins, Color intensity, Hue, OD280/OD315 of diluted wines, and Proline.

2. Code example
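For reference, the CSV file the program reads has no header row; each line is the class label followed by the 13 feature values. The first row of wine.data is shown below (it is the same sample the code later uses as test1):

```
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
```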
```cpp
#include "opencv2/ml/ml.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/core/utility.hpp"
#include <stdio.h>
#include <string>
#include <map>
#include <vector>
#include <iostream>

using namespace std;
using namespace cv;
using namespace cv::ml;

static void help()
{
    printf(
        "\nThis sample demonstrates how to use different decision trees and forests including boosting and random trees.\n"
        "Usage:\n\t./tree_engine [-r <response_column>] [-ts type_spec] <csv filename>\n"
        "where -r <response_column> specifies the 0-based index of the response (0 by default)\n"
        "-ts specifies the var type spec in the form ord[n1,n2-n3,n4-n5,...]cat[m1-m2,m3,m4-m5,...]\n"
        "<csv filename> is the name of training data file in comma-separated value format\n\n");
}

static void train_and_print_errs(Ptr<StatModel> model, const Ptr<TrainData>& data)
{
    bool ok = model->train(data);
    if (!ok)
    {
        printf("Training failed\n");
    }
    else
    {
        printf("train error: %f\n", model->calcError(data, false, noArray()));
        printf("test error: %f\n\n", model->calcError(data, true, noArray()));
    }
}

int main(int argc, char** argv)
{
    if (argc < 2)
    {
        help();
        return 0;
    }

    const char* filename = 0;
    int response_idx = 0;
    std::string typespec;
    for (int i = 1; i < argc; i++)
    {
        if (strcmp(argv[i], "-r") == 0)
            sscanf(argv[++i], "%d", &response_idx);
        else if (strcmp(argv[i], "-ts") == 0)
            typespec = argv[++i];
        else if (argv[i][0] != '-')
            filename = argv[i];
        else
        {
            printf("Error. Invalid option %s\n", argv[i]);
            help();
            return -1;
        }
    }

    printf("\nReading in %s...\n\n", filename);
    const double train_test_split_ratio = 0.5;

    // Load the training data
    Ptr<TrainData> data = TrainData::loadFromCSV(filename, 0, response_idx, response_idx + 1, typespec);
    if (data.empty())
    {
        printf("ERROR: File %s can not be read\n", filename);
        return 0;
    }
    data->setTrainTestSplitRatio(train_test_split_ratio);

    // Samples to predict
    float test1[] = { 14.23, 1.71, 2.43, 15.6, 127, 2.8, 3.06, .28, 2.29, 5.64, 1.04, 3.92, 1065 };
    float test2[] = { 12.37, .94, 1.36, 10.6, 88, 1.98, .57, .28, .42, 1.95, 1.05, 1.82, 520 };
    float test3[] = { 12.86, 1.35, 2.32, 18, 122, 1.51, 1.25, .21, .94, 4.1, .76, 1.29, 630 };
    Mat test1Map(1, 13, CV_32FC1, test1);
    Mat test2Map(1, 13, CV_32FC1, test2);
    Mat test3Map(1, 13, CV_32FC1, test3);

    printf("======DTREE=====\n");
    // Create the decision tree
    Ptr<DTrees> dtree = DTrees::create();
    dtree->setMaxDepth(10);              // maximum depth of the tree
    dtree->setMinSampleCount(2);         // minimum number of samples required to split a node
    dtree->setRegressionAccuracy(0);     // regression accuracy (unused for classification)
    dtree->setUseSurrogates(false);      // do not use surrogate splits
    dtree->setMaxCategories(16);         // maximum number of categories
    dtree->setCVFolds(0);                // no cross-validation pruning
    dtree->setUse1SERule(false);         // do not use the 1SE rule
    dtree->setTruncatePrunedTree(false); // do not truncate pruned branches
    dtree->setPriors(Mat());             // no class priors
    train_and_print_errs(dtree, data);
    dtree->save("dtree_result.xml");

    // Reload the model from disk. Not necessary here, but shown to demonstrate the usage.
    Ptr<DTrees> dtree2 = DTrees::load<DTrees>("dtree_result.xml");
    cout << dtree2->predict(test1Map) << endl;
    cout << dtree2->predict(test2Map) << endl;
    cout << dtree2->predict(test3Map) << endl;
    cout << "============================================" << endl;

    return 0;
}
```