knn算法C++实现
2016-05-12 21:02
591 查看
knn的C++实现,有不对的地方请各位指正!谢谢!
knn原理请见:http://blog.csdn.net/u013593585/article/details/51284537
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <sstream>
// The whole file refers to standard-library names without the std:: prefix.
using namespace std;

// One sample: its feature values stored as floats.
using Tuple = vector<float>;

// Number of nearest neighbours that take part in the vote.
constexpr int k = 4;
// Number of feature values read for every sample.
constexpr int dims = 3;
// Number of distinct class labels; labels are 1, 2, 3.
constexpr int categories = 3;

// Forward declarations; the definitions follow main().
int RunKnn(vector<Tuple>& trainData, vector<int>& label, Tuple& testData, int count);
float getDist(Tuple& trainData, Tuple& testData);
void sortDist(Tuple& dists, vector<int>& label, int count);
void showDist(Tuple& dists, int count);
int main()
{
// 准备训练数据
cout << "Preparing the Trainning data----" << endl;
string fileName = "datingTestSet.txt";
fstream file(fileName);
if (!file)
{
cout << "can not open the file " << endl;
return 0;
}
vector<Tuple> trainData;
vector<int> label;
int count = 0;
while(!file.eof())
{
string str;
getline(file,str);
stringstream ss(str);
ss << str;
Tuple temp(dims,0);
for(int i = 0; i < dims; i++)
ss >> temp[i];
trainData.push_back(temp);
int t;
ss >> t;
label.push_back(t);
count ++;
}
//训练数据归一化
Tuple maxnum(dims,0);
Tuple minnum(dims,0);
for (int i = 0; i < count; i++)
{
for (int j = 0; j < dims; j++)
{
if (trainData[i][j] > maxnum[j])
maxnum[j] = trainData[i][j];
if (trainData[i][j] < minnum[j])
minnum[j] = trainData[i][j];
}
}
float chazhi;
for (int i = 0; i < dims; i++)
{
chazhi = maxnum[i] - minnum[i];
for (int j = 0; j < count; j++)
trainData[j][i] = (trainData[j][i] - minnum[i])/chazhi;
}
//准备测试数据
cout << "Preparing the testing data-----" << endl;
int ind = 80;
Tuple testData = trainData[ind];
int truthLabel = label[ind];
//knn开始
cout << "Running" << endl;
int testLabel = RunKnn(trainData,label,testData,count);
cout << "testLabel: " << testLabel << endl;
cout << "truthLabel: " << truthLabel << endl;
if(testLabel == truthLabel)
cout << "Yes!" << endl;
else
cout << "No!" << endl;
system("pause");
return 0;
}
/**
 * Squared Euclidean distance between two samples over the first `dims`
 * components (no square root is taken).
 *
 * @param trainData first sample; must hold at least `dims` values
 * @param testData  second sample; must hold at least `dims` values
 * @return sum of the squared component differences
 */
float getDist(Tuple& trainData, Tuple& testData)
{
    float sumSq = 0;
    int d = 0;
    while (d < dims)
    {
        const float delta = trainData[d] - testData[d];
        sumSq += delta * delta;
        ++d;
    }
    return sumSq;
}
/**
 * Sorts the first `count` distances in ascending order and applies the same
 * permutation to `label`, so label[i] stays paired with dists[i].
 *
 * Bubble sort: equal distances keep their relative order.
 *
 * @param dists distances, reordered in place; needs at least `count` entries
 * @param label labels, reordered in place; needs at least `count` entries
 * @param count number of leading entries to sort
 */
void sortDist(Tuple& dists, vector<int>& label,int count)
{
    for (int pass = 1; pass < count; pass++)
    {
        bool swapped = false;
        // After `pass - 1` passes the largest `pass - 1` values are already
        // in place, so compare pairs up to index count - pass. The previous
        // bound (count - pass - 1) never looked at the last element.
        for (int j = 0; j < count - pass; j++)
        {
            if (dists[j] > dists[j + 1])
            {
                float tmpDist = dists[j];
                dists[j] = dists[j + 1];
                dists[j + 1] = tmpDist;
                int tmpLabel = label[j];
                label[j] = label[j + 1];
                label[j + 1] = tmpLabel;
                swapped = true;
            }
        }
        // A pass without swaps means the range is already sorted.
        if (!swapped)
            break;
    }
}
/**
 * Classifies `testData` by majority vote among its `k` nearest training
 * samples (squared Euclidean distance, see getDist).
 *
 * Prints the distance list before and after sorting via showDist.
 *
 * @param trainData training samples; not modified
 * @param label     label of each training sample, expected in 1..categories;
 *                  not modified (a copy is sorted)
 * @param testData  sample to classify
 * @param count     number of training samples to use; clamped to the sizes of
 *                  `trainData` and `label`
 * @return the winning label in 1..categories (the lowest label wins a tie),
 *         or 0 when there is no usable neighbour
 */
int RunKnn(vector<Tuple>& trainData,vector<int>& label, Tuple& testData, int count)
{
    // Never read past the containers, whatever count the caller passed.
    if (count > static_cast<int>(trainData.size()))
        count = static_cast<int>(trainData.size());
    if (count > static_cast<int>(label.size()))
        count = static_cast<int>(label.size());
    if (count <= 0)
        return 0;

    // Compute the distance to every training sample
    Tuple dists(count, 0);
    for (int i = 0; i < count; i++)
        dists[i] = getDist(trainData[i], testData);
    showDist(dists, count);

    // Sort by distance. Work on a copy of the labels: reordering the caller's
    // vector would break its pairing with trainData for any later call.
    vector<int> sortedLabel(label.begin(), label.begin() + count);
    sortDist(dists, sortedLabel, count);
    showDist(dists, count);

    // Count the labels of the nearest neighbours (at most k, fewer when the
    // training set is smaller than k).
    int neighbours = k < count ? k : count;
    vector<int> sumLabels(categories, 0);
    for (int i = 0; i < neighbours; i++)
    {
        int c = sortedLabel[i];
        // Ignore labels outside 1..categories instead of indexing out of range.
        if (c >= 1 && c <= categories)
            sumLabels[c - 1]++;
    }

    // Pick the most frequent label. Iterate over the categories, not over k:
    // the tally has exactly `categories` slots.
    int best = 0;
    int testLabel = 0;
    for (int c = 0; c < categories; c++)
    {
        if (sumLabels[c] > best)
        {
            best = sumLabels[c];
            testLabel = c + 1;
        }
    }
    return testLabel;
}
/**
 * Prints the first `count` distances on one line of standard output, each
 * followed by a single space, then ends the line.
 *
 * @param dists distances to print; must hold at least `count` entries
 * @param count number of entries to print
 */
void showDist(Tuple& dists, int count)
{
    int idx = 0;
    while (idx < count)
    {
        cout << dists[idx] << " ";
        ++idx;
    }
    cout << endl;
}
knn原理请见:http://blog.csdn.net/u013593585/article/details/51284537
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <sstream>
// The whole file refers to standard-library names without the std:: prefix.
using namespace std;

// One sample: its feature values stored as floats.
using Tuple = vector<float>;

// Number of nearest neighbours that take part in the vote.
constexpr int k = 4;
// Number of feature values read for every sample.
constexpr int dims = 3;
// Number of distinct class labels; labels are 1, 2, 3.
constexpr int categories = 3;

// Forward declarations; the definitions follow main().
int RunKnn(vector<Tuple>& trainData, vector<int>& label, Tuple& testData, int count);
float getDist(Tuple& trainData, Tuple& testData);
void sortDist(Tuple& dists, vector<int>& label, int count);
void showDist(Tuple& dists, int count);
int main()
{
// 准备训练数据
cout << "Preparing the Trainning data----" << endl;
string fileName = "datingTestSet.txt";
fstream file(fileName);
if (!file)
{
cout << "can not open the file " << endl;
return 0;
}
vector<Tuple> trainData;
vector<int> label;
int count = 0;
while(!file.eof())
{
string str;
getline(file,str);
stringstream ss(str);
ss << str;
Tuple temp(dims,0);
for(int i = 0; i < dims; i++)
ss >> temp[i];
trainData.push_back(temp);
int t;
ss >> t;
label.push_back(t);
count ++;
}
//训练数据归一化
Tuple maxnum(dims,0);
Tuple minnum(dims,0);
for (int i = 0; i < count; i++)
{
for (int j = 0; j < dims; j++)
{
if (trainData[i][j] > maxnum[j])
maxnum[j] = trainData[i][j];
if (trainData[i][j] < minnum[j])
minnum[j] = trainData[i][j];
}
}
float chazhi;
for (int i = 0; i < dims; i++)
{
chazhi = maxnum[i] - minnum[i];
for (int j = 0; j < count; j++)
trainData[j][i] = (trainData[j][i] - minnum[i])/chazhi;
}
//准备测试数据
cout << "Preparing the testing data-----" << endl;
int ind = 80;
Tuple testData = trainData[ind];
int truthLabel = label[ind];
//knn开始
cout << "Running" << endl;
int testLabel = RunKnn(trainData,label,testData,count);
cout << "testLabel: " << testLabel << endl;
cout << "truthLabel: " << truthLabel << endl;
if(testLabel == truthLabel)
cout << "Yes!" << endl;
else
cout << "No!" << endl;
system("pause");
return 0;
}
/**
 * Squared Euclidean distance between two samples over the first `dims`
 * components (no square root is taken).
 *
 * @param trainData first sample; must hold at least `dims` values
 * @param testData  second sample; must hold at least `dims` values
 * @return sum of the squared component differences
 */
float getDist(Tuple& trainData, Tuple& testData)
{
    float sumSq = 0;
    int d = 0;
    while (d < dims)
    {
        const float delta = trainData[d] - testData[d];
        sumSq += delta * delta;
        ++d;
    }
    return sumSq;
}
/**
 * Sorts the first `count` distances in ascending order and applies the same
 * permutation to `label`, so label[i] stays paired with dists[i].
 *
 * Bubble sort: equal distances keep their relative order.
 *
 * @param dists distances, reordered in place; needs at least `count` entries
 * @param label labels, reordered in place; needs at least `count` entries
 * @param count number of leading entries to sort
 */
void sortDist(Tuple& dists, vector<int>& label,int count)
{
    for (int pass = 1; pass < count; pass++)
    {
        bool swapped = false;
        // After `pass - 1` passes the largest `pass - 1` values are already
        // in place, so compare pairs up to index count - pass. The previous
        // bound (count - pass - 1) never looked at the last element.
        for (int j = 0; j < count - pass; j++)
        {
            if (dists[j] > dists[j + 1])
            {
                float tmpDist = dists[j];
                dists[j] = dists[j + 1];
                dists[j + 1] = tmpDist;
                int tmpLabel = label[j];
                label[j] = label[j + 1];
                label[j + 1] = tmpLabel;
                swapped = true;
            }
        }
        // A pass without swaps means the range is already sorted.
        if (!swapped)
            break;
    }
}
/**
 * Classifies `testData` by majority vote among its `k` nearest training
 * samples (squared Euclidean distance, see getDist).
 *
 * Prints the distance list before and after sorting via showDist.
 *
 * @param trainData training samples; not modified
 * @param label     label of each training sample, expected in 1..categories;
 *                  not modified (a copy is sorted)
 * @param testData  sample to classify
 * @param count     number of training samples to use; clamped to the sizes of
 *                  `trainData` and `label`
 * @return the winning label in 1..categories (the lowest label wins a tie),
 *         or 0 when there is no usable neighbour
 */
int RunKnn(vector<Tuple>& trainData,vector<int>& label, Tuple& testData, int count)
{
    // Never read past the containers, whatever count the caller passed.
    if (count > static_cast<int>(trainData.size()))
        count = static_cast<int>(trainData.size());
    if (count > static_cast<int>(label.size()))
        count = static_cast<int>(label.size());
    if (count <= 0)
        return 0;

    // Compute the distance to every training sample
    Tuple dists(count, 0);
    for (int i = 0; i < count; i++)
        dists[i] = getDist(trainData[i], testData);
    showDist(dists, count);

    // Sort by distance. Work on a copy of the labels: reordering the caller's
    // vector would break its pairing with trainData for any later call.
    vector<int> sortedLabel(label.begin(), label.begin() + count);
    sortDist(dists, sortedLabel, count);
    showDist(dists, count);

    // Count the labels of the nearest neighbours (at most k, fewer when the
    // training set is smaller than k).
    int neighbours = k < count ? k : count;
    vector<int> sumLabels(categories, 0);
    for (int i = 0; i < neighbours; i++)
    {
        int c = sortedLabel[i];
        // Ignore labels outside 1..categories instead of indexing out of range.
        if (c >= 1 && c <= categories)
            sumLabels[c - 1]++;
    }

    // Pick the most frequent label. Iterate over the categories, not over k:
    // the tally has exactly `categories` slots.
    int best = 0;
    int testLabel = 0;
    for (int c = 0; c < categories; c++)
    {
        if (sumLabels[c] > best)
        {
            best = sumLabels[c];
            testLabel = c + 1;
        }
    }
    return testLabel;
}
/**
 * Prints the first `count` distances on one line of standard output, each
 * followed by a single space, then ends the line.
 *
 * @param dists distances to print; must hold at least `count` entries
 * @param count number of entries to print
 */
void showDist(Tuple& dists, int count)
{
    int idx = 0;
    while (idx < count)
    {
        cout << dists[idx] << " ";
        ++idx;
    }
    cout << endl;
}
相关文章推荐
- C语言之sprintf()字符串的构造
- C/C++中const关键字详解
- C++ 类的静态成员详细讲解
- 《离散数学》用C++实现第二类Stirling数的递归与(非递归)递推的方法,并且在实现了S(n,k)=S(n-1,k-1)+k*S(n-1,k)用递归、非递归方式之后,比较两个实验的调试速度、耗时
- 多连块(c++ 模拟法)
- C++对象内存模型
- 单循环链表(C语言实现)
- PAT (Basic Level) Practise (中文)1042. 字符统计(20)
- C++中的类与对象
- Win7 64下Visual C++ 6.0不兼容
- C++ auto_ptr智能指针的用法
- 操作系统c实现银行家算法
- c++ 多线程——c++复习(十)
- Understanding C/C++ Strict Aliasing
- vc++6.0的scanf问题
- MOOC北京理工《C语言程序设计(上)》第5周第2题:寻找特殊偶数
- C++ 容器及选用总结
- MOOC北京理工《C语言程序设计(上)》第5周第1题:锻炼身体吧
- C++中int与string的相互转换
- xcode 中 c++ Standard Library的选择 和 如何混编c++