您的位置:首页 > 编程语言 > Java开发

自适应中心点个数的 K-means 聚类算法(Java 实现)

2017-02-10 09:25 489 查看
// Cluster index assigned to each input element by the most recent clusting() call;
// the array is re-allocated on every call.
private int minPoint[];
// Copy of minPoint taken by TimeKMeans() whenever a run beats the best score so far.
private int belongs[];

/**
 * Demo entry point: clusters a fixed sample array and prints, for every cluster, the
 * (truncated) center on one line followed by the indexes of its members on the next.
 *
 * @param args unused
 */
public static void main(String[] args) {
    int[] samples = { 10, 11, 123, 121, 123, 245 };
    Map<Integer, List> clusters = new KMeans().TimeKMeans(samples);

    for (Map.Entry<Integer, List> cluster : clusters.entrySet()) {
        // Key: cluster center cast to int.
        System.out.println(cluster.getKey());

        // Value: positions in the sample array (not the sample values themselves).
        List<Integer> members = cluster.getValue();
        for (Integer member : members) {
            System.out.print(member + " ");
        }
        System.out.println();
    }
}

/**
 * Clusters {@code times} with one-dimensional k-means, choosing the number of clusters
 * automatically.
 *
 * <p>Every cluster count from 1 to {@code times.length} is tried with 10 random restarts
 * each. Each restart iterates {@code clusting} until the centers stop moving (at most 1000
 * iterations) and the partition with the highest score returned by {@code clusting} wins.
 * The winning assignment is also left in the {@code belongs} field.
 *
 * <p>Progress is printed to {@code System.out} (cluster count and score of every restart).
 *
 * @param times values to cluster; may be in any order
 * @return map from cluster center (truncated to {@code int}) to the list of indexes into
 *         {@code times} belonging to that cluster; empty when {@code times} is empty.
 *         NOTE: two centers that truncate to the same int share a key, so the later
 *         cluster overwrites the earlier one.
 */
public Map<Integer, List> TimeKMeans(int[] times) {
    Map<Integer, List> map = new HashMap<>();
    belongs = new int[times.length];
    if (times.length == 0) {
        // Nothing to cluster; avoid reporting a phantom cluster centered at 0.
        return map;
    }

    // Range of the data. Computed explicitly so unsorted or constant input is handled;
    // done in double so max - min cannot overflow.
    int min = times[0];
    int max = times[0];
    for (int value : times) {
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    double span = (double) max - min;

    Random random = new Random();
    double fPoints[] = new double[0];
    // Start below every possible score so that the first run (k = 1, which can never
    // have an empty cluster) is always recorded, even when it scores exactly -1.
    double maxRank = Double.NEGATIVE_INFINITY;

    // Try every cluster count, 10 random restarts each, scoring every restart.
    for (int i = 1; i <= times.length; i++) {
        for (int l = 0; l < 10; l++) {
            System.out.println(i + "个中心点");

            // Initialise the centers at random whole offsets inside [min, max);
            // when all values are equal (span == 0) every center starts at min.
            double[] points = new double[i];
            for (int j = 0; j < i; j++) {
                points[j] = min + Math.floor(random.nextDouble() * span);
            }

            // Iterate until the centers no longer move.
            double biasRank = -1;
            for (int t = 0; t < 1000; t++) {
                double[] prePoints = Arrays.copyOf(points, i);
                biasRank = clusting(points, times, i);
                if (Arrays.equals(prePoints, points)) {
                    break;
                }
            }
            System.out.println(biasRank);

            // Remember the best partition seen so far.
            if (biasRank > maxRank) {
                maxRank = biasRank;
                fPoints = Arrays.copyOf(points, i);
                System.arraycopy(minPoint, 0, belongs, 0, times.length);
            }
        }
    }

    // Group element indexes by the cluster they belong to.
    for (int i = 0; i < fPoints.length; i++) {
        List<Integer> ilist = new ArrayList<>();
        for (int k = 0; k < times.length; k++) {
            if (belongs[k] == i) {
                ilist.add(k);
            }
        }
        map.put((int) fPoints[i], ilist);
    }

    return map;
}

// 返回轮廓系数 衡量这次聚类的效果
private double clusting(double points[], int times[], int i) {

double[][] distanse = new double[i][times.length];
for (int j = 0; j < i; j++) {
for (int k = 0; k < times.length; k++) {
distanse[j][k] = Math.abs(times[k] - points[j]);
}
}
// 最小距离和最小距离对应的点 minPoint 是第几个元素属于第几个簇
minPoint = new int[times.length];
double minDis[] = new double[times.length];
for (int k = 0; k < times.length; k++) {
minDis[k] = Integer.MAX_VALUE;
}
for (int k = 0; k < times.length; k++) {
for (int j = 0; j < i; j++) {
if (distanse[j][k] < minDis[k]) {
minPoint[k] = j;
minDis[k] = distanse[j][k];
}
}
}
// 移动中心点
for (int j = 0; j < i; j++) {
int n = 0;
int sum = 0;
for (int k = 0; k < times.length; k++) {
if (minPoint[k] == j) {
sum += times[k];
n++;
}
}
if (sum != 0 && n != 0)
points[j] = devide(sum, n);
else
points[j] = times[0];
}

// 得到每个簇的成分数量
int clasCount[] = new int[i];
for (int k = 0; k < times.length; k++) {
clasCount[minPoint[k]]++;
}

// 如果某个簇里没有成分 那么中心点过多 直接返回最低评分
for (int j : clasCount) {
if (j == 0)
return -1;

}

// 计算廊阔值 衡量误差
// double biasRank = 0;
// for (int j = 0; j &l
b783
t; i; j++) {
// int disSum[] = new int[i];
// double separation = 0;
// double cohension = 0;
// // 得到该中心点与其他簇的距离
// for (int k = 0; k < times.length; k++) {
// disSum[minPoint[k]] += distanse[j][k];
// }
// double disScore[] = new double[i];
// for (int p = 0; p < disSum.length; p++) {
// //中心点到每个簇的距离除以簇内点数
// disScore[p] = devide(disSum[p], clasCount[p]);
// }
// cohension = disScore[j];
// Arrays.sort(disScore);
// separation = disScore[0];
// if(cohension==0)
// cohension=0.01;
// if (cohension == separation && disScore.length >= 2)
// separation = disScore[1];
// biasRank += devide(separation - cohension, Math.max(separation,
// cohension));
// }

double biasRank = 0;
double[][] allDistanse = new double[times.length][times.length];
for (int j = 0; j < times.length; j++) {
for (int k = 0; k < times.length; k++) {
allDistanse[j][k] = Math.abs(times[k] - times[j]);
}
}
double ranks[] = new double[points.length];
// 这个点到其余簇的距离
for (int j = 0; j < times.length; j++) {
double cohension = 0;
double separation = 0;
int disSum[] = new int[points.length];
double disRank[] = new double[points.length];
// 填充dissum 该点到各簇的距离和 dissum[该店][第几簇]
for (int k = 0; k < times.length; k++) {
disSum[minPoint[k]] += allDistanse[j][k];
}
for (int p = 0; p < disSum.length; p++) {
disRank[p] = devide(disSum[p], clasCount[p]);
}
cohension = disRank[0]*1.1;
if (cohension == 0)
cohension = 0.01;
Arrays.sort(disRank);
//擅自调整了分离度 使分离度评分变低 强调聚合度
separation = disRank[0]*0.9;
if (cohension == separation && disRank.length >= 2)
separation = disRank[1];
biasRank += devide( devide(separation - cohension, Math.max(separation,cohension)),times.length);
}

return devide(biasRank, i);

}

/**
 * Divides {@code a} by {@code b} using decimal arithmetic, rounding the quotient half-up
 * to 10 decimal places.
 *
 * @param a dividend
 * @param b divisor
 * @return the rounded quotient as a double
 * @throws ArithmeticException if {@code b} is zero
 */
private double devide(int a, int b) {
    BigDecimal quotient =
            BigDecimal.valueOf(a).divide(BigDecimal.valueOf(b), 10, RoundingMode.HALF_UP);
    return quotient.doubleValue();
}

/**
 * Divides {@code a} by {@code b} using decimal arithmetic on the exact binary values of
 * both operands, rounding the quotient half-up to 10 decimal places.
 *
 * @param a dividend; must be finite
 * @param b divisor; must be finite
 * @return the rounded quotient as a double
 * @throws ArithmeticException   if {@code b} is zero
 * @throws NumberFormatException if either argument is NaN or infinite
 */
private double devide(double a, double b) {
    return new BigDecimal(a)
            .divide(new BigDecimal(b), 10, RoundingMode.HALF_UP)
            .doubleValue();
}

/**
 * Divides {@code a} by the integer {@code b} using decimal arithmetic on the exact binary
 * value of {@code a}, rounding the quotient half-up to 10 decimal places.
 *
 * @param a dividend; must be finite
 * @param b divisor
 * @return the rounded quotient as a double
 * @throws ArithmeticException   if {@code b} is zero
 * @throws NumberFormatException if {@code a} is NaN or infinite
 */
private double devide(double a, int b) {
    BigDecimal divisor = BigDecimal.valueOf(b);
    BigDecimal dividend = new BigDecimal(a);
    return dividend.divide(divisor, 10, RoundingMode.HALF_UP).doubleValue();
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  java