自适应中心点个数的K-means java实现
2017-02-10 09:25
489 查看
/**
 * One-dimensional K-means over integer samples that also picks the number of
 * centres: every centre count from 1 to {@code times.length} is tried with
 * several random restarts, each converged clustering is scored by
 * {@link #clusting}, and the best-scoring clustering is returned.
 *
 * <p>Needs {@code java.math.BigDecimal}, {@code java.math.RoundingMode} and
 * {@code java.util.ArrayList/Arrays/HashMap/List/Map/Random} to be imported.
 *
 * <p>An instance keeps scratch state in fields while clustering, so a single
 * instance must not run {@link #TimeKMeans} from several threads at once.
 */
public class KMeans {

    /** Random restarts tried for every candidate number of centres. */
    private static final int RESTARTS_PER_K = 10;

    /** Upper bound on assign/update rounds within one restart. */
    private static final int MAX_ITERATIONS = 1000;

    /** Score reported when at least one centre attracted no sample; also the acceptance floor. */
    private static final double EMPTY_CLUSTER_SCORE = -1;

    /** Cluster index of every sample as computed by the latest {@link #clusting} call. */
    private int[] minPoint;

    /** Cluster index of every sample in the best clustering found so far. */
    private int[] belongs;

    /** Demo: clusters a small fixed sample and prints each centre followed by its member indices. */
    @SuppressWarnings("rawtypes") // TimeKMeans exposes a raw List in its return type
    public static void main(String[] args) {
        int[] times = { 10, 11, 123, 121, 123, 245 };
        KMeans m = new KMeans();
        Map<Integer, List> map = m.TimeKMeans(times);
        for (Map.Entry<Integer, List> entry : map.entrySet()) {
            System.out.println(entry.getKey());
            for (Object index : entry.getValue()) {
                System.out.print(index + " ");
            }
            System.out.println();
        }
    }

    /**
     * Clusters {@code times} and returns the best clustering found.
     *
     * <p>The input does not have to be sorted. If no candidate clustering scores
     * above the floor of -1 (for example a single sample), all samples are
     * reported as one cluster centred on their mean.
     *
     * @param times samples to cluster
     * @return map from each centre (truncated to {@code int}) to the list of
     *         indices into {@code times} that belong to it; empty when
     *         {@code times} is empty
     * @throws NullPointerException if {@code times} is {@code null}
     */
    @SuppressWarnings("rawtypes") // raw List kept so existing callers keep compiling
    public Map<Integer, List> TimeKMeans(int[] times) {
        if (times == null) {
            throw new NullPointerException("times must not be null");
        }
        Map<Integer, List> map = new HashMap<>();
        int n = times.length;
        if (n == 0) {
            return map;
        }

        // Real value range, so unsorted input works and a zero-width range is detected.
        int min = times[0];
        int max = times[0];
        long total = 0;
        for (int value : times) {
            min = Math.min(min, value);
            max = Math.max(max, value);
            total += value;
        }
        long span = (long) max - min;

        // Fallback result: one cluster holding everything, centred on the mean.
        double[] fPoints = { (double) total / n };
        belongs = new int[n];
        double maxRank = EMPTY_CLUSTER_SCORE;
        Random random = new Random();

        // Try every centre count from 1 to n, several random restarts each.
        for (int i = 1; i <= n; i++) {
            for (int l = 0; l < RESTARTS_PER_K; l++) {
                System.out.println(i + "个中心点");
                double[] points = new double[i];
                for (int j = 0; j < i; j++) {
                    points[j] = randomCentre(random, min, span);
                }

                double biasRank = EMPTY_CLUSTER_SCORE;
                for (int t = 0; t < MAX_ITERATIONS; t++) {
                    double[] prePoints = points.clone();
                    biasRank = clusting(points, times, i);
                    if (Arrays.equals(prePoints, points)) {
                        break; // centres did not move: converged
                    }
                }
                System.out.println(biasRank);

                if (biasRank > maxRank) {
                    maxRank = biasRank;
                    fPoints = points.clone();
                    System.arraycopy(minPoint, 0, belongs, 0, n);
                }
            }
        }

        for (int i = 0; i < fPoints.length; i++) {
            List<Integer> members = new ArrayList<>();
            for (int k = 0; k < n; k++) {
                if (belongs[k] == i) {
                    members.add(k);
                }
            }
            map.put((int) fPoints[i], members);
        }
        return map;
    }

    /**
     * Draws an initial centre at an integer offset in {@code [min, min + span)};
     * returns {@code min} itself when the range is empty, because
     * {@code Random.nextInt} rejects a non-positive bound.
     */
    private static double randomCentre(Random random, int min, long span) {
        if (span <= 0) {
            return min;
        }
        int bound = (int) Math.min(span, Integer.MAX_VALUE);
        return (double) min + random.nextInt(bound);
    }

    /**
     * Runs one K-means round and scores the resulting assignment.
     *
     * <p>Each sample is assigned to its nearest centre (ties go to the lowest
     * index) and the assignment is stored in {@code minPoint}. Every centre in
     * {@code points} is then overwritten with the mean of its members; a centre
     * without members is reset to {@code times[0]}.
     *
     * @param points centres, updated in place
     * @param times  samples
     * @param i      number of centres in {@code points}
     * @return the clustering score (higher is better), or -1 when some centre
     *         has no members
     */
    private double clusting(double points[], int times[], int i) {
        int n = times.length;

        // Assignment step.
        minPoint = new int[n];
        for (int k = 0; k < n; k++) {
            double nearest = Double.POSITIVE_INFINITY;
            for (int j = 0; j < i; j++) {
                double distance = Math.abs(times[k] - points[j]);
                if (distance < nearest) {
                    nearest = distance;
                    minPoint[k] = j;
                }
            }
        }

        // Update step. Sums are long so timestamp-sized samples cannot overflow.
        int[] clasCount = new int[i];
        long[] sums = new long[i];
        for (int k = 0; k < n; k++) {
            clasCount[minPoint[k]]++;
            sums[minPoint[k]] += times[k];
        }
        boolean hasEmptyCluster = false;
        for (int j = 0; j < i; j++) {
            if (clasCount[j] == 0) {
                points[j] = times[0];
                hasEmptyCluster = true;
            } else {
                points[j] = devide((double) sums[j], clasCount[j]);
            }
        }
        // An empty cluster means there are too many centres: lowest score.
        if (hasEmptyCluster) {
            return EMPTY_CLUSTER_SCORE;
        }

        // Silhouette-like score, averaged over samples and then divided by the centre count.
        double biasRank = 0;
        for (int j = 0; j < n; j++) {
            // Mean distance from sample j to the members of every cluster.
            double[] disSum = new double[i];
            for (int k = 0; k < n; k++) {
                disSum[minPoint[k]] += Math.abs((double) times[k] - times[j]);
            }
            double[] disRank = new double[i];
            for (int p = 0; p < i; p++) {
                disRank[p] = devide(disSum[p], clasCount[p]);
            }

            // NOTE(review): cohesion is read from cluster 0, not from the sample's
            // own cluster (minPoint[j]). Kept as in the original because the
            // weights below were tuned against it - confirm the intent.
            double cohension = disRank[0] * 1.1;
            if (cohension == 0) {
                cohension = 0.01;
            }
            Arrays.sort(disRank);
            // Separation is deliberately scaled down so that cohesion dominates.
            double separation = disRank[0] * 0.9;
            if (cohension == separation && disRank.length >= 2) {
                separation = disRank[1];
            }
            biasRank += devide(
                    devide(separation - cohension, Math.max(separation, cohension)), n);
        }
        return devide(biasRank, i);
    }

    /** Integer overload of {@link #devide(double, double)}. */
    private double devide(int a, int b) {
        return devide((double) a, (double) b);
    }

    /**
     * Divides {@code a} by {@code b}, rounding the quotient to 10 decimal places
     * with {@link RoundingMode#HALF_UP}.
     *
     * @throws ArithmeticException   if {@code b} is zero
     * @throws NumberFormatException if either argument is NaN or infinite
     */
    private double devide(double a, double b) {
        BigDecimal dividend = new BigDecimal(a);
        BigDecimal divisor = new BigDecimal(b);
        return dividend.divide(divisor, 10, RoundingMode.HALF_UP).doubleValue();
    }

    /** Mixed overload of {@link #devide(double, double)}. */
    private double devide(double a, int b) {
        return devide(a, (double) b);
    }
}