Scala 学习笔记(6)-程序例子:Scala K-means(代码中写作 Keams)聚类算法
2015-04-22 12:30
375 查看
之前用 Java 写过一个 K-means(代码中写作 Keams)算法,这次想用 Scala 重写这个例子练练手。总结下来,自己对 Scala 还不是很熟悉,需要慢慢加强;对于 Scala 中的 List、数组、Map 等集合也还需要深入了解。
Scala 中的 foreach 用起来比较方便,定义数据类型也比较方便,相比 Java 更简单。
for 循环中遇到一个开闭区间的问题:下面代码中的 until 不包含上界 D_LEN(即 i 取 0 到 D_LEN-1),如果换成 to 则包含 D_LEN(即 i 取 0 到 D_LEN),这一点需要注意。
下面是Scala Keams程序
清单1.
清单2.
清单3.
执行结果
Scala 中的 foreach 用起来比较方便,定义数据类型也比较方便,相比 Java 更简单。
for 循环中遇到一个开闭区间的问题:下面代码中的 until 不包含上界 D_LEN(即 i 取 0 到 D_LEN-1),如果换成 to 则包含 D_LEN(即 i 取 0 到 D_LEN),这一点需要注意。
for (i <- 0 until D_LEN) { t(i) = add(t(i), list.point(i)) }
下面是Scala Keams程序
清单1.
package com.test.zhuoer

/**
 * Entity used by the K-means clustering example: one data point plus
 * a descriptive string and an identifier.
 *
 * Open question left by the author: how should this constructor be
 * handled once the number of parameters grows?
 *
 * @param pointArg   coordinates of the data point
 * @param attrStrArg attribute string attached to the point
 * @param idArg      identifier of the entity
 */
class KEntity(pointArg: Array[Double], attrStrArg: String, idArg: Int) {
  // The fields stay mutable (`var`) because they are part of the class's public surface.
  var point: Array[Double] = pointArg
  var attrStr: String = attrStrArg // attribute string
  var id: Int = idArg // ID
}
清单2.
package com.test.zhuoer

import scala.util.control.Breaks._

/**
 * K-means clustering example.
 *
 * Typical use: fill [[datasource]], call [[init]] to pick the starting
 * centers, then call [[exec]]; the resulting groups are in [[k_Cluster]].
 */
object Keams {
  /** Number of clusters. */
  var K: Int = 3

  /** Current cluster centers, keyed by cluster id. */
  var initCluster: Map[Int, Array[Double]] = Map[Int, Array[Double]]()

  /** Data to be clustered. */
  var datasource: List[KEntity] = List[KEntity]()

  /** Dimension of every data point. */
  var D_LEN: Int = 4

  /** Members of each cluster, keyed by cluster id. */
  var k_Cluster: Map[Int, List[KEntity]] = Map[Int, List[KEntity]]()

  /**
   * Per cluster: how far its center moved in the latest [[newCenter]] call.
   * [[init]] stores -1 to mark "not computed yet".
   */
  var ctDistance: Array[Double] = new Array[Double](K)

  /** Convergence tolerance: iteration stops once no center moved further than this. */
  var DISTANCE: Double = 0d

  /**
   * Picks K distinct random entries of [[datasource]] as the starting centers.
   *
   * Does nothing when [[initCluster]] already holds exactly K centers.
   *
   * @throws IllegalArgumentException if K is not positive or there are fewer than K data points
   */
  def init: Unit = {
    if (initCluster.size != K) {
      require(K > 0, s"K must be positive, got $K")
      require(datasource.size >= K,
        s"need at least K=$K data points, got ${datasource.size}")

      // Keep the bookkeeping array in step with K in case K was changed after object creation.
      if (ctDistance.length != K) ctDistance = Array.fill(K)(-1d)

      val points = datasource.toVector
      // Shuffling the index range yields K distinct positions without a retry loop.
      val picks = scala.util.Random.shuffle(points.indices.toList).take(K)

      initCluster = Map[Int, Array[Double]]()
      picks.zipWithIndex.foreach { case (dataIndex, clusterId) =>
        initCluster += (clusterId -> points(dataIndex).point) // starting center
        k_Cluster += (clusterId -> List()) // empty member list
        ctDistance(clusterId) = -1 // movement not computed yet
      }
    }
  }

  /**
   * Resets [[k_Cluster]] to K empty member lists (ids 0 until K).
   */
  def initKCluster: Unit = {
    k_Cluster = (0 until K).map(i => i -> List[KEntity]()).toMap
  }

  /** Addition. */
  def add(d1: Double, d2: Double): Double = d1 + d2

  /** Subtraction. */
  def sub(d1: Double, d2: Double): Double = d1 - d2

  /** Multiplication. */
  def mul(d1: Double, d2: Double): Double = d1 * d2

  /** Division. */
  def div(d1: Double, d2: Double): Double = d1 / d2

  /**
   * Euclidean distance between two points.
   *
   * @throws IllegalArgumentException if either point does not have exactly D_LEN coordinates
   *                                  (a mismatch used to be reported as distance 0)
   */
  def distance(d1: Array[Double], d2: Array[Double]): Double = {
    require(d1.length == D_LEN && d2.length == D_LEN,
      s"both points must have D_LEN=$D_LEN coordinates, got ${d1.length} and ${d2.length}")
    var sum = 0d
    for (i <- 0 until D_LEN) {
      sum += Math.pow(sub(d1(i), d2(i)), 2)
    }
    Math.sqrt(sum)
  }

  /**
   * Recomputes every cluster center as the mean of its members and records
   * in [[ctDistance]] how far each center moved.
   *
   * A cluster without members keeps its current center (movement 0);
   * averaging zero points would otherwise produce NaN coordinates.
   */
  def newCenter(): Unit = {
    k_Cluster.foreach { case (clusterId, members) =>
      if (members.isEmpty) {
        ctDistance(clusterId) = 0d
      } else {
        val sums = new Array[Double](D_LEN)
        members.foreach { member =>
          for (i <- 0 until D_LEN) {
            sums(i) = add(sums(i), member.point(i))
          }
        }
        val center = sums.map(s => div(s, members.size))
        ctDistance(clusterId) = distance(initCluster(clusterId), center)
        initCluster += (clusterId -> center)
      }
    }
  }

  /** Id of the center closest to `point`; the lowest id wins a tie. */
  private def nearestCluster(point: Array[Double]): Int = {
    var best = 0
    var bestDistance = Double.PositiveInfinity
    for (j <- 0 until K) {
      val candidate = distance(point, initCluster(j))
      if (candidate < bestDistance) {
        bestDistance = candidate
        best = j
      }
    }
    best
  }

  /**
   * One assignment pass: empties all clusters, then puts every entry of
   * [[datasource]] into the cluster of its nearest center.
   */
  def order(): Unit = {
    initKCluster
    datasource.foreach { entity =>
      val target = nearestCluster(entity.point)
      // Prepend (O(1)) and restore datasource order once at the end.
      k_Cluster += (target -> (entity :: k_Cluster(target)))
    }
    k_Cluster = k_Cluster.map { case (id, members) => id -> members.reverse }
  }

  /**
   * Runs the clustering: alternates [[order]] and [[newCenter]] until no
   * center moved further than [[DISTANCE]] in the latest recomputation.
   */
  def exec: Unit = {
    var iteration = 0
    var converged = false
    while (!converged) {
      order() // reassign every point to its nearest center
      // -1 marks "not computed yet", so negative entries never count as converged.
      converged = ctDistance.forall(d => d >= 0 && d <= DISTANCE)
      if (!converged) {
        newCenter() // move the centers and record how far they moved
        println("--------------迭代次数:" + iteration)
        iteration += 1
      }
    }
  }

  /** Demo entry point: clusters the built-in sample data and prints each cluster. */
  def main(args: Array[String]): Unit = {
    println("-------------Start")
    // Sample data
    val d: Array[Array[Double]] = Array(
      Array(6, 12, 8929, 1474), Array(7, 12, 9149, 9952), Array(4, 12, 3992, 5822),
      Array(3, 12, 1626, 360), Array(32, 12, 3563, 39630), Array(38, 12, 303451, 34083239),
      Array(66, 12, 133102, 6468), Array(14, 12, 38860, 15140), Array(128, 72, 271390, 39019349),
      Array(111, 12, 0, 0), Array(61, 12, 18626, 664), Array(40, 12, 3626, 660),
      Array(63, 42, 2290136, 3419991), Array(1, 12, 0, 14000), Array(5, 12, 5723, 998),
      Array(6, 12, 9032, 1512), Array(102, 72, 20134467, 25894663), Array(5, 12, 5723, 5998),
      Array(101, 72, 621319, 15322448), Array(6, 12, 9095, 1542), Array(6, 12, 9095, 1542),
      Array(6, 12, 9095, 1542), Array(6, 12, 9095, 1542), Array(6, 12, 9095, 1542),
      Array(4, 12, 3626, 660), Array(6, 12, 9095, 1542), Array(3, 12, 1626, 360),
      Array(2, 12, 500, 19100), Array(100, 12, 15420, 8208707), Array(100, 12, 8927659, 38163823),
      Array(11, 12, 7708, 5546), Array(6, 12, 8849, 33459849), Array(4, 12, 3626, 89160),
      Array(14, 12, 38860, 14140), Array(1, 12, 0, 79000), Array(30, 12, 225482, 90391),
      Array(31, 12, 230754, 119948), Array(3, 12, 1626, 360), Array(27, 12, 287635, 148541),
      Array(11, 12, 7890, 13594), Array(7, 12, 9095, 26942), Array(30, 36, 311375, 73711),
      Array(2, 12, 500, 84100), Array(1, 12, 0, 228000), Array(6, 12, 8992, 1504),
      Array(3, 12, 1626, 360), Array(4, 12, 3626, 8660), Array(18, 12, 69041, 17594),
      Array(18, 12, 69358, 16593), Array(4, 12, 3706, 694), Array(2, 12, 500, 3100),
      Array(35, 12, 2907, 206039), Array(8, 12, 7446, 10562), Array(2, 12, 500, 3100),
      Array(21, 12, 189051, 20076), Array(21, 12, 188065, 19604), Array(100, 12, 805762, 19934040),
      Array(15, 12, 40589, 13905), Array(8, 12, 7549, 2600), Array(6, 12, 9095, 1542),
      Array(15, 12, 39589, 7305), Array(18, 12, 66326, 10959), Array(7, 12, 9095, 9942),
      Array(22, 12, 184273, 27756), Array(8, 12, 7708, 8646), Array(23, 12, 223512, 33280),
      Array(4, 12, 3626, 660), Array(4, 12, 3626, 660), Array(6, 12, 8786, 80428),
      Array(3, 12, 1626, 360), Array(21, 12, 5515, 21260), Array(1, 12, 0, 3000),
      Array(1, 12, 0, 41000), Array(30, 12, 330716, 91039), Array(4, 12, 3626, 660),
      Array(7, 12, 9329, 2022), Array(1, 12, 0, 3000), Array(2, 12, 500, 178700),
      Array(2, 12, 500, 132900), Array(2, 12, 500, 18500), Array(1, 12, 0, 8200),
      Array(2, 12, 500, 8300), Array(2, 12, 500, 45900), Array(2, 12, 500, 18900),
      Array(2, 12, 500, 9500), Array(2, 12, 500, 96500), Array(2, 12, 500, 30700),
      Array(2, 12, 500, 20100), Array(8, 12, 7577, 67104), Array(58, 36, 1397618, 17879602),
      Array(11, 12, 8909, 11400), Array(6, 12, 9181, 1594), Array(2, 12, 563, 130),
      Array(32, 12, 19756, 186422), Array(6, 12, 8786, 6428), Array(5, 12, 5786, 1028),
      Array(6, 12, 8786, 1428), Array(6, 12, 8786, 40006428), Array(12, 12, 68456, 95240),
      Array(2, 12, 500, 20100), Array(11, 12, 7708, 7546), Array(4, 12, 3786, 728),
      Array(8, 12, 7577, 2604), Array(5, 12, 5849, 1058), Array(5, 12, 5786, 1028),
      Array(6, 12, 8889, 1466), Array(9, 12, 7708, 4246), Array(9, 12, 7708, 4246),
      Array(7, 12, 9095, 1942), Array(4, 12, 3626, 660), Array(7, 12, 8500, 1700),
      Array(4, 12, 3500, 600), Array(2, 12, 563, 130), Array(9, 12, 7708, 4246),
      Array(4, 12, 13000, 400), Array(1, 12, 0, 3000), Array(3, 12, 1500, 300),
      Array(7, 12, 9095, 1942), Array(10, 12, 6879, 4774), Array(4, 12, 3626, 27660),
      Array(11, 12, 7141, 7308), Array(9, 12, 7403, 5092), Array(19, 12, 107219, 22580),
      Array(15, 12, 39589, 14305), Array(32, 12, 8945, 229632), Array(7, 12, 9212, 1982),
      Array(1, 12, 0, 2000), Array(2, 12, 500, 5100), Array(1, 12, 0, 5000),
      Array(2, 12, 500, 5100), Array(23, 12, 134301, 37817), Array(3, 12, 1626, 360),
      Array(2, 12, 626, 160), Array(32, 12, 174216, 160294), Array(1, 12, 0, 342000),
      Array(2, 12, 500, 120100), Array(21, 12, 8515, 19560), Array(15, 12, 40854, 12462),
      Array(4, 12, 3706, 694), Array(60, 12, 0, 0), Array(60, 12, 0, 0),
      Array(14, 12, 38860, 9140), Array(21, 12, 7689, 32237), Array(4, 12, 3626, 660),
      Array(4, 12, 3626, 660), Array(6, 12, 8889, 362466), Array(3, 12, 1626, 360),
      Array(1, 12, 0, 3000), Array(2, 12, 563, 5130), Array(2, 12, 500, 62100),
      Array(4, 12, 3626, 660), Array(45, 12, 1508096, 8341432), Array(1, 12, 0, 114000),
      Array(4, 12, 3626, 12660), Array(1, 12, 0, 9000), Array(4, 12, 3626, 234660),
      Array(1, 12, 0, 132000), Array(60, 12, 3293822, 40645), Array(12, 12, 68658, 7290),
      Array(6, 12, 8786, 8428), Array(4, 12, 3786, 2728), Array(1, 12, 0, 5000),
      Array(1, 12, 0, 400000000), Array(1, 12, 0, 2000), Array(2, 12, 626, 5160),
      Array(32, 12, 273220, 152515), Array(2, 12, 500, 5100), Array(1, 12, 0, 8000),
      Array(1, 12, 0, 5000))

    // Each row is prepended, so datasource ends up in reverse row order.
    d.foreach { row =>
      datasource = new KEntity(row, row.mkString(" "), 0) :: datasource
    }

    init // choose the starting centers
    exec // run the clustering

    // Print clusters in ascending id order so the output is stable for any K.
    k_Cluster.toSeq.sortBy(_._1).foreach { case (clusterId, members) =>
      println("---------------K" + clusterId)
      members.foreach(member => println("[" + member.attrStr + "]"))
    }
  }
}
清单3.
执行结果
-------------Start --------------迭代次数:0 --------------迭代次数:1 --------------迭代次数:2 --------------迭代次数:3 --------------迭代次数:4 --------------迭代次数:5 ---------------K0 [1.0 12.0 0.0 5000.0] [1.0 12.0 0.0 8000.0] [2.0 12.0 500.0 5100.0] [32.0 12.0 273220.0 152515.0] [2.0 12.0 626.0 5160.0] [1.0 12.0 0.0 2000.0] [1.0 12.0 0.0 5000.0] [4.0 12.0 3786.0 2728.0] [6.0 12.0 8786.0 8428.0] [12.0 12.0 68658.0 7290.0] [60.0 12.0 3293822.0 40645.0] [1.0 12.0 0.0 132000.0] [4.0 12.0 3626.0 234660.0] [1.0 12.0 0.0 9000.0] [4.0 12.0 3626.0 12660.0] [1.0 12.0 0.0 114000.0] [45.0 12.0 1508096.0 8341432.0] [4.0 12.0 3626.0 660.0] [2.0 12.0 500.0 62100.0] [2.0 12.0 563.0 5130.0] [1.0 12.0 0.0 3000.0] [3.0 12.0 1626.0 360.0] [6.0 12.0 8889.0 362466.0] [4.0 12.0 3626.0 660.0] [4.0 12.0 3626.0 660.0] [21.0 12.0 7689.0 32237.0] [14.0 12.0 38860.0 9140.0] [60.0 12.0 0.0 0.0] [60.0 12.0 0.0 0.0] [4.0 12.0 3706.0 694.0] [15.0 12.0 40854.0 12462.0] [21.0 12.0 8515.0 19560.0] [2.0 12.0 500.0 120100.0] [1.0 12.0 0.0 342000.0] [32.0 12.0 174216.0 160294.0] [2.0 12.0 626.0 160.0] [3.0 12.0 1626.0 360.0] [23.0 12.0 134301.0 37817.0] [2.0 12.0 500.0 5100.0] [1.0 12.0 0.0 5000.0] [2.0 12.0 500.0 5100.0] [1.0 12.0 0.0 2000.0] [7.0 12.0 9212.0 1982.0] [32.0 12.0 8945.0 229632.0] [15.0 12.0 39589.0 14305.0] [19.0 12.0 107219.0 22580.0] [9.0 12.0 7403.0 5092.0] [11.0 12.0 7141.0 7308.0] [4.0 12.0 3626.0 27660.0] [10.0 12.0 6879.0 4774.0] [7.0 12.0 9095.0 1942.0] [3.0 12.0 1500.0 300.0] [1.0 12.0 0.0 3000.0] [4.0 12.0 13000.0 400.0] [9.0 12.0 7708.0 4246.0] [2.0 12.0 563.0 130.0] [4.0 12.0 3500.0 600.0] [7.0 12.0 8500.0 1700.0] [4.0 12.0 3626.0 660.0] [7.0 12.0 9095.0 1942.0] [9.0 12.0 7708.0 4246.0] [9.0 12.0 7708.0 4246.0] [6.0 12.0 8889.0 1466.0] [5.0 12.0 5786.0 1028.0] [5.0 12.0 5849.0 1058.0] [8.0 12.0 7577.0 2604.0] [4.0 12.0 3786.0 728.0] [11.0 12.0 7708.0 7546.0] [2.0 12.0 500.0 20100.0] [12.0 12.0 68456.0 95240.0] [6.0 12.0 8786.0 1428.0] [5.0 12.0 5786.0 1028.0] [6.0 12.0 8786.0 6428.0] [32.0 12.0 
19756.0 186422.0] [2.0 12.0 563.0 130.0] [6.0 12.0 9181.0 1594.0] [11.0 12.0 8909.0 11400.0] [8.0 12.0 7577.0 67104.0] [2.0 12.0 500.0 20100.0] [2.0 12.0 500.0 30700.0] [2.0 12.0 500.0 96500.0] [2.0 12.0 500.0 9500.0] [2.0 12.0 500.0 18900.0] [2.0 12.0 500.0 45900.0] [2.0 12.0 500.0 8300.0] [1.0 12.0 0.0 8200.0] [2.0 12.0 500.0 18500.0] [2.0 12.0 500.0 132900.0] [2.0 12.0 500.0 178700.0] [1.0 12.0 0.0 3000.0] [7.0 12.0 9329.0 2022.0] [4.0 12.0 3626.0 660.0] [30.0 12.0 330716.0 91039.0] [1.0 12.0 0.0 41000.0] [1.0 12.0 0.0 3000.0] [21.0 12.0 5515.0 21260.0] [3.0 12.0 1626.0 360.0] [6.0 12.0 8786.0 80428.0] [4.0 12.0 3626.0 660.0] [4.0 12.0 3626.0 660.0] [23.0 12.0 223512.0 33280.0] [8.0 12.0 7708.0 8646.0] [22.0 12.0 184273.0 27756.0] [7.0 12.0 9095.0 9942.0] [18.0 12.0 66326.0 10959.0] [15.0 12.0 39589.0 7305.0] [6.0 12.0 9095.0 1542.0] [8.0 12.0 7549.0 2600.0] [15.0 12.0 40589.0 13905.0] [21.0 12.0 188065.0 19604.0] [21.0 12.0 189051.0 20076.0] [2.0 12.0 500.0 3100.0] [8.0 12.0 7446.0 10562.0] [35.0 12.0 2907.0 206039.0] [2.0 12.0 500.0 3100.0] [4.0 12.0 3706.0 694.0] [18.0 12.0 69358.0 16593.0] [18.0 12.0 69041.0 17594.0] [4.0 12.0 3626.0 8660.0] [3.0 12.0 1626.0 360.0] [6.0 12.0 8992.0 1504.0] [1.0 12.0 0.0 228000.0] [2.0 12.0 500.0 84100.0] [30.0 36.0 311375.0 73711.0] [7.0 12.0 9095.0 26942.0] [11.0 12.0 7890.0 13594.0] [27.0 12.0 287635.0 148541.0] [3.0 12.0 1626.0 360.0] [31.0 12.0 230754.0 119948.0] [30.0 12.0 225482.0 90391.0] [1.0 12.0 0.0 79000.0] [14.0 12.0 38860.0 14140.0] [4.0 12.0 3626.0 89160.0] [11.0 12.0 7708.0 5546.0] [100.0 12.0 15420.0 8208707.0] [2.0 12.0 500.0 19100.0] [3.0 12.0 1626.0 360.0] [6.0 12.0 9095.0 1542.0] [4.0 12.0 3626.0 660.0] [6.0 12.0 9095.0 1542.0] [6.0 12.0 9095.0 1542.0] [6.0 12.0 9095.0 1542.0] [6.0 12.0 9095.0 1542.0] [6.0 12.0 9095.0 1542.0] [5.0 12.0 5723.0 5998.0] [6.0 12.0 9032.0 1512.0] [5.0 12.0 5723.0 998.0] [1.0 12.0 0.0 14000.0] [63.0 42.0 2290136.0 3419991.0] [40.0 12.0 3626.0 660.0] [61.0 12.0 18626.0 664.0] 
[111.0 12.0 0.0 0.0] [14.0 12.0 38860.0 15140.0] [66.0 12.0 133102.0 6468.0] [32.0 12.0 3563.0 39630.0] [3.0 12.0 1626.0 360.0] [4.0 12.0 3992.0 5822.0] [7.0 12.0 9149.0 9952.0] [6.0 12.0 8929.0 1474.0] ---------------K1 [1.0 12.0 0.0 4.0E8] ---------------K2 [6.0 12.0 8786.0 4.0006428E7] [58.0 36.0 1397618.0 1.7879602E7] [100.0 12.0 805762.0 1.993404E7] [6.0 12.0 8849.0 3.3459849E7] [100.0 12.0 8927659.0 3.8163823E7] [101.0 72.0 621319.0 1.5322448E7] [102.0 72.0 2.0134467E7 2.5894663E7] [128.0 72.0 271390.0 3.9019349E7] [38.0 12.0 303451.0 3.4083239E7]
相关文章推荐
- 黑马程序员之c#程序学习笔记:c#程序经典例子学习总结
- Scala入门学习笔记二-基本数据类型、程序控制结构
- Scala学习笔记(一) --- 搭建Scala环境,并运行第一个程序
- Scala学习笔记(三)Actor简单例子
- Spark学习笔记7-在eclipse里用scala编写spark程序(单机和集群运行)
- Scala学习笔记(六):Scala程序
- maven学习笔记:命令行模式创建scala_spark项目并运行程序
- Spark学习笔记 --- scala实现Spark wordcount例子
- Scala学习笔记
- Scala学习笔记10 - scala I/O
- JavaScript学习笔记8--一个文字自动匹配的例子
- Scala学习笔记02_函数入门
- scala学习笔记2(类,继承,抽象类)
- EJB3.0学习笔记---MDB--第一个MDBBean程序:
- Qt学习笔记-嵌入式qt程序支持显示中文
- 程序怎么跑?(2)——linux课程学习笔记
- 学习笔记2-C语言的运算符和程序结构
- linux0.11学习笔记-技术铺垫-简单AB任务切换程序(4)-向现存写数据并响应时钟中断
- SQL Server 2005 学习笔记系列文章导航 存储过程分页的经典例子
- 微信小程序学习笔记2——一些小程序列表