您的位置:首页 > 其它

Scala 学习笔记(6)- 程序例子:Scala K-means 聚类算法

2015-04-22 12:30 375 查看
之前用 Java 写过一个 K-means 算法,这次想用 Scala 重写这个例子练练手。总结下来,自己对 Scala 还不是很熟悉,需要慢慢加强;对 Scala 中的 List、数组、Map 等集合也还需要深入了解。

Scala 中的 foreach 用起来比较方便,定义数据类型也比较方便,相比 Java 更简单。

for 循环中遇到一个开闭区间的问题:下面代码中的 until 不包含上界 D_LEN(i 取 0 到 D_LEN - 1);如果换成 to,则包含 D_LEN(i 取 0 到 D_LEN)。这一点需要注意。

// `until` excludes the upper bound: i runs from 0 to D_LEN - 1.
for (i <- 0 until D_LEN) {
t(i) = add(t(i), list.point(i))
}


下面是 Scala K-means 程序

清单1.

package com.test.zhuoer

/**
 * Entity class used by the K-means clustering code.
 *
 * Each constructor argument is copied into a public mutable field of the
 * matching name.
 *
 * Open question from the original author: how should this constructor be
 * handled once it needs many parameters?
 *
 * @param pointArg   initial value of `point`
 * @param attrStrArg initial value of `attrStr`
 * @param idArg      initial value of `id`
 */
class KEntity(pointArg: Array[Double], attrStrArg: String, idArg: Int) {

  /** Numeric values of this entity; the array passed in is stored as-is, not copied. */
  var point: Array[Double] = pointArg

  /** Attribute string. */
  var attrStr: String = attrStrArg

  /** ID. */
  var id: Int = idArg

}


清单2.

package com.test.zhuoer

import scala.util.control.Breaks._

object Keams {

// Number of clusters.
var K: Int = 3

// Current cluster centers, keyed by cluster index.
var initCluster: Map[Int, Array[Double]] = Map.empty

// Input data points.
var datasource: List[KEntity] = Nil

// Number of dimensions of every data point.
var D_LEN: Int = 4

// Data points currently assigned to each cluster, keyed by cluster index.
var k_Cluster: Map[Int, List[KEntity]] = Map.empty

// One entry per cluster: init stores -1 here, and newCenter stores the distance
// between the cluster's previous center and its newly computed one.
var ctDistance: Array[Double] = new Array[Double](K)

// Precision control: the value exec checks the ctDistance entries against
// to decide when to stop iterating.
var DISTANCE: Double = 0d

/**
 * Chooses the K initial cluster centers and resets the per-cluster state.
 *
 * K data points with pairwise different coordinates are drawn at random from
 * `datasource`. For each cluster index the chosen point becomes the center in
 * `initCluster`, the member list in `k_Cluster` is emptied, and the entry in
 * `ctDistance` is set to -1 to mark "no center update computed yet".
 *
 * Because the seeds are random, the resulting clustering can differ between
 * runs.
 *
 * Throws IllegalArgumentException when `datasource` holds fewer than K
 * points with distinct coordinates.
 */
def init: Unit = {
  // Visit the points in random order and keep the first K whose coordinates
  // differ from every seed kept so far. Two identical seeds would leave one
  // of the two clusters without any member after the first assignment pass.
  val candidates = scala.util.Random.shuffle(datasource.map(_.point))
  val seeds = candidates.foldLeft(Vector.empty[Array[Double]]) { (chosen, p) =>
    val alreadyChosen = chosen.exists(c => java.util.Arrays.equals(c, p))
    if (chosen.size < K && !alreadyChosen) chosen :+ p else chosen
  }
  require(
    seeds.size == K,
    s"need at least $K distinct data points to seed $K clusters, found ${seeds.size}")

  // ctDistance is allocated with the value K had when the object was created;
  // re-allocate it if K has been changed since then.
  if (ctDistance.length != K) ctDistance = new Array[Double](K)

  seeds.zipWithIndex.foreach { case (seed, clusterId) =>
    initCluster += (clusterId -> seed) // initial center
    k_Cluster += (clusterId -> List()) // initial (empty) member list
    ctDistance(clusterId) = -1 // sentinel: center not recomputed yet
  }
}

/**
 * Resets `k_Cluster` so that it maps every cluster index in `0 until K`
 * to an empty member list.
 */
def initKCluster: Unit = {
  k_Cluster = (0 until K).foldLeft(Map[Int, List[KEntity]]()) { (clusters, idx) =>
    clusters + (idx -> List[KEntity]())
  }
}

/** Addition: returns `d1 + d2`. */
def add(d1: Double, d2: Double): Double = {
  d1 + d2
}

/** Subtraction: returns `d1 - d2`. */
def sub(d1: Double, d2: Double): Double = {
  d1 - d2
}

/** Multiplication: returns `d1 * d2`. */
def mul(d1: Double, d2: Double): Double = {
  d1 * d2
}

/**
 * Division: returns `d1 / d2`.
 *
 * This is plain floating-point division, so a zero divisor yields an
 * infinity or NaN rather than an exception.
 */
def div(d1: Double, d2: Double): Double = {
  d1 / d2
}

/**
 * Euclidean distance between two points.
 *
 * The squared differences are only accumulated when both arrays have the
 * same length and that length equals `D_LEN`; otherwise the result is 0.0.
 *
 * @param d1 first point
 * @param d2 second point
 * @return square root of the sum of squared per-dimension differences
 */
def distance(d1: Array[Double], d2: Array[Double]): Double = {
  val sameShape = d1.length == d2.length && d1.length == D_LEN
  val squaredSum =
    if (sameShape)
      (0 until D_LEN).foldLeft(0d) { (acc, dim) =>
        acc + Math.pow(d1(dim) - d2(dim), 2)
      }
    else 0d

  Math.sqrt(squaredSum)
}

/**
 * Recomputes the center of every cluster from its current members.
 *
 * For each cluster in `k_Cluster` the new center is the per-dimension mean
 * of the member points. The distance between the previous center and the new
 * one is stored in `ctDistance`, and the new center replaces the old one in
 * `initCluster`.
 *
 * A cluster without members keeps its previous center and records a moved
 * distance of 0. Averaging zero points would divide by zero and turn the
 * center into NaN; a NaN center never attracts points again and never
 * compares equal to the convergence threshold.
 */
def newCenter(): Unit = {
  k_Cluster.foreach { case (clusterId, members) =>
    if (members.isEmpty) {
      ctDistance(clusterId) = 0d
    } else {
      // Per-dimension sum over all members of this cluster.
      val sums = new Array[Double](D_LEN)
      members.foreach { member =>
        for (i <- 0 until D_LEN) {
          sums(i) += member.point(i)
        }
      }

      val centroid = sums.map(_ / members.size)

      // Measure the move against the old center before overwriting it.
      ctDistance(clusterId) = distance(initCluster(clusterId), centroid)
      initCluster += (clusterId -> centroid)
    }
  }
}

/**
 * Iteration step: re-assigns every data point to its nearest current center.
 *
 * `k_Cluster` is rebuilt from scratch with one entry per cluster index in
 * `0 until K`. Within a cluster the members keep the order they have in
 * `datasource`. When two centers are equally near, the lower cluster index
 * wins.
 */
def order(): Unit = {

  // Index of the center closest to `p`. The search starts from +Infinity
  // rather than a fixed large number, so arbitrarily distant points are still
  // compared against every center.
  def nearestCluster(p: Array[Double]): Int = {
    val (bestId, _) = (0 until K).foldLeft((0, Double.PositiveInfinity)) {
      case ((currentId, currentDist), j) =>
        val dist = distance(p, initCluster(j))
        if (dist < currentDist) (j, dist) else (currentId, currentDist)
    }
    bestId
  }

  // One pass over the list; groupBy keeps the traversal order inside each group.
  val byCluster = datasource.groupBy(entity => nearestCluster(entity.point))

  k_Cluster = (0 until K).map { j =>
    j -> byCluster.getOrElse(j, List[KEntity]())
  }.toMap
}

/**
 * Runs the clustering loop.
 *
 * Each round re-assigns the data points to the current centers. The loop
 * stops once every entry of `ctDistance` shows a computed move (>= 0) that
 * does not exceed `DISTANCE`. Otherwise the centers are recomputed, the round
 * number is printed, and another round starts.
 *
 * The -1 entries written by `init` are negative and therefore never count as
 * converged, so the centers are always recomputed at least once.
 */
def exec: Unit = {
  var round = 0
  var converged = false

  while (!converged) {
    order() // re-assign the data points to the current centers

    // Converged when no center moved farther than the precision threshold.
    converged = ctDistance.forall(moved => moved >= 0 && moved <= DISTANCE)

    if (!converged) {
      newCenter() // recompute each cluster's center from its members
      println("--------------迭代次数:" + round)
      round += 1
    }
  }
}

/**
 * Demo entry point: loads the sample data, runs the clustering and prints
 * the members of every cluster.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {

  println("-------------Start")

  // Sample data: one row per data point, four numeric values per row.
  val d: Array[Array[Double]] = Array(
    Array(6, 12, 8929, 1474),
    Array(7, 12, 9149, 9952),
    Array(4, 12, 3992, 5822),
    Array(3, 12, 1626, 360),
    Array(32, 12, 3563, 39630),
    Array(38, 12, 303451, 34083239),
    Array(66, 12, 133102, 6468),
    Array(14, 12, 38860, 15140),
    Array(128, 72, 271390, 39019349),
    Array(111, 12, 0, 0),
    Array(61, 12, 18626, 664),
    Array(40, 12, 3626, 660),
    Array(63, 42, 2290136, 3419991),
    Array(1, 12, 0, 14000),
    Array(5, 12, 5723, 998),
    Array(6, 12, 9032, 1512),
    Array(102, 72, 20134467, 25894663),
    Array(5, 12, 5723, 5998),
    Array(101, 72, 621319, 15322448),
    Array(6, 12, 9095, 1542),
    Array(6, 12, 9095, 1542),
    Array(6, 12, 9095, 1542),
    Array(6, 12, 9095, 1542),
    Array(6, 12, 9095, 1542),
    Array(4, 12, 3626, 660),
    Array(6, 12, 9095, 1542),
    Array(3, 12, 1626, 360),
    Array(2, 12, 500, 19100),
    Array(100, 12, 15420, 8208707),
    Array(100, 12, 8927659, 38163823),
    Array(11, 12, 7708, 5546),
    Array(6, 12, 8849, 33459849),
    Array(4, 12, 3626, 89160),
    Array(14, 12, 38860, 14140),
    Array(1, 12, 0, 79000),
    Array(30, 12, 225482, 90391),
    Array(31, 12, 230754, 119948),
    Array(3, 12, 1626, 360),
    Array(27, 12, 287635, 148541),
    Array(11, 12, 7890, 13594),
    Array(7, 12, 9095, 26942),
    Array(30, 36, 311375, 73711),
    Array(2, 12, 500, 84100),
    Array(1, 12, 0, 228000),
    Array(6, 12, 8992, 1504),
    Array(3, 12, 1626, 360),
    Array(4, 12, 3626, 8660),
    Array(18, 12, 69041, 17594),
    Array(18, 12, 69358, 16593),
    Array(4, 12, 3706, 694),
    Array(2, 12, 500, 3100),
    Array(35, 12, 2907, 206039),
    Array(8, 12, 7446, 10562),
    Array(2, 12, 500, 3100),
    Array(21, 12, 189051, 20076),
    Array(21, 12, 188065, 19604),
    Array(100, 12, 805762, 19934040),
    Array(15, 12, 40589, 13905),
    Array(8, 12, 7549, 2600),
    Array(6, 12, 9095, 1542),
    Array(15, 12, 39589, 7305),
    Array(18, 12, 66326, 10959),
    Array(7, 12, 9095, 9942),
    Array(22, 12, 184273, 27756),
    Array(8, 12, 7708, 8646),
    Array(23, 12, 223512, 33280),
    Array(4, 12, 3626, 660),
    Array(4, 12, 3626, 660),
    Array(6, 12, 8786, 80428),
    Array(3, 12, 1626, 360),
    Array(21, 12, 5515, 21260),
    Array(1, 12, 0, 3000),
    Array(1, 12, 0, 41000),
    Array(30, 12, 330716, 91039),
    Array(4, 12, 3626, 660),
    Array(7, 12, 9329, 2022),
    Array(1, 12, 0, 3000),
    Array(2, 12, 500, 178700),
    Array(2, 12, 500, 132900),
    Array(2, 12, 500, 18500),
    Array(1, 12, 0, 8200),
    Array(2, 12, 500, 8300),
    Array(2, 12, 500, 45900),
    Array(2, 12, 500, 18900),
    Array(2, 12, 500, 9500),
    Array(2, 12, 500, 96500),
    Array(2, 12, 500, 30700),
    Array(2, 12, 500, 20100),
    Array(8, 12, 7577, 67104),
    Array(58, 36, 1397618, 17879602),
    Array(11, 12, 8909, 11400),
    Array(6, 12, 9181, 1594),
    Array(2, 12, 563, 130),
    Array(32, 12, 19756, 186422),
    Array(6, 12, 8786, 6428),
    Array(5, 12, 5786, 1028),
    Array(6, 12, 8786, 1428),
    Array(6, 12, 8786, 40006428),
    Array(12, 12, 68456, 95240),
    Array(2, 12, 500, 20100),
    Array(11, 12, 7708, 7546),
    Array(4, 12, 3786, 728),
    Array(8, 12, 7577, 2604),
    Array(5, 12, 5849, 1058),
    Array(5, 12, 5786, 1028),
    Array(6, 12, 8889, 1466),
    Array(9, 12, 7708, 4246),
    Array(9, 12, 7708, 4246),
    Array(7, 12, 9095, 1942),
    Array(4, 12, 3626, 660),
    Array(7, 12, 8500, 1700),
    Array(4, 12, 3500, 600),
    Array(2, 12, 563, 130),
    Array(9, 12, 7708, 4246),
    Array(4, 12, 13000, 400),
    Array(1, 12, 0, 3000),
    Array(3, 12, 1500, 300),
    Array(7, 12, 9095, 1942),
    Array(10, 12, 6879, 4774),
    Array(4, 12, 3626, 27660),
    Array(11, 12, 7141, 7308),
    Array(9, 12, 7403, 5092),
    Array(19, 12, 107219, 22580),
    Array(15, 12, 39589, 14305),
    Array(32, 12, 8945, 229632),
    Array(7, 12, 9212, 1982),
    Array(1, 12, 0, 2000),
    Array(2, 12, 500, 5100),
    Array(1, 12, 0, 5000),
    Array(2, 12, 500, 5100),
    Array(23, 12, 134301, 37817),
    Array(3, 12, 1626, 360),
    Array(2, 12, 626, 160),
    Array(32, 12, 174216, 160294),
    Array(1, 12, 0, 342000),
    Array(2, 12, 500, 120100),
    Array(21, 12, 8515, 19560),
    Array(15, 12, 40854, 12462),
    Array(4, 12, 3706, 694),
    Array(60, 12, 0, 0),
    Array(60, 12, 0, 0),
    Array(14, 12, 38860, 9140),
    Array(21, 12, 7689, 32237),
    Array(4, 12, 3626, 660),
    Array(4, 12, 3626, 660),
    Array(6, 12, 8889, 362466),
    Array(3, 12, 1626, 360),
    Array(1, 12, 0, 3000),
    Array(2, 12, 563, 5130),
    Array(2, 12, 500, 62100),
    Array(4, 12, 3626, 660),
    Array(45, 12, 1508096, 8341432),
    Array(1, 12, 0, 114000),
    Array(4, 12, 3626, 12660),
    Array(1, 12, 0, 9000),
    Array(4, 12, 3626, 234660),
    Array(1, 12, 0, 132000),
    Array(60, 12, 3293822, 40645),
    Array(12, 12, 68658, 7290),
    Array(6, 12, 8786, 8428),
    Array(4, 12, 3786, 2728),
    Array(1, 12, 0, 5000),
    Array(1, 12, 0, 400000000),
    Array(1, 12, 0, 2000),
    Array(2, 12, 626, 5160),
    Array(32, 12, 273220, 152515),
    Array(2, 12, 500, 5100),
    Array(1, 12, 0, 8000),
    Array(1, 12, 0, 5000))

  // Wrap every row in a KEntity whose attribute string is the row's values
  // joined by single spaces. Each entity is prepended, so `datasource` ends up
  // in the reverse of the row order above.
  d.foreach { row =>
    datasource = new KEntity(row, row.mkString(" "), 0) :: datasource
  }

  init // choose the initial cluster centers

  exec // run the clustering

  // Print each cluster index followed by the attribute string of its members.
  k_Cluster.foreach { case (clusterId, members) =>
    println("---------------K" + clusterId)
    members.foreach { member =>
      println("[" + member.attrStr + "]")
    }
  }

}

}


清单3.

执行结果

-------------Start
--------------迭代次数:0
--------------迭代次数:1
--------------迭代次数:2
--------------迭代次数:3
--------------迭代次数:4
--------------迭代次数:5
---------------K0
[1.0 12.0 0.0 5000.0]
[1.0 12.0 0.0 8000.0]
[2.0 12.0 500.0 5100.0]
[32.0 12.0 273220.0 152515.0]
[2.0 12.0 626.0 5160.0]
[1.0 12.0 0.0 2000.0]
[1.0 12.0 0.0 5000.0]
[4.0 12.0 3786.0 2728.0]
[6.0 12.0 8786.0 8428.0]
[12.0 12.0 68658.0 7290.0]
[60.0 12.0 3293822.0 40645.0]
[1.0 12.0 0.0 132000.0]
[4.0 12.0 3626.0 234660.0]
[1.0 12.0 0.0 9000.0]
[4.0 12.0 3626.0 12660.0]
[1.0 12.0 0.0 114000.0]
[45.0 12.0 1508096.0 8341432.0]
[4.0 12.0 3626.0 660.0]
[2.0 12.0 500.0 62100.0]
[2.0 12.0 563.0 5130.0]
[1.0 12.0 0.0 3000.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8889.0 362466.0]
[4.0 12.0 3626.0 660.0]
[4.0 12.0 3626.0 660.0]
[21.0 12.0 7689.0 32237.0]
[14.0 12.0 38860.0 9140.0]
[60.0 12.0 0.0 0.0]
[60.0 12.0 0.0 0.0]
[4.0 12.0 3706.0 694.0]
[15.0 12.0 40854.0 12462.0]
[21.0 12.0 8515.0 19560.0]
[2.0 12.0 500.0 120100.0]
[1.0 12.0 0.0 342000.0]
[32.0 12.0 174216.0 160294.0]
[2.0 12.0 626.0 160.0]
[3.0 12.0 1626.0 360.0]
[23.0 12.0 134301.0 37817.0]
[2.0 12.0 500.0 5100.0]
[1.0 12.0 0.0 5000.0]
[2.0 12.0 500.0 5100.0]
[1.0 12.0 0.0 2000.0]
[7.0 12.0 9212.0 1982.0]
[32.0 12.0 8945.0 229632.0]
[15.0 12.0 39589.0 14305.0]
[19.0 12.0 107219.0 22580.0]
[9.0 12.0 7403.0 5092.0]
[11.0 12.0 7141.0 7308.0]
[4.0 12.0 3626.0 27660.0]
[10.0 12.0 6879.0 4774.0]
[7.0 12.0 9095.0 1942.0]
[3.0 12.0 1500.0 300.0]
[1.0 12.0 0.0 3000.0]
[4.0 12.0 13000.0 400.0]
[9.0 12.0 7708.0 4246.0]
[2.0 12.0 563.0 130.0]
[4.0 12.0 3500.0 600.0]
[7.0 12.0 8500.0 1700.0]
[4.0 12.0 3626.0 660.0]
[7.0 12.0 9095.0 1942.0]
[9.0 12.0 7708.0 4246.0]
[9.0 12.0 7708.0 4246.0]
[6.0 12.0 8889.0 1466.0]
[5.0 12.0 5786.0 1028.0]
[5.0 12.0 5849.0 1058.0]
[8.0 12.0 7577.0 2604.0]
[4.0 12.0 3786.0 728.0]
[11.0 12.0 7708.0 7546.0]
[2.0 12.0 500.0 20100.0]
[12.0 12.0 68456.0 95240.0]
[6.0 12.0 8786.0 1428.0]
[5.0 12.0 5786.0 1028.0]
[6.0 12.0 8786.0 6428.0]
[32.0 12.0 19756.0 186422.0]
[2.0 12.0 563.0 130.0]
[6.0 12.0 9181.0 1594.0]
[11.0 12.0 8909.0 11400.0]
[8.0 12.0 7577.0 67104.0]
[2.0 12.0 500.0 20100.0]
[2.0 12.0 500.0 30700.0]
[2.0 12.0 500.0 96500.0]
[2.0 12.0 500.0 9500.0]
[2.0 12.0 500.0 18900.0]
[2.0 12.0 500.0 45900.0]
[2.0 12.0 500.0 8300.0]
[1.0 12.0 0.0 8200.0]
[2.0 12.0 500.0 18500.0]
[2.0 12.0 500.0 132900.0]
[2.0 12.0 500.0 178700.0]
[1.0 12.0 0.0 3000.0]
[7.0 12.0 9329.0 2022.0]
[4.0 12.0 3626.0 660.0]
[30.0 12.0 330716.0 91039.0]
[1.0 12.0 0.0 41000.0]
[1.0 12.0 0.0 3000.0]
[21.0 12.0 5515.0 21260.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8786.0 80428.0]
[4.0 12.0 3626.0 660.0]
[4.0 12.0 3626.0 660.0]
[23.0 12.0 223512.0 33280.0]
[8.0 12.0 7708.0 8646.0]
[22.0 12.0 184273.0 27756.0]
[7.0 12.0 9095.0 9942.0]
[18.0 12.0 66326.0 10959.0]
[15.0 12.0 39589.0 7305.0]
[6.0 12.0 9095.0 1542.0]
[8.0 12.0 7549.0 2600.0]
[15.0 12.0 40589.0 13905.0]
[21.0 12.0 188065.0 19604.0]
[21.0 12.0 189051.0 20076.0]
[2.0 12.0 500.0 3100.0]
[8.0 12.0 7446.0 10562.0]
[35.0 12.0 2907.0 206039.0]
[2.0 12.0 500.0 3100.0]
[4.0 12.0 3706.0 694.0]
[18.0 12.0 69358.0 16593.0]
[18.0 12.0 69041.0 17594.0]
[4.0 12.0 3626.0 8660.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8992.0 1504.0]
[1.0 12.0 0.0 228000.0]
[2.0 12.0 500.0 84100.0]
[30.0 36.0 311375.0 73711.0]
[7.0 12.0 9095.0 26942.0]
[11.0 12.0 7890.0 13594.0]
[27.0 12.0 287635.0 148541.0]
[3.0 12.0 1626.0 360.0]
[31.0 12.0 230754.0 119948.0]
[30.0 12.0 225482.0 90391.0]
[1.0 12.0 0.0 79000.0]
[14.0 12.0 38860.0 14140.0]
[4.0 12.0 3626.0 89160.0]
[11.0 12.0 7708.0 5546.0]
[100.0 12.0 15420.0 8208707.0]
[2.0 12.0 500.0 19100.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 9095.0 1542.0]
[4.0 12.0 3626.0 660.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[5.0 12.0 5723.0 5998.0]
[6.0 12.0 9032.0 1512.0]
[5.0 12.0 5723.0 998.0]
[1.0 12.0 0.0 14000.0]
[63.0 42.0 2290136.0 3419991.0]
[40.0 12.0 3626.0 660.0]
[61.0 12.0 18626.0 664.0]
[111.0 12.0 0.0 0.0]
[14.0 12.0 38860.0 15140.0]
[66.0 12.0 133102.0 6468.0]
[32.0 12.0 3563.0 39630.0]
[3.0 12.0 1626.0 360.0]
[4.0 12.0 3992.0 5822.0]
[7.0 12.0 9149.0 9952.0]
[6.0 12.0 8929.0 1474.0]
---------------K1
[1.0 12.0 0.0 4.0E8]
---------------K2
[6.0 12.0 8786.0 4.0006428E7]
[58.0 36.0 1397618.0 1.7879602E7]
[100.0 12.0 805762.0 1.993404E7]
[6.0 12.0 8849.0 3.3459849E7]
[100.0 12.0 8927659.0 3.8163823E7]
[101.0 72.0 621319.0 1.5322448E7]
[102.0 72.0 2.0134467E7 2.5894663E7]
[128.0 72.0 271390.0 3.9019349E7]
[38.0 12.0 303451.0 3.4083239E7]
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: