您的位置:首页 > 其它

数据挖掘---Kmeans算法

2016-03-19 10:14 190 查看
聚类算法

library(amap)
### K-means clustering demo (amap::Kmeans on simulated 2-D data)
# NOTE: earlier versions of this script called
# setwd("E://RProgramming//k-means") and rm(list = ls()). Both were removed:
# the hard-coded path fails on any machine without that directory, and
# clearing the workspace silently destroys the caller's objects. Run the
# script from the directory where the output should be written.
getwd()

## a 2-dimensional example
# Fix the RNG seed so the simulated data are reproducible across runs.
set.seed(20160319)
# 50 points drawn around 0 stacked on top of 50 points drawn around 1
# (sd = 0.3 in both groups), giving a 100 x 2 matrix.
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")
# View() opens a data viewer, so only call it in interactive sessions; it is
# called after colnames() so the viewer shows the column labels.
if (interactive()) {
  View(x)
}
# Partition the data into 2 clusters; the outer parentheses auto-print the fit.
(cl <- Kmeans(x, 2))

## Visualise the clustering: colour each observation by its assigned cluster
plot(x, col = cl[["cluster"]])
# Overlay the two cluster centres as enlarged star symbols
points(cl[["centers"]], pch = 8, cex = 2, col = 1:2)

## Export the clustering result
# Append the cluster assignment as an explicitly named column; an unnamed
# column would end up with an empty header in the printed matrix and the CSV.
result <- cbind(x, cluster = cl$cluster)
# Inspect the combined table in the console
result
# Write the table to result.csv in the current working directory
write.csv(result, "result.csv")

## random starts do help here with too many clusters
# Request 5 clusters from kmeans() with nstart = 25, then auto-print the fit
cl <- kmeans(x, centers = 5, nstart = 25)
cl
# Colour the observations by their new cluster assignment
plot(x, col = cl[["cluster"]])
# Mark the five fitted centres with star symbols
points(cl[["centers"]], pch = 8, col = 1:5)

# A second, independent fit with the same settings; printed but not stored
kmeans(x, centers = 5, nstart = 25)


代码运行结果

> library(amap)
> ###Kmeans聚类
> getwd()
[1] "E:/RProgramming/k-means"
> setwd("E://RProgramming//k-means")
> getwd()
[1] "E:/RProgramming/k-means"
> rm(list = ls())
> ## a 2-dimensional example
> x <- rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
+ matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2))
> View(x)
> colnames(x) <- c("x", "y")
> (cl <- Kmeans(x, 2))
K-means clustering with 2 clusters of sizes 49, 51

Cluster means:
            x            y
1 -0.05872921 -0.006889006
2  1.01425098  1.023978497

Clustering vector:
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[43] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[85] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2

Within cluster sum of squares by cluster:
[1] 0.06295857 0.11741825

Available components:

[1] "cluster"  "centers"  "withinss" "size"
> ##聚类结果可视化
> plot(x, col = cl$cluster)
> # 打点
> points(cl$centers, col = 1:2, pch = 8, cex=2)
> ##输出聚类结果
> result=cbind(x,cl$cluster)
> # 控制台查看
> result
x            y
[1,]  0.28177490  0.268509380 1
[2,]  0.06621169  0.119891580 1
[3,] -0.38274424 -0.248606083 1
[4,]  0.09006260  0.051699632 1
[5,] -0.09692542  0.322215975 1
[6,]  0.15647970  0.288436312 1
[7,]  0.19092762  0.133741359 1
[8,] -0.23427971  0.610128559 1
[9,]  0.18454749  0.095391763 1
[10,]  0.36291855  0.005966326 1
[11,]  0.53569403 -0.068460001 1
[12,] -0.20051678 -0.291325358 1
[13,] -0.21145636 -0.023752685 1
[14,]  0.04976921  0.222398735 1
[15,] -0.18968903 -0.229554005 1
[16,]  0.10335602  0.102729404 1
[17,]  0.91992730  0.435664321 2
[18,]  0.10611566  0.683222443 1
[19,] -0.35083793 -0.023331964 1
[20,] -0.21631613 -0.161222241 1
[21,] -0.27096395 -0.095428642 1
[22,] -0.05633221 -0.301882592 1
[23,] -0.01348153  0.182819398 1
[24,]  0.25295100 -0.086372855 1
[25,] -0.47296439  0.081390039 1
[26,] -0.07352592  0.308974701 1
[27,] -0.44389001  0.066118619 1
[28,] -0.69930613 -0.069449851 1
[29,] -0.26116616  0.073768868 1
[30,]  0.16615100 -0.291111975 1
[31,] -0.34114446  0.445654806 1
[32,] -0.12046305  0.259997356 1
[33,] -0.45790251  0.079322505 1
[34,] -0.01581126  0.605178889 1
[35,] -0.21893653  0.440846660 1
[36,] -0.33256445  0.051105505 1
[37,]  0.15031059 -0.391764171 1
[38,] -0.18969351 -0.114427348 1
[39,] -0.06457588  0.282985210 1
[40,]  0.14211920 -0.077059994 1
[41,] -0.07789533 -0.300006066 1
[42,] -0.03288814 -0.375984243 1
[43,]  0.02504853 -0.199187223 1
[44,] -0.27071483 -0.414218204 1
[45,]  0.44211860 -0.135544698 1
[46,]  0.26243353 -0.562781889 1
[47,]  0.44280891 -0.416570857 1
[48,]  0.02185630 -0.574371771 1
[49,] -0.43189796 -0.442488648 1
[50,] -0.18250261 -0.225151949 1
[51,]  0.87410799  1.119161938 2
[52,]  1.82655210  0.474417785 2
[53,]  0.90396910  0.763810043 2
[54,]  0.99401616  0.588297290 2
[55,]  0.88184672  1.246619299 2
[56,]  0.70808651  1.523417676 2
[57,]  0.66147531  1.032255967 2
[58,]  1.33575298  1.233658428 2
[59,]  0.78446146  0.980213662 2
[60,]  0.95517760  1.112569454 2
[61,]  1.12980722  1.135207545 2
[62,]  1.29627076  1.521610225 2
[63,]  0.96338533  1.276594783 2
[64,]  0.79565001  0.614111841 2
[65,]  1.36715990  1.216801384 2
[66,]  1.26021158  0.844766095 2
[67,]  0.87792257  1.031412943 2
[68,]  0.71873228  1.460882668 2
[69,]  0.78956259  0.905758067 2
[70,]  0.52181321  1.075192095 2
[71,]  1.32940602  0.925738073 2
[72,]  1.33318715  0.892233271 2
[73,]  1.30163787  0.856011754 2
[74,]  1.04552814  0.916749719 2
[75,]  1.04953931  1.144217671 2
[76,]  0.94100022  1.320290001 2
[77,]  0.87490027  0.582653870 2
[78,]  1.73685788  1.081607402 2
[79,]  0.51904755  1.116358447 2
[80,]  1.42149965  1.348846736 2
[81,]  0.57919980  0.841866542 2
[82,]  0.53690029  1.246479376 2
[83,]  0.78562009  0.931084973 2
[84,]  1.58231756  0.942339863 2
[85,]  1.15360276  1.635169242 2
[86,]  1.04507616  0.820702032 2
[87,]  0.71241553  0.508826264 2
[88,]  1.36306586  1.432871059 2
[89,]  0.87988681  1.101456216 2
[90,]  1.36126577  0.924339801 2
[91,]  0.84432014  1.637220741 2
[92,]  0.71505447  1.199441186 2
[93,]  0.67140836  1.222118417 2
[94,]  0.97958336  0.923694018 2
[95,]  1.34961513  0.798982707 2
[96,]  1.23954654  0.370656795 2
[97,]  0.86322650  0.561514996 2
[98,]  0.74487757  1.305471967 2
[99,]  1.02631646  0.714943866 2
[100,]  1.17500801  1.326592814 2
> # 写出来文件
> write.csv(result,"result.csv")
> ## random starts do help here with too many clusters
> (cl <- kmeans(x, 5, nstart = 25))
K-means clustering with 5 clusters of sizes 23, 16, 15, 20, 26

Cluster means:
            x          y
1  0.07764794  0.2201457
2  1.38560096  1.0597516
3  0.90363587  0.6986890
4  0.80013232  1.2393272
5 -0.17937053 -0.2077274

Clustering vector:
[1] 1 1 5 1 1 1 1 1 1 1 1 5 5 1 5 1 3 1 5 5 5 5 1 1 5 1 5 5 5 5 1 1 5 1 1 5 5 5 1 1 5 5
[43] 5 5 1 5 5 5 5 5 4 2 3 3 4 4 4 2 4 4 2 2 4 3 2 2 4 4 3 4 2 2 2 3 4 4 3 2 4 2 3 4 3 2
[85] 4 3 3 2 4 2 4 4 4 3 2 3 3 4 3 2

Within cluster sum of squares by cluster:
[1] 2.1838748 1.6543424 0.8645221 1.2843706 2.6236598
(between_SS / total_SS =  88.2 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
[6] "betweenss"    "size"         "iter"         "ifault"
> plot(x, col = cl$cluster)
> points(cl$centers, col = 1:5, pch = 8)
> kmeans(x, 5,nstart = 25)
K-means clustering with 5 clusters of sizes 20, 26, 23, 16, 15

Cluster means:
            x          y
1  0.80013232  1.2393272
2 -0.17937053 -0.2077274
3  0.07764794  0.2201457
4  1.38560096  1.0597516
5  0.90363587  0.6986890

Clustering vector:
[1] 3 3 2 3 3 3 3 3 3 3 3 2 2 3 2 3 5 3 2 2 2 2 3 3 2 3 2 2 2 2 3 3 2 3 3 2 2 2 3 3 2 2
[43] 2 2 3 2 2 2 2 2 1 4 5 5 1 1 1 4 1 1 4 4 1 5 4 4 1 1 5 1 4 4 4 5 1 1 5 4 1 4 5 1 5 4
[85] 1 5 5 4 1 4 1 1 1 5 4 5 5 1 5 4

Within cluster sum of squares by cluster:
[1] 1.2843706 2.6236598 2.1838748 1.6543424 0.8645221
(between_SS / total_SS =  88.2 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
[6] "betweenss"    "size"         "iter"         "ifault"
内容来自用户分享和网络整理,不保证内容的准确性;如有侵权内容,可联系管理员处理(点击这里给我发消息)。
标签: