3.聚类–K-means的Java实现
2015-11-30 10:52
417 查看
K-means的步骤
输入:含n个样本的数据集,簇的数目K
输出:K个簇
算法步骤:
1. 初始化K个聚类中心C1, C2, ……, Ck(通常随机选择)
2. 重复(repeat)步骤3、4
3. 将数据集中的每个样本分配到与之最近的中心Ci所对应的簇i;
4. 更新聚类中心Ci,即计算各个簇的样本均值;
5. 直到样本分配不再改变
上代码。(以下再次列出算法的输出与步骤,便于对照代码阅读。)
输出:K个簇
算法步骤:
1. 初始化K个聚类中心C1, C2, ……, Ck(通常随机选择)
2. 重复(repeat)步骤3、4
3. 将数据集中的每个样本分配到与之最近的中心Ci所对应的簇i;
4. 更新聚类中心Ci,即计算各个簇的样本均值;
5. 直到样本分配不再改变
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Marker annotation for a field of a sample class: an annotated field is
 * included in the k-means computation. Only numeric fields are supported.
 *
 * <p>The annotation is retained at run time so that it can be discovered
 * through reflection, and it may only be placed on fields.
 *
 * @author 阿飞哥
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
public @interface KmeanField {
}
.csharpcode,.csharpcodepre
{
font-size:small;
color:black;
font-family:consolas,"CourierNew",courier,monospace;
background-color:#ffffff;
/*white-space:pre;*/
}
.csharpcodepre{margin:0em;}
.csharpcode.rem{color:#008000;}
.csharpcode.kwrd{color:#0000ff;}
.csharpcode.str{color:#006080;}
.csharpcode.op{color:#0000c0;}
.csharpcode.preproc{color:#cc6633;}
.csharpcode.asp{background-color:#ffff00;}
.csharpcode.html{color:#800000;}
.csharpcode.attr{color:#ff0000;}
.csharpcode.alt
{
background-color:#f4f4f4;
width:100%;
margin:0em;
}
.csharpcode.lnum{color:#606060;}
importjava.lang.annotation.Annotation; importjava.lang.reflect.Field; importjava.lang.reflect.Method; importjava.util.ArrayList; importjava.util.List; /** * *@author阿飞哥 * */ publicclassKmeans<T>{ /** *所有数据列表 */ privateList<T>players=newArrayList<T>(); /** *数据类别 */ privateClass<T>classT; /** *初始化列表 */ privateList<T>initPlayers; /** *需要纳入kmeans算法的属性名称 */ privateList<String>fieldNames=newArrayList<String>(); /** *分类数 */ privateintk=1; publicKmeans(){ } /** *初始化列表 * *@paramlist *@paramk */ publicKmeans(List<T>list,intk){ this.players=list; this.k=k; Tt=list.get(0); this.classT=(Class<T>)t.getClass(); Field[]fields=this.classT.getDeclaredFields(); System.out.println("fields---------------------------------------------="+fields.length); for(inti=0;i<fields.length;i++){ AnnotationkmeansAnnotation=fields[i] .getAnnotation(KmeanField.class); if(kmeansAnnotation!=null){ fieldNames.add(fields[i].getName()); System.out.println("fieldNames.add"+fields[i].getName()); } } initPlayers=newArrayList<T>(); for(inti=0;i<k;i++){ initPlayers.add(players.get(i)); } } publicList<T>[]comput(){ List<T>[]results=newArrayList[k]; booleancenterchange=true; while(centerchange){ centerchange=false; for(inti=0;i<k;i++){ results[i]=newArrayList<T>(); } for(inti=0;i<players.size();i++){ Tp=players.get(i); double[]dists=newdouble[k]; for(intj=0;j<initPlayers.size();j++){ TinitP=initPlayers.get(j); /*计算距离*/ doubledist=distance(initP,p); //doubledist=1.0; //doubledist=LevenshteinDistance.levenshteinDistance(initP,p); //System.out.println("dist="+dist); dists[j]=dist; } intdist_index=computOrder(dists); //System.out.println("dist_index="+dist_index); results[dist_index].add(p); } //System.out.println("results[0].size()="+results[0].size()); for(inti=0;i<k;i++){//在每一个簇中寻找中心点 Tplayer_new=findNewCenter(results[i]); //System.out.println("results[i]"+i+"----"+k+"---===="+results[i].size()+"===="+player_new.toString()); Tplayer_old=initPlayers.get(i); if(!IsPlayerEqual(player_new,player_old)){ 
centerchange=true; initPlayers.set(i,player_new); } } } //System.out.println("results+"+results.length); returnresults; } /** *比较是否两个对象是否属性一致 * *@paramp1 *@paramp2 *@return */ publicbooleanIsPlayerEqual(Tp1,Tp2){ if(p1==p2){ returntrue; } if(p1==null||p2==null){ returnfalse; } booleanflag=true; try{ for(inti=0;i<fieldNames.size();i++){ StringfieldName=fieldNames.get(i); StringgetName="get" +fieldName.substring(0,1).toUpperCase() +fieldName.substring(1); //System.out.println(fieldNames); Objectvalue1=invokeMethod(p1,getName,null); Objectvalue2=invokeMethod(p2,getName,null); if(!value1.equals(value2)){ flag=false; break; } } }catch(Exceptione){ e.printStackTrace(); flag=false; } returnflag; } /** *得到新聚类中心对象 * *@paramps *@return */ publicTfindNewCenter(List<T>ps){ try{ Tt=classT.newInstance(); if(ps==null||ps.size()==0){ returnt; } double[]ds=newdouble[fieldNames.size()]; for(Tvo:ps){ for(inti=0;i<fieldNames.size();i++){ StringfieldName=fieldNames.get(i); StringgetName="get" +fieldName.substring(0,1).toUpperCase() +fieldName.substring(1); Objectobj=invokeMethod(vo,getName,null); Doublefv=(obj==null?0:Double.parseDouble(obj+"")); ds[i]+=fv; } } //System.out.println("-----------------"); for(inti=0;i<fieldNames.size();i++){ ds[i]=ds[i]/ps.size();//平均距离 StringfieldName=fieldNames.get(i); /*给对象设值*/ StringsetName="set" +fieldName.substring(0,1).toUpperCase() +fieldName.substring(1); //invokeMethod(t,setName,newClass[]{double.class},ds[i]); System.out.println("ds[i]++="+ds[i]+"----ps.size()"+ps.size()); invokeMethod(t,setName,newClass[]{double.class},ds[i]); } returnt; }catch(Exceptionex){ ex.printStackTrace(); } returnnull; } /** *得到最短距离,并返回最短距离索引 * *@paramdists *@return */ publicintcomputOrder(double[]dists){ doublemin=0; intindex=0; for(inti=0;i<dists.length-1;i++){ doubledist0=dists[i]; if(i==0){ min=dist0; index=0; } doubledist1=dists[i+1]; if(min>dist1){ min=dist1; index=i+1; } } returnindex; } /** *计算距离(相似性)采用欧几里得算法 * *@paramp0 *@paramp1 *@return */ 
publicdoubledistance(Tp0,Tp1){ doubledis=0; try{ for(inti=0;i<fieldNames.size();i++){ StringfieldName=fieldNames.get(i); StringgetName="get" +fieldName.substring(0,1).toUpperCase() +fieldName.substring(1); //System.out.println("fieldNames-----="+fieldNames.size()); Doublefield0Value=Double.parseDouble(invokeMethod(p0,getName,null)+""); Doublefield1Value=Double.parseDouble(invokeMethod(p1,getName,null)+""); //System.out.println("field0Value="+field0Value); dis+=Math.pow(field0Value-field1Value,2); } }catch(Exceptionex){ ex.printStackTrace(); } returnMath.sqrt(dis); } /*------公共方法-----*/ publicObjectinvokeMethod(Objectowner,StringmethodName,Class[]argsClass, Object...args){ ClassownerClass=owner.getClass(); try{ Methodmethod=ownerClass.getDeclaredMethod(methodName,argsClass); returnmethod.invoke(owner,args); }catch(SecurityExceptione){ e.printStackTrace(); }catch(NoSuchMethodExceptione){ e.printStackTrace(); }catch(Exceptionex){ ex.printStackTrace(); } returnnull; } }
.csharpcode,.csharpcodepre
{
font-size:small;
color:black;
font-family:consolas,"CourierNew",courier,monospace;
background-color:#ffffff;
/*white-space:pre;*/
}
.csharpcodepre{margin:0em;}
.csharpcode.rem{color:#008000;}
.csharpcode.kwrd{color:#0000ff;}
.csharpcode.str{color:#006080;}
.csharpcode.op{color:#0000c0;}
.csharpcode.preproc{color:#cc6633;}
.csharpcode.asp{background-color:#ffff00;}
.csharpcode.html{color:#800000;}
.csharpcode.attr{color:#ff0000;}
.csharpcode.alt
{
background-color:#f4f4f4;
width:100%;
margin:0em;
}
.csharpcode.lnum{color:#606060;}
publicclassPlayer{ privateintid; //@KmeanField privateStringname; privateintage; /*得分*/ @KmeanField privatedoublegoal; /*助攻*/ //@KmeanField privatedoubleassists; /*篮板*/ //@KmeanField privatedoublebackboard; /*抢断*/ //@KmeanField privatedoublesteals; publicintgetId(){ returnid; } publicvoidsetId(intid){ this.id=id; } publicStringgetName(){ returnname; } publicvoidsetName(Stringname){ this.name=name; } publicintgetAge(){ returnage; } publicvoidsetAge(intage){ this.age=age; } publicdoublegetGoal(){ returngoal; } publicvoidsetGoal(doublegoal){ this.goal=goal; } publicdoublegetAssists(){ returnassists; } publicvoidsetAssists(doubleassists){ this.assists=assists; } publicdoublegetBackboard(){ returnbackboard; } publicvoidsetBackboard(doublebackboard){ this.backboard=backboard; } publicdoublegetSteals(){ returnsteals; } publicvoidsetSteals(doublesteals){ this.steals=steals; } @Override publicStringtoString(){ //TODOAuto-generatedmethodstub returnname; } }
.csharpcode,.csharpcodepre
{
font-size:small;
color:black;
font-family:consolas,"CourierNew",courier,monospace;
background-color:#ffffff;
/*white-space:pre;*/
}
.csharpcodepre{margin:0em;}
.csharpcode.rem{color:#008000;}
.csharpcode.kwrd{color:#0000ff;}
.csharpcode.str{color:#006080;}
.csharpcode.op{color:#0000c0;}
.csharpcode.preproc{color:#cc6633;}
.csharpcode.asp{background-color:#ffff00;}
.csharpcode.html{color:#800000;}
.csharpcode.attr{color:#ff0000;}
.csharpcode.alt
{
background-color:#f4f4f4;
width:100%;
margin:0em;
}
.csharpcode.lnum{color:#606060;}
.csharpcode,.csharpcodepre
{
font-size:small;
color:black;
font-family:consolas,"CourierNew",courier,monospace;
background-color:#ffffff;
/*white-space:pre;*/
}
.csharpcodepre{margin:0em;}
.csharpcode.rem{color:#008000;}
.csharpcode.kwrd{color:#0000ff;}
.csharpcode.str{color:#006080;}
.csharpcode.op{color:#0000c0;}
.csharpcode.preproc{color:#cc6633;}
.csharpcode.asp{background-color:#ffff00;}
.csharpcode.html{color:#800000;}
.csharpcode.attr{color:#ff0000;}
.csharpcode.alt
{
background-color:#f4f4f4;
width:100%;
margin:0em;
}
.csharpcode.lnum{color:#606060;}
importjava.util.ArrayList;
importjava.util.List;
importjava.util.Random;
publicclassTestMain{
publicstaticvoidmain(String[]args){
List<Player>listPlayers=newArrayList<Player>();
for(inti=0;i<15;i++){
Playerp1=newPlayer();
p1.setName("afei-"+i);
p1.setAssists(i);
p1.setBackboard(i);
//p1.setGoal(newRandom(100*i).nextDouble());
p1.setGoal(i*10);
p1.setSteals(i);
//listPlayers.add(p1);
}
Playerp1=newPlayer();
p1.setName("afei1");
p1.setGoal(1);
p1.setAssists(8);
listPlayers.add(p1);
Playerp2=newPlayer();
p2.setName("afei2");
p2.setGoal(2);
listPlayers.add(p2);
Playerp3=newPlayer();
p3.setName("afei3");
p3.setGoal(3);
listPlayers.add(p3);
Playerp4=newPlayer();
p4.setName("afei4");
p4.setGoal(7);
listPlayers.add(p4);
Playerp5=newPlayer();
p5.setName("afei5");
p5.setGoal(8);
listPlayers.add(p5);
Playerp6=newPlayer();
p6.setName("afei6");
p6.setGoal(25);
listPlayers.add(p6);
Playerp7=newPlayer();
p7.setName("afei7");
p7.setGoal(26);
listPlayers.add(p7);
Playerp8=newPlayer();
p8.setName("afei8");
p8.setGoal(27);
listPlayers.add(p8);
Playerp9=newPlayer();
p9.setName("afei9");
p9.setGoal(28);
listPlayers.add(p9);
Kmeans<Player>kmeans=newKmeans<Player>(listPlayers,2);
List<Player>[]results=kmeans.comput();
for(inti=0;i<results.length;i++){
System.out.println("===========类别"+(i+1)+"================");
List<Player>list=results[i];
for(Playerp:list){
System.out.println(p.getName()+"--->"
+p.getGoal()+","+p.getAssists()+","
+p.getSteals()+","+p.getBackboard());
}
}
}
}
.csharpcode,.csharpcodepre
{
font-size:small;
color:black;
font-family:consolas,"CourierNew",courier,monospace;
background-color:#ffffff;
/*white-space:pre;*/
}
.csharpcodepre{margin:0em;}
.csharpcode.rem{color:#008000;}
.csharpcode.kwrd{color:#0000ff;}
.csharpcode.str{color:#006080;}
.csharpcode.op{color:#0000c0;}
.csharpcode.preproc{color:#cc6633;}
.csharpcode.asp{background-color:#ffff00;}
.csharpcode.html{color:#800000;}
.csharpcode.attr{color:#ff0000;}
.csharpcode.alt
{
background-color:#f4f4f4;
width:100%;
margin:0em;
}
.csharpcode.lnum{color:#606060;}