您的位置:首页 > 其它

课程设计——聚类分析的初步实践

2008-09-14 21:08 561 查看
搞完了两周的课程设计,学到了不少的知识。

1、k-means算法:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define max 19
#define EPS 0.001

/* Number of clusters requested by the user; main() reads it from stdin. */
int k;

/*
 * One 4-dimensional sample. The same record type is also used to hold a
 * cluster centroid (see the center arrays passed around below).
 */
typedef struct
{
    double x1,x2,x3,x4;// the four coordinates
    double ks;// ks is short for k_distance: distance to the nearest centroid
    int tag;// cluster id assigned to this sample (main() numbers clusters from 1)
}cluster;

/* Sample storage shared by the whole program; its capacity is the `max` macro. */
cluster clu[max];

void getnumber(int n)//输入数据
{
    int i;
    FILE *pcluster;
    if((pcluster=fopen("data.txt","r"))==NULL)
    {
       printf("\nopen file is failed\n");
       exit(1);
    }
   rewind(pcluster);
    for(i=0;i<n;i++)
       fscanf(pcluster,"%lf%lf%lf%lf",&clu[i].x1,&clu[i].x2,&clu[i].x3,&clu[i].x4);
   fclose(pcluster);
}

/*
 * Print the members of every cluster.
 *
 * For each cluster id 1..k one line is written that lists the 1-based
 * positions of the first n entries of clu[] whose tag equals that id.
 */
void output(int n)
{
    int label,idx;

    for(label=1;label<=k;label++)
    {
        printf("\n第%d类:",label);
        for(idx=0;idx<n;idx++)
        {
            if(clu[idx].tag!=label)
                continue;// sample belongs to another cluster
            printf(" %d",idx+1);
        }
        putchar('\n');
    }
}
/*
 * Squared difference of two coordinates, (a-b)^2.
 * Building block for the Euclidean distance; the result is never negative,
 * so no absolute value is needed before squaring.
 */
double square(double a,double b)
{
    double diff=a-b;
    return diff*diff;
}
/*
 * Euclidean distance between two records, computed over their
 * four coordinates x1..x4. The other fields are ignored.
 */
double calculate_distance(cluster num1,cluster num2)
{
    double temp;
    temp=square(num1.x1,num2.x1)+square(num1.x2,num2.x2)+square(num1.x3,num2.x3)+square(num1.x4,num2.x4);
    return sqrt(temp);
}

/*
 * Convergence test: compare the new centroids with the previous ones.
 *
 * Returns 1 when every one of the k centroids moved by at most EPS,
 * 0 as soon as one centroid moved farther than that.
 */
int equal(cluster * center,cluster *centercopy)
{
    int i;
    double shift;// must be floating point: an int would truncate every shift below 1.0 to 0
    for(i=0;i<k;i++)
    {
        shift=calculate_distance(centercopy[i],center[i]);
        if(shift>EPS)
        {
            return 0;
        }
    }
    return 1;
}
/*
 * Snapshot the current centroids: copies the first k records of
 * `center` into `centercopy` so the next iteration can be compared
 * against them.
 */
void copycenter(cluster * center,cluster *centercopy)
{
    cluster *src=center;
    cluster *dst=centercopy;
    int remaining=k;

    while(remaining-->0)
        *dst++=*src++;
}

/*
 * Assignment step of k-means: attach every sample to its nearest centroid.
 *
 * simple  - array of n samples; their ks (distance) and tag (cluster id)
 *           fields are overwritten
 * center  - array of k centroids, each carrying the cluster id in tag
 * n       - number of samples
 *
 * Ties keep the centroid with the lower index because only a strictly
 * smaller distance replaces the current choice.
 */
void UpdateCluster(cluster * simple,cluster * center,const int n)
{
    int i,j;
    double ks;
    for(i=0;i<n;i++)
    {
        // start with centroid 0 as the provisional nearest one
        simple[i].ks=calculate_distance(simple[i],center[0]);
        simple[i].tag=center[0].tag;
        for (j=1;j<k;j++)
        {
            ks = calculate_distance(simple[i],center[j]);
            // compare against the array that was passed in, not the global clu[]
            if (ks<simple[i].ks)
            {
                simple[i].ks=ks;
                simple[i].tag=center[j].tag;
            }
        }
    }
}
/*
 * Update step of k-means: move every centroid to the arithmetic mean
 * of the samples currently tagged with its cluster id.
 *
 * simple  - array of n samples (read only here)
 * center  - array of k centroids; x1..x4 are overwritten
 * n       - number of samples
 *
 * A cluster that currently has no members keeps its previous centroid;
 * dividing by the zero member count would turn its coordinates into NaN.
 */
void UpdateCenter(cluster * simple,cluster * center,const int n)
{
    int i,j;
    for (i=0;i<k;i++)
    {
        double sum1=0,sum2=0,sum3=0,sum4=0;
        int members=0;

        for (j=0;j<n;j++)
        {
            if (simple[j].tag!=center[i].tag)
                continue;
            members++;
            sum1+=simple[j].x1;
            sum2+=simple[j].x2;
            sum3+=simple[j].x3;
            sum4+=simple[j].x4;
        }
        if (members==0)
            continue;// empty cluster: leave the centroid where it is
        center[i].x1=sum1/members;
        center[i].x2=sum2/members;
        center[i].x3=sum3/members;
        center[i].x4=sum4/members;
    }
}
int main()
{
    intcount=0,n,flag=1;//1 for false,0 for ture
    int i;

   printf("Please input the number of the data:\n");
    scanf("%d",&n);
    printf("Howmany kinds do you want:\n");
    scanf("%d",&k);
    clusterclu_center[k],clu_center_copy[k];
   getnumber(n);

    for(i=0;i<k;i++)//InitCenter
    {
       clu_center[i]=clu[i];
       clu_center[i].tag=i+1;
    }
   copycenter(clu_center,clu_center_copy);

    while(flag)//迭代
    {
       UpdateCluster(clu,clu_center,n);//将点划入新类
       UpdateCenter(clu,clu_center,n);//计算新类重心

       if (equal(clu_center,clu_center_copy))
       {
           flag=0;
       }
       else
       {
           copycenter(clu_center,clu_center_copy);
           count++;//计算迭代次数
       }
    }
   printf("\n总共迭代了%d次\n",count);
   output(n);
    return0;
}
2、凝聚分层聚类

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define max 100
/*
 * Number of nodes in the merge tree built from n samples
 * (n leaves plus n-1 merged nodes). Parenthesized so the expansion
 * stays a single operand when used inside a larger expression.
 */
#define m (2*n-1)
int n;// number of input samples; main() reads it from stdin

/*
 * One node of the merge tree: either an input sample (leaf) or the
 * centroid produced by merging two nodes.
 */
typedef struct
{
    double x1,x2,x3,x4;// the four coordinates
    double space;// distance between the two children at merge time (0 for leaves)
    int tag;// 1-based node number (array index + 1), assigned by Initial()
    int lchild;// array index of the left child, -1 if none
    int rchild;// array index of the right child, -1 if none
    int parent;// tag of the parent node (its index + 1), -1 if none
    char layer[max];// '0'/'1' path from the root to this node, filled in by Output()
}cluster;

/* Node storage: CreateTree() uses indices 0 .. 2*n-2, so 2*n-1 must not exceed max. */
cluster clu[max];
void Getnumber(int n)//输入数据
{
    int i;
    FILE *pcluster;
    if((pcluster=fopen("data.txt","r"))==NULL)
    {
       printf("\nopen file is failed\n");
       exit(1);
    }
   rewind(pcluster);
    for(i=0;i<n;i++)
       fscanf(pcluster,"%lf%lf%lf%lf",&clu[i].x1,&clu[i].x2,&clu[i].x3,&clu[i].x4);
   fclose(pcluster);
}
/*
 * Squared difference of two coordinates, (a-b)^2.
 * Building block for the Euclidean distance; the result is never negative,
 * so no absolute value is needed before squaring.
 */
double Square(double a,double b)
{
    double diff=a-b;
    return diff*diff;
}
/*
 * Euclidean distance between two records, computed over their
 * four coordinates x1..x4. The other fields are ignored.
 */
double Calculate_distance(cluster num1,cluster num2)
{
    double temp;
    temp=Square(num1.x1,num2.x1)+Square(num1.x2,num2.x2)+Square(num1.x3,num2.x3)+Square(num1.x4,num2.x4);
    return sqrt(temp);
}
/*
 * Reset the bookkeeping fields of all m tree nodes: no parent, no
 * children, zero merge distance, and tag set to the 1-based node number.
 * The coordinates are left untouched.
 */
void Initial(cluster * data)
{
    cluster *node=data;
    int label;

    for(label=1;label<=m;label++,node++)
    {
        node->rchild=-1;
        node->lchild=-1;
        node->parent=-1;
        node->space=0;
        node->tag=label;
    }
}
/*
 * Find the closest pair among the nodes that have not been merged yet.
 *
 * data_rest - node array
 * row       - highest index to consider (indices 0..row are scanned)
 * num1,num2 - receive the indices of the closest pair, with *num1 < *num2
 *
 * Only nodes whose parent is -1 take part. Returns the distance of the
 * pair found. When fewer than two such nodes exist, *num1 and *num2 are
 * left unchanged and the initial sentinel 987654321 is returned.
 */
double Space_matrix(cluster * data_rest,int row,int *num1,int *num2)
{
    int i,j;
    double temp,min=987654321;
    for(i=0;i<=row;i++)
    {
        if(data_rest[i].parent!=-1)
            continue;// already merged into a parent; no pair can start here
        for (j=i+1;j<=row;j++)
        {
            if(data_rest[j].parent!=-1)
                continue;
            temp=Calculate_distance(data_rest[i],data_rest[j]);
            if (temp<min)
            {
                min=temp;
                *num1=i;
                *num2=j;
            }
        }
    }
    return min;
}
/*
 * Place the merged node at the midpoint of its two children:
 * each coordinate of center[parent] becomes the plain average of the
 * corresponding coordinates of center[lchild] and center[rchild].
 * All three arguments are array indices into center.
 */
void Updatacenter(cluster * center,int parent,int lchild,int rchild)
{
    center[parent].x1=(center[lchild].x1+center[rchild].x1)/2;
    center[parent].x2=(center[lchild].x2+center[rchild].x2)/2;
    center[parent].x3=(center[lchild].x3+center[rchild].x3)/2;
    center[parent].x4=(center[lchild].x4+center[rchild].x4)/2;
}
/*
 * Build the merge tree bottom-up, Huffman-tree style.
 *
 * Slots n .. m-1 of center are filled one by one. Each new node joins the
 * two closest nodes that have no parent yet, records their indices as
 * lchild/rchild and the merge distance in space, and is positioned by
 * Updatacenter(). The children store the parent's tag, not its index.
 */
void CreateTree(cluster * center)
{
    int i,p1,p2;
    for(i=n;i<m;i++)
    {
        p1=p2=-1;
        center[i].space=Space_matrix(center,i-1,&p1,&p2);// the minimum space
        if(p1<0||p2<0)
            break;// no pair left to merge; never index the array with -1
        center[i].lchild=p1;
        center[i].rchild=p2;
        center[p1].parent=center[p2].parent=center[i].tag;
        Updatacenter(center,i,p1,p2);
    }
}
/*
 * Print the code of every leaf (the first n nodes of T).
 *
 * For each leaf the path to the root is followed through the parent
 * links; a step coming from a left child contributes '0', any other step
 * '1'. The digits are written back-to-front so the finished string reads
 * from the root down. The string is stored in the leaf's layer field and
 * printed next to its tag.
 */
void Output(cluster * T)
{
    int c,p,i;
    char cd[n+1];// scratch buffer; the code is built from the end towards the front
    int start;

    memset(cd,'\0',sizeof(cd));// cd[n] stays '\0' and terminates every code
    for (i=0;i<n;i++)
    {
        start=n;
        c=i;
        // parent holds a tag (index + 1); -1 marks the root, which gives p < 0
        while ((p=T[c].parent-1)>=0)
        {
            cd[--start]=(T[p].lchild==c)? '0' : '1';
            c=p;
        }
        strcpy(T[i].layer,&cd[start]);
        printf("%-10d----------%10s\n",T[i].tag,T[i].layer);
    }
}
int main()
{

   printf("Please input the number of the data:\n");
    scanf("%d",&n);
   Getnumber(n);
   Initial(clu);
   CreateTree(clu);
    printf("Thehuffman codes are:\n");
   Output(clu);
    return0;
}
以上两个算法的不足之处在于只在四维空间中实现;若要改进,可以用一个数组来储存各维坐标,从而推广到任意维数。我就不写了,交给后来人吧。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: