您的位置：首页 > 其它

一个简单的字符串hash表

2012-08-21 12:38 204 查看

设计一个字符串hash表。

1.总体设计图：

2.为什么要这么设计？

用一个Buckets来保存所有哈希表，用户能看到的只是一个相对于Buckets的偏移量，即索引；至于这个索引是数组下标，还是其他结构（比如二叉排序树）的搜索关键字，用户无需关心，这样就隐藏了Buckets的数据结构。另外，各个线程分别对自己得到的索引所对应的哈希表进行操作，互不影响，所以可以支持多线程（注意：st_create和st_release会读写共享的Buckets数组，并发调用这两个函数时需要额外加锁）。

3.接口设计

字符串hash表通常需要支持的操作是插入、删除和查找，所以目前哈希表仅支持这三种操作。由于使用前需要先获得一个索引（即创建一个哈希表），使用完后还要释放该索引（即释放这个哈希表），所以增加了st_create和st_release方法。另外，为方便调试，增加了打印整个字符串表的操作。

代码如下：

/**
 File: strtab.h                                   
 table is implemented as a chained hash table
 uses the BKDR string hash function
 cc 2012.8.20 blog.csdn/zhccl/
 */

#ifndef _STRTAB_
#define _STRTAB_

/**
 Function st_create: create a new, empty string table.
 Returns the table's index (>= 0), which is the handle passed as `id`
 to every other function; -1 is reserved as the failure value.
 Callers only ever see this index, so the data structure that holds
 the tables stays hidden.
 */
int st_create();

/**
 Function st_insert: insert the string `str` into the table `id`.
 The table stores its own copy of the string.
 A string is inserted only the first time; later inserts of an equal
 string are ignored.
 Returns 1 if the string was inserted, 0 if it was already present.
 */
int st_insert(int id,char * str);

/**
 Function st_lookup: search the table `id` for the string `str`.
 Returns 1 if the string is present, 0 if it is not found.
 */
int st_lookup(int id,char * str);

/**
 Function st_delete: remove the string `str` from the table `id`.
 Returns 1 if the string was deleted, 0 if it was not found.
 */
int st_delete(int id,char *str);

/**
 Function st_release: release the table `id` and everything stored in it.
 Returns 1 when the table was released; 0 is the documented result
 when no such table exists.
 */
int st_release(int id);

/**
 Function st_print: print every string of the table `id`.
 Intended for debugging only.
 */
void st_print(int id);

#endif

4.具体实现

/**
 File: strtab.c                                
 table is implemented as a chained hash table
 uses the BKDR string hash function
 implementation of all functions.
 cc 2012.8.20
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

/* When defined, the table functions print trace messages to stdout. */
#define DBUG

/* Maximum number of string tables that can exist at the same time. */
#define MAX_INDEX 255

/*
 Number of bucket slots in each string table.
 NOTE(review): 253 = 11 * 23 is not prime; a prime slot count may
 spread the hash values more evenly - confirm before changing.
 */
#define TABLE_SIZE 253

/*
 One node of a bucket's singly linked collision chain.
 `str` points to the heap copy of the stored string, `next` to the
 following node of the same bucket (NULL at the end of the chain).
 Note that pstrNode is itself a pointer type (struct strNode *).
 */
typedef struct strNode{
	char *str;
	struct strNode *next;
}* pstrNode;

/*
 All string tables: Buckets[id] is the slot array of table `id`
 (NULL when that index is free), and Buckets[id][hash] is the head of
 one collision chain. Buckets itself is NULL until the first table is
 created and again after the last table has been released.
 */
static pstrNode** Buckets=NULL;

/**
 BKDR string hash.
 Folds every byte of the NUL-terminated string `str` into the hash as
 hash = hash * 131 + byte (unsigned arithmetic, wrapping on overflow).
 Bytes are read as unsigned char so that characters >= 0x80 produce
 the same hash whether plain char is signed or unsigned on the
 platform.
 Returns the hash with the top bit cleared, so the value also fits in
 a non-negative int.
 */
static unsigned int BKDRHash(const char *str){
    const unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
    unsigned int hash = 0;

    for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; p++){
        hash = hash * seed + *p;
    }

    return (hash & 0x7FFFFFFF);
}
/**
 A very simple fatal-error handler.
 Writes `errmsg` followed by a newline to stderr and terminates the
 program. The exit status is EXIT_FAILURE so that the calling process
 or shell can tell the program did not finish successfully.
 This function does not return.
 */
static void st_error(const char *errmsg){
	fprintf(stderr,"%s\n",errmsg);
	exit(EXIT_FAILURE);
}
/**
 Function st_create: create a new, empty string table.
 On first use the array holding all table slots is allocated as well.
 The first free slot is given a zero-filled array of TABLE_SIZE chain
 heads and its index is returned.
 Returns the index of the new table, or -1 when all MAX_INDEX tables
 are already in use (a message is written to stderr in that case).
 Running out of memory is fatal and handled through st_error().
 */
int st_create(){
	/* First use: allocate the zero-filled array of table slots. */
	if(NULL==Buckets){
		Buckets=calloc(MAX_INDEX,sizeof *Buckets);
		if(NULL==Buckets){
			st_error("Memory over follow.");
		}
	}
	/* Hand out the first free index together with an empty table. */
	for(int i=0;i<MAX_INDEX;i++){
		if(NULL==Buckets[i]){
			Buckets[i]=calloc(TABLE_SIZE,sizeof *Buckets[i]);
			if(NULL==Buckets[i]){
				st_error("Memory over follow.");
			}
			return i;
		}
	}
	/*
	 Every index is taken. Report it and return the documented failure
	 value instead of terminating, so the caller can react.
	 */
	fprintf(stderr,"%s\n","The buckets is full.");
	return -1;
}
int st_insert(int id,char* srcstr){
	assert(NULL!=Buckets[id]);

	int hash=BKDRHash(srcstr)%TABLE_SIZE;

	//if the first use Bucetss[i][hash],so srcstr must be the first string.
	if( NULL== (*(*(Buckets+id)+hash))){
		if(NULL==((*(*(Buckets+id)+hash))=(pstrNode)malloc(sizeof(struct strNode)))){
			st_error("Memory over follow.");
		}
		if( NULL==( (*(*(Buckets+id)+hash))->str=(char *)malloc( (strlen(srcstr)+1) * sizeof(char) ) ) ){
			st_error("Memory over follow.");
		}
		strcpy((*(*(Buckets+id)+hash))->str,srcstr);
		(*(*(Buckets+id)+hash))->next=NULL;
#ifdef DBUG
		printf("String:< %s > is inserted at the first time in hash=%d .\n",srcstr,hash);
#endif
		return 1;
	}
	else{
		//conflict then use chain to resovle.
		pstrNode pnode=(*(*(Buckets+id)+hash));

		while( (pnode!=NULL) && (strcmp(srcstr,pnode->str)!=0) ){
			pnode=pnode->next;
		}
		//not found,so for the first time , do inserting.
		if(pnode==NULL){
			if(NULL==(pnode=(pstrNode)malloc(sizeof(struct strNode)))){
				st_error("Memory over follow.");
			}
			if(NULL==(pnode->str=(char *)malloc((strlen(srcstr)+1)*sizeof(char)))){
				st_error("Memory over follow.");
			}
			strcpy(pnode->str,srcstr);

			pnode->next=(*(*(Buckets+id)+hash));
			(*(*(Buckets+id)+hash))=pnode;
#ifdef DBUG
			printf("String:< %s > is inserted in hash=%d .\n",srcstr,hash);
#endif
			return 1;
		}
		//the srcstr is exist,so ignore.
		else{
#ifdef DBUG
			printf("String:< %s > is exist.\n",srcstr);
#endif
			return 0;
		}
	}
}

/**
 Function st_lookup: search the table `id` for the string `srcstr`.
 Only the bucket chain selected by the string's hash is walked.
 Returns 1 if an equal string is stored, 0 otherwise.
 */
int st_lookup(int id,char * srcstr){
	assert(NULL!=Buckets[id]);
	int hash=BKDRHash(srcstr)%TABLE_SIZE;

	/* Stop on the first node holding an equal string, or at chain end. */
	pstrNode hit=Buckets[id][hash];
	for(;hit!=NULL;hit=hit->next){
		if(0==strcmp(hit->str,srcstr)){
			break;
		}
	}
#ifdef DBUG
	if(NULL==hit){
		printf("String <%s> has not found.\n",srcstr);
	}
	else{
		printf("String <%s> has found.\n",hit->str);
	}
#endif
	return hit!=NULL;
}
/**
 Function st_delete: remove the string `srcstr` from the table `id`.
 The bucket chain selected by the string's hash is walked; the first
 node holding an equal string is unlinked and freed together with its
 string copy.
 Returns 1 when the string was deleted, 0 if it was not found.
 */
int st_delete(int id,char *srcstr){
	assert(NULL!=Buckets[id]);

	int hash=BKDRHash(srcstr)%TABLE_SIZE;
	/*
	 `link` points at the pointer that leads to the current node: first
	 the bucket head, afterwards the `next` field of the previous node.
	 Unlinking through it handles the head node and inner nodes the
	 same way, and it keeps the rest of the chain when the head node is
	 deleted (resetting the bucket to NULL there would drop and leak
	 every node behind the head).
	 */
	pstrNode *link=&Buckets[id][hash];

	while( (NULL!=*link) && (strcmp((*link)->str,srcstr)!=0) ){
		link=&(*link)->next;
	}
	if(NULL==*link){
#ifdef DBUG
		printf("< %s > is not exist.\n",srcstr);
#endif
		return 0;
	}

	/* Unlink first, then free the node: two allocations to release. */
	pstrNode victim=*link;
	*link=victim->next;
	free(victim->str);
	free(victim);
#ifdef DBUG
	printf("Delete string < %s > OK.\n",srcstr);
#endif
	return 1;
}
/**
 Function st_release: release the table `id`.
 Frees every node and string copy of the table, then the table's slot
 array, and marks the index as free again. When no table is left
 afterwards, the array holding all table slots is released too.
 Returns 1 when the table was released, or 0 if no such table exists
 (no table was ever created, `id` is out of range, or the index is
 free). Releasing the same index twice is therefore harmless.
 */
int st_release(int id){
	/*
	 Buckets is NULL before the first st_create() and after the last
	 table has been released, so it must be checked before indexing.
	 */
	if(NULL==Buckets || id<0 || id>=MAX_INDEX || NULL==Buckets[id]){
		return 0;
	}

	pstrNode *table=Buckets[id];
	for(int i=0;i<TABLE_SIZE;i++){
		pstrNode node=table[i];
		while(NULL!=node){
			/* Remember the successor before the node is freed. */
			pstrNode next=node->next;
#ifdef DBUG
			printf("The sting < %s > node has freed.\n",node->str);
#endif
			free(node->str);
			free(node);
			node=next;
		}
	}
	free(table);
	Buckets[id]=NULL;

	/* If that was the last existing table, release the whole Buckets. */
	int in_use=0;
	for(int i=0;i<MAX_INDEX;i++){
		if(NULL!=Buckets[i]){
			in_use=1;
			break;
		}
	}
	if(!in_use){
		free(Buckets);
		Buckets=NULL;
	}
#ifdef DBUG
	printf("The table index=%d has freed.\n",id);
#endif
	return 1;
}

/**
 Function st_print: dump the table `id` to stdout, for debugging.
 Prints a header for the table, then for each of the TABLE_SIZE
 buckets a header line followed by the bucket's strings, numbered
 from 1 in chain order.
 */
void st_print(int id){
	assert(NULL!=Buckets[id]);

	printf("--------------------------Buckets[%d]-----------------------------\n",id);
	for(int slot=0;slot<TABLE_SIZE;slot++){
		printf("------------------------------TABLE[%d]-------------------------\n",slot);
		int position=0;
		for(pstrNode cur=Buckets[id][slot];cur!=NULL;cur=cur->next){
			printf("The index=%-4d string =%-15s\n",++position,cur->str);
		}
	}
}

5.设计不足

1.如果代码需要被其他用户调用，最好对索引进行一次变换，以防止用户去释放并非自己获得的索引。

2.对GPL协议文件中的所有字符串进行测试，发现冲突比较严重。

亲，版权所有，转载注明出处：blog.csdn/zhccl/

如果你要使用这些代码，不要删除每个文件开始那段注释就可以了。^_^

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航