您的位置：首页 > 其它

算法学习之哈希表实现

2017-05-23 14:20 477 查看

哈希表是一种存储键值对的数据结构，经常用于数据库索引、map、缓存等地方。可以表示成 value = f(key)，查找效率很高。哈希表实现最关键的地方是哈希函数的选择：好的哈希函数分布均匀，冲突少。现在工业界最常用的哈希函数是murmur，memcached和nginx使用的就是murmur。简单常用的哈希函数构造法有：1.直接定址法，利用key设计一个线性函数 f = a*key + b；2.数字分析法，主要抽取部分数字进行循环左移、右移、相加减等各种操作；3.平方取中法；4.折叠法；5.除留余数法等。

哈希表冲突处理：哈希函数是会发生冲突的，即不同的key计算出了相同的hashcode。处理的方法有闭散列法和开散列法。1.闭散列法就是所有的操作都在原来的存储空间内进行，不开辟新的存储空间。线性探测法：f_i(key) = (f(key) + i) mod n，i = 1, 2, …，即发生冲突时依次探测下一个位置，直到找到空位。

双重散列法：hash函数产生冲突时，调用rehash函数（另一个哈希函数）重新计算hash值。2.开散列法也称为拉链法：哈希表的每个槽位挂一条链表，哈希值落在同一槽位的元素都放在这条链表中。拉链法是用得最多的一种方法。

实现一个c语言版的存储字符串类型的hashmap。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Default number of buckets in a new table. Bucket indices are computed as
 * (length - 1) & hash, so the capacity must be a power of two.
 * The expansion is parenthesized so the macro is safe inside any expression.
 */
#define   DEFAULT_INITIAL_CAPACITY     (1 << 4)
/* Largest number of buckets the table may grow to. */
#define   MAXIMUM_CAPACITY             (1 << 30)
/*
 * Load factor: the table is grown once the number of entries exceeds
 * capacity * load factor. A larger factor delays growth (less memory, longer
 * chains, slower lookups); a smaller factor grows earlier (more memory,
 * shorter chains). 0.75 is the default used here; adjust if needed.
 */
#define   DEFAULT_LOAD_FACTOR          0.75F
/* Link element embedded in every entry so entries can be chained per bucket. */
struct hlist_node
{
    /*
     * Back-link slot that a doubly linked layout could use; the chains in
     * this program are singly linked, so it is not maintained.
     */
    struct hlist_node **prev;
    /* Next link in the same chain, or NULL at the end of the chain. */
    struct hlist_node *next;
};
/* One bucket of the table: the head of a chain of hlist_node links. */
struct hlist_head
{
    /* First link of the chain, or NULL when the bucket is empty. */
    struct hlist_node *first;
};
/*
 * A key/value entry. The link is the first member, which is what lets the
 * rest of the code cast between struct hlist_node * and struct node *.
 */
struct node
{
    struct hlist_node hlist_node;   /* chain link; must stay the first member */
    char *key;                      /* key string (pointer stored, not copied) */
    char *value;                    /* value string (pointer stored, not copied) */
};
/* The hash map itself: a bucket array plus bookkeeping counters. */
struct hash_map
{
    /* Base address of the bucket array. */
    struct hlist_head *table;
    /* Number of buckets (chain heads) in the array. */
    int length;
    /* Number of key/value pairs currently stored. */
    int size;
    /* Entry count above which the table is grown. */
    int threshold;
};
/*
 * Compute a non-negative hash code for a NUL-terminated string.
 *
 * This is a multiplicative string hash: for every byte of the key,
 * code = code * 131 + byte. It hashes the characters of the key, not the
 * key's address. Bytes are read as unsigned char so that keys containing
 * bytes >= 0x80 (e.g. UTF-8 text) hash to the same value regardless of
 * whether plain char is signed on the target platform.
 *
 * key: NUL-terminated string; must not be NULL.
 * Returns a value in the range [0, 0x7FFFFFFF].
 */
static int hash(const char *key)
{
    const unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
    const unsigned char *p = (const unsigned char *)key;
    unsigned int code = 0;

    while (*p != '\0') {
        code = code * seed + *p++;   /* unsigned arithmetic wraps, no UB */
    }

    /* Drop the top bit so the result is never negative as an int. */
    return (int)(code & 0x7FFFFFFF);
}
/*当哈希表内存不够用时，扩容，扩容的时候把旧哈希表中的内容复制到新的哈希表中*/
static void resize(struct hash_map *map)
{
int old_cap = map->length;
int new_cap,i;
if(old_cap >= MAXIMUM_CAPACITY) {
map->threshold = MAXIMUM_CAPACITY;
} else if((new_cap = old_cap << 1) < MAXIMUM_CAPACITY && old_cap >= DEFAULT_INITIAL_CAPACITY){
map->threshold = map->threshold << 1;
map->length = new_cap;
}
struct hlist_head *old_table = map->table;

map->table = (struct hlist_head*)calloc(map->length,sizeof(struct hlist_head));
for(i = 0; i < old_cap; i++) {
if(old_table[i].first != NULL) {
struct hlist_node *p = old_table[i].first;
if(p->next == NULL) {
map->table[hash(((struct node*)p)->key) & (new_cap - 1)].first = p;
} else {
struct hlist_node *low_head = NULL,*low_tail = NULL;
struct hlist_node *high_head = NULL, *high_tail = NULL;
while(p != NULL) {
if(hash(((struct node*)p)->key) & old_cap == 0) {
if(low_tail == NULL)
low_head = p;
else
low_tail->next = p;
low_tail = p;
} else {
if(high_tail == NULL)
high_head = p;
else
high_tail->next = p;
high_tail = p;
}
p = p->next;
}
if(low_tail != NULL) {
low_tail->next = NULL;
map->table[i].first = low_head;
}
if(high_tail != NULL) {
high_tail->next = NULL;
map->table[i + old_cap].first = high_head;
}
}
}
}
free(old_table);
}
/*
 * Allocate and initialize an empty hash map with DEFAULT_INITIAL_CAPACITY
 * buckets and a threshold of capacity * DEFAULT_LOAD_FACTOR.
 *
 * Returns the new map, or NULL if either allocation fails. The caller owns
 * the returned map.
 */
static struct hash_map *new_hash_map(void)
{
    struct hash_map *map = malloc(sizeof *map);

    if (map == NULL) {
        return NULL;
    }

    map->length = (DEFAULT_INITIAL_CAPACITY);
    map->threshold = (int)((DEFAULT_LOAD_FACTOR) * (DEFAULT_INITIAL_CAPACITY));
    map->size = 0;

    /* calloc zero-fills, so every bucket starts out empty. */
    map->table = calloc((size_t)map->length, sizeof *map->table);
    if (map->table == NULL) {
        free(map);
        return NULL;
    }
    return map;
}
/*
 * Allocate a chain node holding the given key/value pair.
 *
 * The key and value pointers are stored as-is (the strings are not copied),
 * so the strings must outlive the node.
 *
 * Returns a pointer to the node's embedded link, or NULL if the allocation
 * fails. The link is the first member of struct node, so the returned pointer
 * can be cast back to struct node *.
 */
static struct hlist_node *new_node(char *key, char *value)
{
    struct node *node = malloc(sizeof *node);

    if (node == NULL) {
        return NULL;
    }

    node->key = key;
    node->value = value;
    node->hlist_node.prev = NULL;   /* not used by the chains, but never left indeterminate */
    node->hlist_node.next = NULL;
    return &node->hlist_node;
}
/*
 * Insert a key/value pair into the map, or replace the value if the key is
 * already present.
 *
 * map:   the map to modify.
 * hash:  hash code of `key`; the bucket index is (map->length - 1) & hash.
 * key:   key string; the pointer is stored, the string is not copied.
 * value: value string; the pointer is stored, the string is not copied.
 *
 * Returns 0 when a new entry was appended, 1 when an existing key's value
 * was replaced, and -1 when a new node could not be allocated (the map is
 * left unchanged in that case).
 */
static int put_val(struct hash_map *map, int hash, char *key, char *value)
{
    /*
     * `link` always points at the pointer that leads to the node under
     * inspection (the bucket head first, then each node's `next`). Every
     * node of the chain, including the last one, is compared, and when the
     * walk ends `link` is exactly the slot where a new node must be attached.
     */
    struct hlist_node **link = &map->table[(map->length - 1) & hash].first;

    while (*link != NULL) {
        struct node *entry = (struct node *)*link;

        if (strcmp(entry->key, key) == 0) {
            entry->value = value;
            return 1;
        }
        link = &(*link)->next;
    }

    *link = new_node(key, value);
    if (*link == NULL) {
        return -1;
    }

    /* Grow the table once the entry count passes the load threshold. */
    if (++map->size > map->threshold) {
        resize(map);
    }
    return 0;
}
/*
 * Store `value` under `key` in the map. Hashes the key and hands the work to
 * put_val(); the return value is whatever put_val() returns.
 */
int put(struct hash_map *map, char *key, char *value)
{
    int code = hash(key);

    return put_val(map, code, key, value);
}
/*
 * Find the chain link of the entry whose key equals `key`.
 *
 * map:  the map to search.
 * hash: hash code of `key`; selects the bucket via (map->length - 1) & hash.
 * key:  key string to compare against with strcmp().
 *
 * Returns the matching entry's link, or NULL when the map has no table, has
 * a non-positive length, or holds no entry with that key.
 */
struct hlist_node *get_node(struct hash_map *map, int hash, char *key)
{
    struct hlist_node *cur;

    if (map->table == NULL || map->length <= 0) {
        return NULL;
    }

    for (cur = map->table[(map->length - 1) & hash].first; cur != NULL; cur = cur->next) {
        if (strcmp(((struct node *)cur)->key, key) == 0) {
            return cur;
        }
    }
    return NULL;
}
/*
 * Look up `key` in the map.
 * Returns the stored value pointer, or NULL when the key is not present.
 */
char *get(struct hash_map *map, char *key)
{
    struct hlist_node *found = get_node(map, hash(key), key);

    if (found == NULL) {
        return NULL;
    }
    return ((struct node *)found)->value;
}
/*
 * Demo: store four country -> capital pairs, then print the capitals looked
 * up for two of the countries, one per line.
 */
int main()
{
    char *capitals[][2] = {
        { "中国", "北京" },
        { "美国", "华盛顿" },
        { "俄罗斯", "莫斯科" },
        { "日本", "东京" },
    };
    char *lookups[] = { "中国", "俄罗斯" };
    struct hash_map *map = new_hash_map();
    size_t i;

    for (i = 0; i < sizeof capitals / sizeof capitals[0]; i++) {
        put(map, capitals[i][0], capitals[i][1]);
    }
    for (i = 0; i < sizeof lookups / sizeof lookups[0]; i++) {
        printf("%s\n", get(map, lookups[i]));
    }
    return 0;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航