您的位置:首页 > 其它

hash table相关--引入哈希桶的概念来实现一个哈希表

2014-05-14 14:53 381 查看
前面的讲述了如何用链地址法实现一个哈希表,那么今天来分析一下另一种解决哈希冲突的做法,即为每个Hash值,建立一个Hash桶(Bucket),桶的容量是固定的,也就是只能处理固定次数的冲突,如1048576个Hash桶,每个桶中有4个表项(Entry),总计4M个表项。其实这两种的实现思路雷同,就是对Hash表中每个Hash值建立一个冲突表,即将冲突的几个记录以表的形式存储在其中。

大致的思路是这样的:
首先哈希桶的个数是固定的,有用户构建的时候输入,一旦构建,个数就已经固定;查找的时候首先将key值通过哈希函数获取哈希值,根据哈希值获取到对应的哈希桶,然后遍历哈希桶内的pairs数组获取。

struct
Bucket {

unsigned int count;

Pair *pairs;

};

struct StrMap {

unsigned int count;

Bucket *buckets;

};

strmap.h

#ifndef _STRMAP_H_

#define _STRMAP_H_

#ifdef __cplusplus

extern "C"

{

#endif

#include <stdlib.h>

#include <string.h>

typedef struct StrMap StrMap;

/*

* This callback function is called once per key-value when iterating over

* all keys associated to values.

*

* Parameters:

*

* key: A pointer to a null-terminated C string. The string must not

* be modified by the client.

*

* value: A pointer to a null-terminated C string. The string must

* not be modified by the client.

*

* obj: A pointer to a client-specific object. This parameter may be

* null.

*

* Return value: None.

*/

typedef void(*sm_enum_func)(const char *key, const char *value, const void *obj);

/*

* Creates a string map.

*

* Parameters:

*

* capacity: The number of top-level slots this string map

* should allocate. This parameter must be > 0.

*

* Return value: A pointer to a string map object,

* or null if a new string map could not be allocated.

*/

StrMap * sm_new(unsigned int capacity);

/*

* Releases all memory held by a string map object.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

* If the supplied string map has been previously released, the

* behaviour of this function is undefined.

*

* Return value: None.

*/

void sm_delete(StrMap *map);

/*

* Returns the value associated with the supplied key.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

*

* key: A pointer to a null-terminated C string. This parameter cannot

* be null.

*

* out_buf: A pointer to an output buffer which will contain the value,

* if it exists and fits into the buffer.

*

* n_out_buf: The size of the output buffer in bytes.

*

* Return value: If out_buf is set to null and n_out_buf is set to 0 the return

* value will be the number of bytes required to store the value (if it exists)

* and its null-terminator. For all other parameter configurations the return value

* is 1 if an associated value was found and completely copied into the output buffer,

* 0 otherwise.

*/

int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);

/*

* Queries the existence of a key.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

*

* key: A pointer to a null-terminated C string. This parameter cannot

* be null.

*

* Return value: 1 if the key exists, 0 otherwise.

*/

int sm_exists(const StrMap *map, const char *key);

/*

* Associates a value with the supplied key. If the key is already

* associated with a value, the previous value is replaced.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

*

* key: A pointer to a null-terminated C string. This parameter

* cannot be null. The string must have a string length > 0. The

* string will be copied.

*

* value: A pointer to a null-terminated C string. This parameter

* cannot be null. The string must have a string length > 0. The

* string will be copied.

*

* Return value: 1 if the association succeeded, 0 otherwise.

*/

int sm_put(StrMap *map, const char *key, const char *value);

/*

* Returns the number of associations between keys and values.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

*

* Return value: The number of associations between keys and values.

*/

int sm_get_count(const StrMap *map);

/*

* An enumerator over all associations between keys and values.

*

* Parameters:

*

* map: A pointer to a string map. This parameter cannot be null.

*

* enum_func: A pointer to a callback function that will be

* called by this procedure once for every key associated

* with a value. This parameter cannot be null.

*

* obj: A pointer to a client-specific object. This parameter will be

* passed back to the client's callback function. This parameter can

* be null.

*

* Return value: 1 if enumeration completed, 0 otherwise.

*/

int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);

#ifdef __cplusplus

}

#endif

#endif

struct
Pair {

char *key;

char *value;

};

struct Bucket {

unsigned int count;

Pair *pairs;

};

struct StrMap {

unsigned int count;

Bucket *buckets;

};

strmap.c

#include "strmap.h"

typedef struct Pair Pair;

typedef struct Bucket Bucket;

struct Pair {

char *key;

char *value;

};

struct Bucket {

unsigned int count;

Pair *pairs;

};

struct StrMap {

unsigned int count;

Bucket *buckets;

};

static Pair * get_pair(Bucket *bucket, const char *key);

static unsigned long hash(const char *str);

StrMap * sm_new(unsigned int capacity)

{

StrMap *map;

map = malloc(sizeof(StrMap));

if (map == NULL) {

return NULL;

}

map->count = capacity;

map->buckets = malloc(map->count * sizeof(Bucket));

if (map->buckets == NULL) {

free(map);

return NULL;

}

memset(map->buckets, 0, map->count * sizeof(Bucket));

return map;

}

void sm_delete(StrMap *map)

{

unsigned int i, j, n, m;

Bucket *bucket;

Pair *pair;

if (map == NULL) {

return;

}

n = map->count;

bucket = map->buckets;

i = 0;

while (i < n) {

m = bucket->count;

pair = bucket->pairs;

j = 0;

while(j < m) {

free(pair->key);

free(pair->value);

pair++;

j++;

}

free(bucket->pairs);

bucket++;

i++;

}

free(map->buckets);

free(map);

}

int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf)

{

unsigned int index;

Bucket *bucket;

Pair *pair;

if (map == NULL) {

return 0;

}

if (key == NULL) {

return 0;

}

index = hash(key) % map->count;

bucket = &(map->buckets[index]);

pair = get_pair(bucket, key);

if (pair == NULL) {

return 0;

}

if (out_buf == NULL && n_out_buf == 0) {

return strlen(pair->value) + 1;

}

if (out_buf == NULL) {

return 0;

}

if (strlen(pair->value) >= n_out_buf) {

return 0;

}

strcpy(out_buf, pair->value);

return 1;

}

int sm_exists(const StrMap *map, const char *key)

{

unsigned int index;

Bucket *bucket;

Pair *pair;

if (map == NULL) {

return 0;

}

if (key == NULL) {

return 0;

}

index = hash(key) % map->count;

bucket = &(map->buckets[index]);

pair = get_pair(bucket, key);

if (pair == NULL) {

return 0;

}

return 1;

}

int sm_put(StrMap *map, const char *key, const char *value)

{

unsigned int key_len, value_len, index;

Bucket *bucket;

Pair *tmp_pairs, *pair;

char *tmp_value;

char *new_key, *new_value;

if (map == NULL) {

return 0;

}

if (key == NULL || value == NULL) {

return 0;

}

key_len = strlen(key);

value_len = strlen(value);

/* Get a pointer to the bucket the key string hashes to */

index = hash(key) % map->count;

bucket = &(map->buckets[index]);

/* Check if we can handle insertion by simply replacing

* an existing value in a key-value pair in the bucket.

*/

if ((pair = get_pair(bucket, key)) != NULL) {

/* The bucket contains a pair that matches the provided key,

* change the value for that pair to the new value.

*/

if (strlen(pair->value) < value_len) {

/* If the new value is larger than the old value, re-allocate

* space for the new larger value.

*/

tmp_value = realloc(pair->value, (value_len + 1) * sizeof(char));

if (tmp_value == NULL) {

return 0;

}

pair->value = tmp_value;

}

/* Copy the new value into the pair that matches the key */

strcpy(pair->value, value);

return 1;

}

/* Allocate space for a new key and value */

new_key = malloc((key_len + 1) * sizeof(char));

if (new_key == NULL) {

return 0;

}

new_value = malloc((value_len + 1) * sizeof(char));

if (new_value == NULL) {

free(new_key);

return 0;

}

/* Create a key-value pair */

if (bucket->count == 0) {

/* The bucket is empty, lazily allocate space for a single

* key-value pair.

*/

bucket->pairs = malloc(sizeof(Pair));

if (bucket->pairs == NULL) {

free(new_key);

free(new_value);

return 0;

}

bucket->count = 1;

}

else {

/* The bucket wasn't empty but no pair existed that matches the provided

* key, so create a new key-value pair.

*/

tmp_pairs = realloc(bucket->pairs, (bucket->count + 1) * sizeof(Pair));

if (tmp_pairs == NULL) {

free(new_key);

free(new_value);

return 0;

}

bucket->pairs = tmp_pairs;

bucket->count++;

}

/* Get the last pair in the chain for the bucket */

pair = &(bucket->pairs[bucket->count - 1]);

pair->key = new_key;

pair->value = new_value;

/* Copy the key and its value into the key-value pair */

strcpy(pair->key, key);

strcpy(pair->value, value);

return 1;

}

int sm_get_count(const StrMap *map)

{

unsigned int i, j, n, m;

unsigned int count;

Bucket *bucket;

Pair *pair;

if (map == NULL) {

return 0;

}

bucket = map->buckets;

n = map->count;

i = 0;

count = 0;

while (i < n) {

pair = bucket->pairs;

m = bucket->count;

j = 0;

while (j < m) {

count++;

pair++;

j++;

}

bucket++;

i++;

}

return count;

}

int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj)

{

unsigned int i, j, n, m;

Bucket *bucket;

Pair *pair;

if (map == NULL) {

return 0;

}

if (enum_func == NULL) {

return 0;

}

bucket = map->buckets;

n = map->count;

i = 0;

while (i < n) {

pair = bucket->pairs;

m = bucket->count;

j = 0;

while (j < m) {

enum_func(pair->key, pair->value, obj);

pair++;

j++;

}

bucket++;

i++;

}

return 1;

}

/*

* Returns a pair from the bucket that matches the provided key,

* or null if no such pair exist.

*/

static Pair * get_pair(Bucket *bucket, const char *key)

{

unsigned int i, n;

Pair *pair;

n = bucket->count;

if (n == 0) {

return NULL;

}

pair = bucket->pairs;

i = 0;

while (i < n) {

if (pair->key != NULL && pair->value != NULL) {

if (strcmp(pair->key, key) == 0) {

return pair;

}

}

pair++;

i++;

}

return NULL;

}

/*

* Returns a hash code for the provided string.

*/

static unsigned long hash(const char *str)

{

unsigned long hash = 5381;

int c;

while (c = *str++) {

hash = ((hash << 5) + hash) + c;

}

return hash;

}

前一节与这节这两种实现方法看似比较类似,但也有差异:

基于哈希桶的情况下,由于Hash桶容量的限制,所以,有可能发生Hash表填不满的情况,也就是,虽然Hash表里面还有空位,但是新建的表项由于冲突过多,而不能装入Hash表中。不过,这样的实现也有其好处,就是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。

而另一种通过链表的实现,由于Hash桶的容量是无限的,因此,只要没有超出Hash表的最大容量,就能够容纳新建的表项。但是,一旦发生了Hash冲突严重的情况,就会造成Hash桶的链表过长,大大降低查找效率。在最坏的情况下,时间复杂度退化为O(n),其中n为Hash表的总容量。当然,这种情况的概率小之又小,几乎是可以忽略的。
转自

http://www.nowamagic.net/academy/detail/3008108

struct Pair {

char *key;

char *value;

};

struct Bucket {

unsigned int count;

Pair *pairs;

};

struct StrMap {

unsigned int count;

Bucket *buckets;

};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: