您的位置:首页 > 编程语言

Cuckoo Hash 基本思想和代码实现

2012-04-09 21:43 337 查看
Cuckoo Hash 是一种hash冲突解决方法,其目的是即使使用简易的hash function,也能够实现hash key的均匀分布。

基本思想是使用2个hash函数来处理碰撞,从而每个key都对应到2个位置。

插入操作如下:

1. 对key值hash,生成两个hash key值,hashk1和 hashk2, 如果对应的两个位置上有一个为空,那么直接把key插入即可。

2. 否则,任选一个位置,把key值插入,把已经在那个位置的key值踢出来。

3. 被踢出来的key值,需要重新插入到它的另一个位置,直到没有key被踢出为止。踢出过程可能形成循环,因此需要限制最大踢出次数(下面代码中的 MaxLoop),超过上限则认为本次插入失败。

查找思路比较简单:只需检查key对应的两个位置,任意一个位置上存有该key即为找到。

代码实现如下:

// Cuckoo_hash.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Generic template is only declared; the int specialization below is the
// sole implementation.
template<class KeyT>
class CuckooHash;

/**
 * Cuckoo hash set for int keys.
 *
 * Every key has exactly one candidate slot in each of two equally sized
 * tables (chosen by hHashOne / hHashTwo). Inserting into two occupied
 * candidates evicts a resident key to its slot in the other table, which may
 * in turn evict another key, up to MaxLoop steps.
 *
 * Slot occupancy is tracked with explicit flags, so every int value
 * (including 0 and negative numbers) is a valid key. Storage is held in
 * std::vector, so the object can be copied and destroyed safely.
 */
template<>
class CuckooHash<int>
{

private:
    int lnBucket;                    // number of slots in each table
    std::vector<int>  mpKeyBucket1;  // keys placed by the first hash
    std::vector<int>  mpKeyBucket2;  // keys placed by the second hash
    std::vector<char> mpUsed1;       // non-zero where mpKeyBucket1 holds a key
    std::vector<char> mpUsed2;       // non-zero where mpKeyBucket2 holds a key
    enum {MaxLoop = 1000};           // maximum length of an eviction chain

    int lnCantInsertNum;             // number of keys Insert() had to give up on

private:
    // True once InitHashTable() has allocated the tables.
    bool hIsReady() const
    {
        return !mpKeyBucket1.empty();
    }

    // First hash function: key modulo table size, folded into [0, lnBucket).
    int hHashOne(const int& irKey) const
    {
        int lHashKey = irKey % lnBucket;

        // '%' keeps the sign of the dividend; shift negative results into range.
        if (lHashKey < 0)
            lHashKey += lnBucket;

        return lHashKey;
    }

    // Second hash function: (key / table size) modulo table size, folded
    // into [0, lnBucket).
    int hHashTwo(const int& irKey) const
    {
        int lHashKey = (irKey / lnBucket) % lnBucket;

        if (lHashKey < 0)
            lHashKey += lnBucket;

        return lHashKey;
    }

    // Returns true when inN is a prime number (so 0, 1 and negatives are not).
    static bool hIsPrime(int inN)
    {
        if (inN < 2) return false;

        // i <= inN / i is the overflow-free form of i * i <= inN.
        for (int i = 2; i <= inN / i; i++)
        {
            if (inN % i == 0)
                return false;
        }

        return true;
    }

    // Returns the smallest prime >= inNum; any request below 2 yields 2.
    static int hGetMinPrime(int inNum)
    {
        if (inNum < 2) return 2;

        while ( !hIsPrime(inNum) ) inNum ++;

        return inNum;
    }

    // Tries to vacate slot inSlot of table inTable (1 or 2) by copying its
    // key into that key's slot in the other table, recursively vacating the
    // destination first when it is occupied.
    //
    // Returns true on success. The vacated slot still holds the old key and
    // its occupancy flag; the caller is expected to overwrite it. Returns
    // false, leaving both tables untouched, when the chain is longer than
    // 'deeps' steps (which is what happens when the evictions form a cycle).
    bool hReHash(int inTable, int inSlot, int deeps)
    {
        if (deeps <= 0) return false;

        const bool lFromFirst = (inTable == 1);
        const int lKey = lFromFirst ? mpKeyBucket1[inSlot] : mpKeyBucket2[inSlot];
        const int lTarget = lFromFirst ? hHashTwo(lKey) : hHashOne(lKey);

        std::vector<int>&  lToKeys = lFromFirst ? mpKeyBucket2 : mpKeyBucket1;
        std::vector<char>& lToUsed = lFromFirst ? mpUsed2 : mpUsed1;

        if (lToUsed[lTarget] && !hReHash(lFromFirst ? 2 : 1, lTarget, deeps - 1))
            return false;

        lToKeys[lTarget] = lKey;
        lToUsed[lTarget] = 1;
        return true;
    }

public:
    // Records the requested table size. No memory is allocated here; the
    // tables are created by InitHashTable() (or by the first Insert()).
    CuckooHash(int inNum)
        : lnBucket(inNum), lnCantInsertNum(0)
    {
    }

    // Rounds the table size up to a prime (at least 2) and (re)creates both
    // tables empty. Calling it again discards all stored keys.
    void InitHashTable()
    {
        lnBucket = hGetMinPrime(lnBucket);

        const std::vector<int>::size_type lCount =
            static_cast<std::vector<int>::size_type>(lnBucket);

        mpKeyBucket1.assign(lCount, 0);
        mpKeyBucket2.assign(lCount, 0);
        mpUsed1.assign(lCount, '\0');
        mpUsed2.assign(lCount, '\0');
    }

    // Inserts irKey unless it is already present. If neither candidate slot
    // is free and no eviction chain of at most MaxLoop steps can free one,
    // the key is dropped and counted (see CantInsertCount()).
    //
    // After every attempt to insert a new key a trace is written to
    // std::cout: the key, its two slot indices and both tables.
    void Insert(const int& irKey)
    {
        // Allow use without an explicit InitHashTable() call.
        if (!hIsReady()) InitHashTable();

        if (find(irKey)) return;

        const int lKey = irKey;
        const int lHashKey1 = hHashOne(lKey);
        const int lHashKey2 = hHashTwo(lKey);

        if (!mpUsed1[lHashKey1])
        {
            mpKeyBucket1[lHashKey1] = lKey;
            mpUsed1[lHashKey1] = 1;
        }
        else if (!mpUsed2[lHashKey2])
        {
            mpKeyBucket2[lHashKey2] = lKey;
            mpUsed2[lHashKey2] = 1;
        }
        else if (hReHash(1, lHashKey1, MaxLoop))
        {
            // Slot already flagged as used; just replace the evicted key.
            mpKeyBucket1[lHashKey1] = lKey;
        }
        else if (hReHash(2, lHashKey2, MaxLoop))
        {
            mpKeyBucket2[lHashKey2] = lKey;
        }
        else
        {
            lnCantInsertNum ++;
        }

        std::cout << "After insert : " << lKey << std::endl;
        std::cout << lHashKey1 << " " << lHashKey2 << std::endl;
        PrintBucket4Test();
    }

    // Returns true when irKey is stored in either of its two candidate
    // slots. Always false before the tables have been created.
    bool find(const int& irKey) const
    {
        if (!hIsReady()) return false;

        const int lHashKey1 = hHashOne(irKey);
        if (mpUsed1[lHashKey1] && mpKeyBucket1[lHashKey1] == irKey)
            return true;

        const int lHashKey2 = hHashTwo(irKey);
        return mpUsed2[lHashKey2] && mpKeyBucket2[lHashKey2] == irKey;
    }

    // Number of keys that Insert() could not place.
    int CantInsertCount() const
    {
        return lnCantInsertNum;
    }

    // Debug helper: writes each table on its own line to std::cout, every
    // slot followed by a space. Empty slots are shown as 0.
    void PrintBucket4Test() const
    {
        for (std::vector<int>::size_type i = 0; i < mpKeyBucket1.size(); i++)
            std::cout << mpKeyBucket1[i] << ' ';
        std::cout << std::endl;

        for (std::vector<int>::size_type i = 0; i < mpKeyBucket2.size(); i++)
            std::cout << mpKeyBucket2[i] << ' ';
        std::cout << std::endl;
    }
};

int _tmain(int argc, _TCHAR* argv[])
{

CuckooHash<int> CKHash(12);
CKHash.InitHashTable();

int a[] = {20, 50, 53, 75, 100, 67, 105, 3, 36, 39, 6};
for(int i = 0; i< sizeof(a)/sizeof(int); i++)
{
CKHash.Insert(a[i]);
}

int b;
cin >> b;
return 0;
}


参考链接如下:
http://www.it-c.dk/people/pagh/papers/cuckoo-undergrad.pdf

http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf

http://en.wikipedia.org/wiki/Cuckoo_hashing

http://hi.baidu.com/algorithms/blog/item/eb89b582add48f95f703a61e.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: