您的位置：首页 > 理论基础 > 数据结构算法

多核计算与程序设计 - 06 基本算法和数据结构之三哈希表与哈希链表

2013-02-10 22:45 615 查看

一，哈希表

哈希表：hash(杂乱信息的意思) 的音译，用来把一些杂乱无章的信息根据其关键字的特点映射到一个连续的空间，操作简单，用途广泛，例如：电子词典。

这里用到的映射方法称为索引方法。对应的实现函数称为哈希函数。将映射后的值称为索引。

1）哈希表的索引方法

整除取余法

对于整数关键字而言，这是很常用的一种方法，能够很快地计算出索引值，但有时会产生较多相同的索引（冲突），使算法退化。

最差的时候会变成顺序查找。

// Division-remainder hash for integer keys.
//
// nKey         : key to hash (may be negative).
// uBucketCount : number of buckets; must be > 0 (a zero divisor is undefined).
// Returns an index in [0, uBucketCount).
//
// In C++ the result of % takes the sign of the dividend, so a negative key
// would give a negative remainder; it is shifted back into range so the
// value can always be used as an array index.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A faster variant of the remainder hash.
// Choose the bucket count as a power of two (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of uMask is '1'.
// The index is then simply the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}

折叠法

当关键词位数很长时，可以将关键字分割成位数相同的几部分，把每部分转换成计算机可以识别的整数，再将整数相加，得到新的整数，再把得到的新的整数用上面的取余法进行运算，得到hash值。

// Folding hash for C strings.
//
// The key is split into groups of five characters. Each group is folded into
// one integer (value = value * 9 + character), the group values are summed,
// and the sum is reduced with the remainder method.
// The per-group conversion is only an example; pick one that suits the keys.
//
// strKey       : NUL-terminated key; a null pointer hashes to 0.
// uBucketCount : number of buckets; a non-positive count yields 0.
// Returns an index in [0, uBucketCount).
int hashString(const char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    const int kGroupLength = 5;
    unsigned int uTotal = 0;   // sum of all completed groups
    unsigned int uGroup = 0;   // value of the group being built
    int nInGroup = 0;          // characters consumed in the current group

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        if (nInGroup == kGroupLength)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        // Bytes are read as unsigned char so that characters >= 0x80 cannot
        // make the hash negative; unsigned arithmetic also wraps instead of
        // overflowing on very long keys.
        uGroup = uGroup * 9u + static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

平方取中法

将关键字进行平方运算后，再取中间几位作为索引。

// Mid-square hash: square the key and use decimal digits 2, 3 and 4 of the
// square (the units digit is digit 0) as the hash value.
// Example: 2345 * 2345 = 5499025 -> digits 4..2 are "990" -> returns 990.
//
// The square is computed in 64 bits so that large keys do not overflow, and
// the digits are extracted with a fixed modulus so that small keys (whose
// square is below 100000) no longer cause a division by zero.
// Returns a value in [0, 999].
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

随机函数法

用随机函数产生hash值。

2）哈希表的冲突解决方法

不同的关键字，通过hash算法可能得到相同的hash值，这就产生了冲突。

解决冲突的方法：

链表存储法

将同一索引的关键词放在一个链表中，将哈希表的索引指向链表的表头。

索引探测法

索引探测法的基本思想是发现索引有冲突后，在索引的位置向后查找一个空的索引位置，将数据存放在此索引的位置。

索引探测法公式描述：

H(i) = ( Hash ( key ) + P(i) ) % uBucketCount

线性探测法：

当发现索引处已经存储数据时，从索引位置向后按顺序查找一个空索引的位置，将数据存储在这个空位置上。

P(i) = i （i = 1, 2, 3, …，即第 i 次探测时在原索引基础上向后偏移 i 个位置）

二次探测法和伪随机探测法：

P(i) = i * i 时为二次探测法

P(i) = 随机序列时为伪随机探测法

实例代码：

hash_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_TABLE_H__
#define __HASH_TABLE_H__

// Node of a singly linked bucket chain.
struct _SINGLE_NODE
{
    int           nData;   // stored value (also used as the key)
    _SINGLE_NODE *pNext;   // next node in the same bucket, or NULL
};
typedef struct _SINGLE_NODE SINGLE_NODE;

// Hash table of int values that resolves collisions by chaining: each bucket
// holds the head of a singly linked list of SINGLE_NODE.
class MyHashTable
{
public:
    MyHashTable();
    ~MyHashTable();

    // Inserts nData and returns the resulting node count.
    int  InsertHashTable(int nData);
    // Returns nData when it is stored in the table, -1 otherwise.
    int  FindHash(int nData);
    // Removes nData from the table.
    void DeleteHash(int nData);

private:
    // The table owns raw heap memory, so copying it would make two objects
    // free the same nodes. Copy operations are declared but intentionally
    // left undefined to forbid copying.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;        // array of bucket chain heads
    int            m_nBucketCount;   // number of entries in m_pBucket
    int            m_nNodeCount;     // number of stored nodes
    int            m_nCurrNodeID;
    SINGLE_NODE  * m_pCurrentNode;

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif

hash_table.cpp

#include "hash_table.h"

// Division-remainder hash for integer keys.
//
// nKey         : key to hash (may be negative).
// uBucketCount : number of buckets; must be > 0 (a zero divisor is undefined).
// Returns an index in [0, uBucketCount).
//
// In C++ the result of % takes the sign of the dividend, so a negative key
// would give a negative remainder; it is shifted back into range so the
// value can always be used as an array index.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A faster variant of the remainder hash.
// Choose the bucket count as a power of two (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of uMask is '1'.
// The index is then simply the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}

// Folding hash for C strings.
//
// The key is split into groups of five characters. Each group is folded into
// one integer (value = value * 9 + character), the group values are summed,
// and the sum is reduced with the remainder method.
// The per-group conversion is only an example; pick one that suits the keys.
//
// strKey       : NUL-terminated key; a null pointer hashes to 0.
// uBucketCount : number of buckets; a non-positive count yields 0.
// Returns an index in [0, uBucketCount).
int hashString(const char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    const int kGroupLength = 5;
    unsigned int uTotal = 0;   // sum of all completed groups
    unsigned int uGroup = 0;   // value of the group being built
    int nInGroup = 0;          // characters consumed in the current group

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        if (nInGroup == kGroupLength)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        // Bytes are read as unsigned char so that characters >= 0x80 cannot
        // make the hash negative; unsigned arithmetic also wraps instead of
        // overflowing on very long keys.
        uGroup = uGroup * 9u + static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: square the key and use decimal digits 2, 3 and 4 of the
// square (the units digit is digit 0) as the hash value.
// Example: 2345 * 2345 = 5499025 -> digits 4..2 are "990" -> returns 990.
//
// The square is computed in 64 bits so that large keys do not overflow, and
// the digits are extracted with a fixed modulus so that small keys (whose
// square is below 100000) no longer cause a division by zero.
// Returns a value in [0, 999].
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Creates an empty table with 1024 buckets, all initialised to NULL.
MyHashTable::MyHashTable()
{
    m_pCurrentNode = NULL;
    m_nCurrNodeID = 0;
    m_nNodeCount = 0;
    m_nBucketCount = 1024;

    // Allocate the bucket array, then clear every chain head.
    m_pBucket = (SINGLE_NODE**)malloc(sizeof(SINGLE_NODE*) * m_nBucketCount);
    memset(m_pBucket, 0, sizeof(SINGLE_NODE*) * m_nBucketCount);
}

// Releases every node of every bucket chain and then the bucket array itself
// (the array was previously leaked).
MyHashTable::~MyHashTable()
{
    if (m_pBucket == NULL)
    {
        return;
    }

    for (int i = 0; i < m_nBucketCount; i++)
    {
        SINGLE_NODE *pNode = m_pBucket[i];
        while (pNode != NULL)
        {
            // Save the successor before the node is released.
            SINGLE_NODE *pFollowing = pNode->pNext;
            free(pNode);
            pNode = pFollowing;
        }
        m_pBucket[i] = NULL;
    }

    free(m_pBucket);
    m_pBucket = NULL;
    m_nNodeCount = 0;
}

// Maps a key to a bucket index in [0, m_nBucketCount).
// The % operator keeps the sign of the dividend, so a negative key could
// produce a negative remainder; any negative result is folded back into
// range before it is used to index m_pBucket.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}

// Inserts nData at the head of its bucket chain (duplicates are allowed).
// Returns the number of nodes stored after the insertion, or -1 when the
// node could not be allocated (the table is left unchanged in that case).
int MyHashTable::InsertHashTable(int nData)
{
    SINGLE_NODE *pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));
    if (pNewNode == NULL)
    {
        return -1;
    }

    const int nHashValue = calcHash(nData);

    cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

    // Link the new node in front of the current chain head.
    pNewNode->nData = nData;
    pNewNode->pNext = m_pBucket[nHashValue];
    m_pBucket[nHashValue] = pNewNode;

    m_nNodeCount += 1;
    return m_nNodeCount;
}

int MyHashTable::FindHash(int nData)
{
SINGLE_NODE *pNode;
int nHashValue;

nHashValue = calcHash(nData);
pNode = m_pBucket[nHashValue];

while(pNode != NULL)
{
if (pNode->nData == nData)
{
cout << "FindHash find it Key= " <<nHashValue << " Data=" << pNode->nData << endl;
return pNode->nData;
}
pNode = pNode->pNext;
}

return -1;
}

void MyHashTable::DeleteHash(int nData)
{
SINGLE_NODE *pNode;
int nHashValue;

nHashValue = calcHash(nData);
pNode = m_pBucket[nHashValue];

if (pNode != NULL)
{
m_pBucket[nHashValue] = pNode->pNext;
free(pNode);
pNode = NULL;
}
}

main.cpp 测试程序

#include "hash_table.h"

// Test driver: inserts ten values, looks all of them up, deletes the fourth
// one and looks all of them up again.
// main must return int in standard C++ ("void main" is rejected by
// conforming compilers).
int main()
{
    int testArr[10] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    const int nCount = sizeof(testArr) / sizeof(testArr[0]);
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < nCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    // Wait for input so the console window stays open.
    cin >> i;

    return 0;
}

测试结果：

Insert Hash Key: 123 Value=123

Insert Hash Key: 236 Value=4332

Insert Hash Key: 223 Value=223

Insert Hash Key: 472 Value=8664

Insert Hash Key: 191 Value=2239

Insert Hash Key: 689 Value=89777

Insert Hash Key: 183 Value=54455

Insert Hash Key: 32 Value=32

Insert Hash Key: 23 Value=23

Insert Hash Key: 834 Value=64322

Finding the hash kes and values

FindHash find it Key= 123 Data=123

========================

FindHash find it Key= 236 Data=4332

========================

FindHash find it Key= 223 Data=223

========================

FindHash find it Key= 472 Data=8664

========================

FindHash find it Key= 191 Data=2239

========================

FindHash find it Key= 689 Data=89777

========================

FindHash find it Key= 183 Data=54455

========================

FindHash find it Key= 32 Data=32

========================

FindHash find it Key= 23 Data=23

========================

FindHash find it Key= 834 Data=64322

========================

Test the 3 the key is deleted

FindHash find it Key= 123 Data=123

========================

FindHash find it Key= 236 Data=4332

========================

FindHash find it Key= 223 Data=223

========================

FindHash find it Key= 191 Data=2239

========================

FindHash find it Key= 689 Data=89777

========================

FindHash find it Key= 183 Data=54455

========================

FindHash find it Key= 32 Data=32

========================

FindHash find it Key= 23 Data=23

========================

FindHash find it Key= 834 Data=64322

========================

二，哈希链表

哈希表可以实现快速的精确查找，但无法实现排序操作，链表可以实现有序的输出，但在查找时时间较长，为了结合哈希表和链表的优点这里实现了一个哈希链表。

哈希链表的数据结构是结合了链表和哈希表的复合数据结构。

实例代码：

hash_link_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_LINK_TABLE_H__
#define __HASH_LINK_TABLE_H__

// Node that is a member of two lists at once: the chain of its hash bucket
// (pHashNext) and a doubly linked list (pPrev / pNext).
struct _SINGLE_NODE
{
    int           nData;       // stored value (also used as the key)
    _SINGLE_NODE *pHashNext;   // next node in the same hash bucket, or NULL
    _SINGLE_NODE *pPrev;       // previous node in the doubly linked list
    _SINGLE_NODE *pNext;       // next node in the doubly linked list
};
typedef struct _SINGLE_NODE SINGLE_NODE;

// Hash table combined with a doubly linked list: every node is reachable
// through its hash bucket and through the list headed by m_pLinkHead.
class MyHashTable
{
public:
    MyHashTable();
    ~MyHashTable();

    // Inserts nData and returns the resulting node count.
    int  InsertHashTable(int nData);
    // Returns nData when it is stored in the table, -1 otherwise.
    int  FindHash(int nData);
    // Removes nData from the table.
    void DeleteHash(int nData);
    // Prints every node of the linked list from head to tail.
    void ShowSortLink();

private:
    // The table owns raw heap memory, so copying it would make two objects
    // free the same nodes. Copy operations are declared but intentionally
    // left undefined to forbid copying.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;        // array of bucket chain heads
    int            m_nBucketCount;   // number of entries in m_pBucket
    int            m_nNodeCount;     // number of stored nodes
    int            m_nCurrNodeID;
    SINGLE_NODE  * m_pCurrentNode;
    SINGLE_NODE  * m_pLinkHead;      // first node of the linked list
    SINGLE_NODE  * m_pLinkTail;      // last node of the linked list

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif

hash_link_table.cpp

#include "hash_link_table.h"

// Division-remainder hash for integer keys.
//
// nKey         : key to hash (may be negative).
// uBucketCount : number of buckets; must be > 0 (a zero divisor is undefined).
// Returns an index in [0, uBucketCount).
//
// In C++ the result of % takes the sign of the dividend, so a negative key
// would give a negative remainder; it is shifted back into range so the
// value can always be used as an array index.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A faster variant of the remainder hash.
// Choose the bucket count as a power of two (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of uMask is '1'.
// The index is then simply the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}

// Folding hash for C strings.
//
// The key is split into groups of five characters. Each group is folded into
// one integer (value = value * 9 + character), the group values are summed,
// and the sum is reduced with the remainder method.
// The per-group conversion is only an example; pick one that suits the keys.
//
// strKey       : NUL-terminated key; a null pointer hashes to 0.
// uBucketCount : number of buckets; a non-positive count yields 0.
// Returns an index in [0, uBucketCount).
int hashString(const char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    const int kGroupLength = 5;
    unsigned int uTotal = 0;   // sum of all completed groups
    unsigned int uGroup = 0;   // value of the group being built
    int nInGroup = 0;          // characters consumed in the current group

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        if (nInGroup == kGroupLength)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        // Bytes are read as unsigned char so that characters >= 0x80 cannot
        // make the hash negative; unsigned arithmetic also wraps instead of
        // overflowing on very long keys.
        uGroup = uGroup * 9u + static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: square the key and use decimal digits 2, 3 and 4 of the
// square (the units digit is digit 0) as the hash value.
// Example: 2345 * 2345 = 5499025 -> digits 4..2 are "990" -> returns 990.
//
// The square is computed in 64 bits so that large keys do not overflow, and
// the digits are extracted with a fixed modulus so that small keys (whose
// square is below 100000) no longer cause a division by zero.
// Returns a value in [0, 999].
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Creates an empty table with 1024 buckets (all NULL) and an empty
// linked list.
MyHashTable::MyHashTable()
{
    m_pLinkTail = NULL;
    m_pLinkHead = NULL;
    m_pCurrentNode = NULL;
    m_nCurrNodeID = 0;
    m_nNodeCount = 0;
    m_nBucketCount = 1024;

    // Allocate the bucket array, then clear every chain head.
    m_pBucket = (SINGLE_NODE**)malloc(sizeof(SINGLE_NODE*) * m_nBucketCount);
    memset(m_pBucket, 0, sizeof(SINGLE_NODE*) * m_nBucketCount);
}

// Releases every node and then the bucket array.
// Bucket chains are linked through pHashNext; following pNext (the link of
// the doubly linked list) from a bucket head would wander into nodes owned
// by other buckets and free them more than once.
MyHashTable::~MyHashTable()
{
    if (m_pBucket != NULL)
    {
        for (int i = 0; i < m_nBucketCount; i++)
        {
            SINGLE_NODE *pNode = m_pBucket[i];
            while (pNode != NULL)
            {
                // Save the successor before the node is released.
                SINGLE_NODE *pFollowing = pNode->pHashNext;
                free(pNode);
                pNode = pFollowing;
            }
            m_pBucket[i] = NULL;
        }

        free(m_pBucket);
        m_pBucket = NULL;
    }

    // Every node has been released, so the list pointers are cleared too.
    m_pLinkHead = NULL;
    m_pLinkTail = NULL;
    m_nNodeCount = 0;
}

// Maps a key to a bucket index in [0, m_nBucketCount).
// The % operator keeps the sign of the dividend, so a negative key could
// produce a negative remainder; any negative result is folded back into
// range before it is used to index m_pBucket.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}

int MyHashTable::InsertHashTable(int nData)
{
SINGLE_NODE *pNode = NULL;
SINGLE_NODE *pNewNode;
int nHashValue;

pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));

nHashValue = calcHash(nData);
pNode = m_pBucket[nHashValue];

cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

pNewNode->nData = nData;
pNewNode->pHashNext = pNode;

m_pBucket[nHashValue] = pNewNode;
m_nNodeCount += 1;

// add to link
pNewNode->pNext = NULL;
pNewNode->pPrev = NULL;
if (m_pLinkHead == NULL)
{
m_pLinkHead = pNewNode;
m_pLinkTail = pNewNode;
}
else
{
pNode = m_pLinkHead;
while ((pNode->pNext != NULL) && (pNewNode->nData > pNode->nData))
{
pNode = pNode->pNext;
}

if ((pNode->pNext == NULL) && (pNewNode->nData >  pNode->nData))
{
pNewNode->pPrev = pNode;
pNode->pNext = pNewNode;
m_pLinkTail = pNewNode;
}
else
{
pNewNode->pPrev = pNode->pPrev;
if (pNode != m_pLinkHead)
{
pNode->pPrev->pNext = pNewNode;
}
else
{
m_pLinkHead = pNewNode;
}

pNewNode->pNext = pNode;
pNode->pPrev = pNewNode;
}
}

return m_nNodeCount;
}

void MyHashTable::ShowSortLink()
{
SINGLE_NODE *pNode = m_pLinkHead;
cout << endl;
cout << endl;
cout << "Show all the node ==========================" << endl;
while(pNode != NULL)
{
cout << "key= " << pNode->nData << "   ";
pNode = pNode->pNext;
}
cout << endl;
cout << endl;
}

// Looks nData up in its bucket chain.
// Prints a message and returns the stored value when found; returns -1 when
// the chain does not contain nData.
// The chain is linked through pHashNext; pNext belongs to the doubly linked
// list and must not be followed here, otherwise colliding keys can be missed.
int MyHashTable::FindHash(int nData)
{
    const int nHashValue = calcHash(nData);

    for (SINGLE_NODE *pNode = m_pBucket[nHashValue]; pNode != NULL; pNode = pNode->pHashNext)
    {
        if (pNode->nData == nData)
        {
            cout << "FindHash find it Key= " <<nHashValue << " Data=" << pNode->nData << endl;
            return pNode->nData;
        }
    }

    return -1;
}

void MyHashTable::DeleteHash(int nData)
{
SINGLE_NODE *pNode;
int nHashValue;

nHashValue = calcHash(nData);
pNode = m_pBucket[nHashValue];

if (pNode != NULL)
{
m_pBucket[nHashValue] = pNode->pNext;
pNode->pPrev->pNext = pNode->pNext;
pNode->pNext->pPrev = pNode->pPrev;

free(pNode);
pNode = NULL;
}
}

main.cpp

#include "hash_link_table.h"

// Test driver: inserts ten values, looks all of them up, prints the linked
// list, deletes the fourth value, then repeats the lookups and the listing.
// main must return int in standard C++ ("void main" is rejected by
// conforming compilers).
int main()
{
    int testArr[10] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    const int nCount = sizeof(testArr) / sizeof(testArr[0]);
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < nCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    // Wait for input so the console window stays open.
    cin >> i;

    return 0;
}

测试结果：

Insert Hash Key: 123 Value=123

Insert Hash Key: 236 Value=4332

Insert Hash Key: 223 Value=223

Insert Hash Key: 472 Value=8664

Insert Hash Key: 191 Value=2239

Insert Hash Key: 689 Value=89777

Insert Hash Key: 183 Value=54455

Insert Hash Key: 32 Value=32

Insert Hash Key: 23 Value=23

Insert Hash Key: 834 Value=64322

Finding the hash kes and values

FindHash find it Key= 123 Data=123

========================

FindHash find it Key= 236 Data=4332

========================

FindHash find it Key= 223 Data=223

========================

FindHash find it Key= 472 Data=8664 test deleting

========================

FindHash find it Key= 191 Data=2239

========================

FindHash find it Key= 689 Data=89777

========================

FindHash find it Key= 183 Data=54455

========================

FindHash find it Key= 32 Data=32

========================

FindHash find it Key= 23 Data=23

========================

FindHash find it Key= 834 Data=64322

========================

Show all the node ==========================

key= 23 key= 32 key= 123 key= 223 key= 2239 key= 4332 key= 8664 key= 54455 key= 64322 key= 89777

Test the 3 the key is deleted

FindHash find it Key= 123 Data=123

========================

FindHash find it Key= 236 Data=4332

========================

FindHash find it Key= 223 Data=223

========================

FindHash find it Key= 191 Data=2239

========================

FindHash find it Key= 689 Data=89777

========================

FindHash find it Key= 183 Data=54455

========================

FindHash find it Key= 32 Data=32

========================

FindHash find it Key= 23 Data=23

========================

FindHash find it Key= 834 Data=64322

========================

Show all the node ==========================

key= 23 key= 32 key= 123 key= 223 key= 2239 key= 4332 key= 54455 key= 64322 key= 89777

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航

多核计算与程序设计 - 06 基本算法和数据结构 之三 哈希表与哈希链表

一，哈希表

1） 哈希表的索引方法

2） 哈希表的冲突解决方法

多核计算与程序设计 - 06 基本算法和数据结构之三哈希表与哈希链表

1）哈希表的索引方法

2）哈希表的冲突解决方法