您的位置：首页 > 其它

字符串hash函数

2013-10-09 19:04 155 查看

本文搜集了一些字符串的常用hash函数。

范例1：判断两个单词是否含有相同的字母，此时我们可以用hash来做。例如，“aaabb”与“aabb”含有相同的字母。（参考：http://kmplayer.iteye.com/blog/656782）

#include <iostream>
using namespace std;

/**
 * Builds a 26-bit "letter set" signature for a word.
 *
 * Bit i of the result is set when the letter 'a' + i occurs at least once in
 * the string, so two words made of the same set of letters (regardless of
 * order or repetition) produce the same value. A plain int is enough to hold
 * 26 flags; std::bitset<26> would work equally well.
 *
 * Characters outside 'a'..'z' are ignored, because shifting by a negative or
 * too-large amount is undefined behavior.
 *
 * @param a  NUL-terminated string to scan (must not be null).
 * @return   Bitmask of the lowercase letters present in a.
 */
int hash(const char* a)
{
    int mask = 0;
    for (; *a != '\0'; ++a)
    {
        // Only 'a'..'z' map to a valid bit position 0..25.
        if (*a >= 'a' && *a <= 'z')
            mask |= 1 << (*a - 'a');
    }
    return mask;
}

int main()
{
char* a="abc";
char* b="acb";
cout<<hash(a)<<endl;
cout<<hash(b)<<endl;
return 0;
}

范例2：判断兄弟单词，兄弟单词定义为两个单词含有的所有字母相同，但是位置不同。例如“aaccdd”和“acdacd”。何海涛在《剑指offer》上的解法，定义bitcnt[26]，用一个字符串加计数，另一个字符串减计数。

我们这里为了演示基于素数的hash构造方法，特意用素数来hash：例如，a=2，b=3，c=5，整个单词的hash就是各字母对应素数的乘积。由算术基本定理，两个单词的乘积相等，当且仅当它们所含的字母及各字母出现的次数完全相同（前提是乘积没有溢出）。

#include <iostream>
#include <cctype>
using namespace std;

// Capacity of the prime table.
const int MAX = 200;
// Prime table, pre-seeded with the first three primes; the remaining slots
// are zero until GeneratePrime() fills them.
int prime[MAX] = {2,3,5};

/**
 * Fills the global `prime` table with the primes that do not exceed `num`.
 *
 * The table always starts with the seed values 2, 3 and 5, so the return
 * value is never smaller than 3, even when `num` is below 5. Candidates are
 * taken from the sequence 7, 11, 13, 17, 19, 23, ... (numbers of the form
 * 6k +/- 1), which skips every multiple of 2 and 3.
 *
 * Generation stops as soon as the table is full, so at most MAX primes are
 * ever stored regardless of `num`.
 *
 * @param num  Upper bound (inclusive) for the generated primes.
 * @return     Number of valid entries at the front of `prime`.
 */
int GeneratePrime(int num)
{
    int count = 3;  // 2, 3 and 5 are already in the table
    int gap = 4;    // step from 7 to 11; then alternates 2, 4, 2, 4, ...

    for (int candidate = 7; candidate <= num && count < MAX;
         candidate += gap, gap = 6 - gap)
    {
        bool isPrime = true;

        // Trial division by the known primes up to sqrt(candidate).
        for (int j = 0; prime[j] * prime[j] <= candidate; j++)
        {
            if (candidate % prime[j] == 0)
            {
                isPrime = false;
                break;  // one divisor is enough
            }
        }

        if (isPrime)
            prime[count++] = candidate;
    }

    return count;
}

/**
 * Order-independent hash of a word: the product of the primes assigned to its
 * letters (a -> prime[0], b -> prime[1], ...).
 *
 * Upper- and lower-case letters are treated as the same letter. Characters
 * that are not letters 'a'..'z' after case folding are ignored, since they
 * have no slot in the prime table.
 *
 * The first 26 entries of `prime` must be populated before calling (for
 * example via GeneratePrime(101)); an unpopulated entry is 0 and collapses
 * the whole product to 0.
 *
 * The product is accumulated in unsigned arithmetic so that long words wrap
 * around modulo 2^64 instead of triggering signed-overflow undefined
 * behavior. Once wrapping occurs, different letter multisets may collide.
 *
 * @param str  NUL-terminated word (must not be null).
 * @return     Product of the per-letter primes; 1 for a string without letters.
 */
long long Hash(const char str[])
{
    unsigned long long product = 1;
    for (; *str != '\0'; ++str)
    {
        // tolower() requires a value representable as unsigned char.
        const int lower = tolower(static_cast<unsigned char>(*str));
        if (lower < 'a' || lower > 'z')
            continue;
        product *= static_cast<unsigned long long>(prime[lower - 'a']);
    }
    return static_cast<long long>(product);
}

// Demo: prints the prime-product hash of "abdc" (2 * 3 * 7 * 5).
int main()
{
    // A writable array rather than a pointer to a string literal, which is
    // const in C++.
    char str[] = "abdc";
    // 101 is the 26th prime, so this bound gives every letter a..z a prime.
    // A bound of 26 leaves the slots for the later letters at 0.
    GeneratePrime(101);
    cout << Hash(str) << endl;
    // NOTE(review): "pause" is a Windows shell command, and system() is
    // declared in <cstdlib>, which this snippet does not include directly.
    system("pause");
    return 0;
}

范例3：统计单词的个数，C++ Primer中采用map的方法，本文介绍采用hash的方法。（参考：http://kmplayer.iteye.com/blog/647471）

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size in bytes of the buffer that main() reads each word into. */
#define WORDLENGTH 30
/* Number of buckets in hashTable; hash() reduces its result modulo this. */
#define NHASH 300

/* One entry of a bucket's singly linked chain: a word and its count. */
typedef struct node node;
typedef node* nodeptr;
struct node
{
    char* word;   /* heap-allocated copy of the word */
    int cnt;      /* number of times the word has been seen */
    node* next;   /* next entry in the same bucket, or NULL */
};

/*
 * Polynomial string hash: folds each character into the accumulator as
 * acc = acc * 31 + ch, then reduces the result to a bucket index in
 * [0, NHASH).
 *
 * The multiplier 31 is a conventional choice for this kind of hash: it is a
 * small odd prime, and x * 31 can be computed as (x << 5) - x.
 */
int hash(char* buf)
{
    unsigned acc = 0;
    while (*buf != '\0')
    {
        acc = acc * 31 + *buf;
        ++buf;
    }
    return acc % NHASH;
}

/* Bucket heads, indexed by hash(); as a file-scope array every entry starts out NULL. */
nodeptr hashTable[NHASH];

/*
 * Records one occurrence of the word in buf.
 *
 * Collisions are resolved by separate chaining: each bucket of hashTable is a
 * singly linked list. If the word is already in its bucket, its counter is
 * incremented; otherwise a new node holding a private copy of the word is
 * pushed onto the front of the bucket with a count of 1.
 *
 * If memory cannot be allocated, an error is reported and the program exits,
 * because a silently dropped word would corrupt the counts.
 */
void incword(char* buf)
{
    int n = hash(buf);
    nodeptr p;

    /* Look for an existing entry in bucket n. */
    for (p = hashTable[n]; p; p = p->next)
    {
        if (strcmp(p->word, buf) == 0)
        {
            p->cnt++;
            return;
        }
    }

    /* Not found: create a new entry. */
    p = (nodeptr)malloc(sizeof(node));
    if (p == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    p->word = (char*)malloc(strlen(buf) + 1);
    if (p->word == NULL)
    {
        perror("malloc");
        free(p);
        exit(EXIT_FAILURE);
    }
    strcpy(p->word, buf);
    p->cnt = 1;

    /* Insert at the head of the bucket's chain. */
    p->next = hashTable[n];
    hashTable[n] = p;
}

/*
 * Reads whitespace-separated words from genetic.txt, counts them with
 * incword(), then prints every "word count" pair bucket by bucket.
 */
int main ()
{
    char buf[WORDLENGTH];
    char fmt[16];
    int i;

    if (freopen("genetic.txt","r",stdin) == NULL)
    {
        perror("genetic.txt");
        return 1;
    }

    /*
     * Build "%<WORDLENGTH-1>s" so scanf can never write past the end of buf.
     * A word longer than that is read in several pieces rather than
     * overflowing the buffer.
     */
    snprintf(fmt, sizeof fmt, "%%%ds", WORDLENGTH - 1);

    while (scanf(fmt, buf) == 1)
        incword(buf);

    for (i = 0; i < NHASH; i++)
    {
        nodeptr p = hashTable[i];
        for (; p; p = p->next)
            printf("%s %d\n", p->word, p->cnt);
    }
    return 0 ;
}

有好的字符串hash函数，欢迎告知。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航