您的位置：首页 > 其它

字符串匹配算法比较

2010-04-10 19:35 288 查看

做了一个很粗糙的实验，比较了几种字符串匹配算法的性能。程序用-O3进行编译优化。以下为待查找的文本长度为434018字节，模式串长度为4时
的典型实验结果。可以看到，horspool算法最快，表现最差的为KMP系的shift_and算法（实验结果与《柔性字符串匹配》一书中的结果一
致）。

strstr（C库函数）　time:743　微秒

horspool: 　　time:642　微秒

shift_and: 　　time:1465　微秒

BNDM: 　　time:721　微秒

以下为horspool，shift_and和BNDM算法的实验源码：

// Horspool algorithm: count the occurrences of pattern pat in text txt.
//
// Overlapping occurrences are counted (e.g. "aa" occurs twice in "aaa").
// Returns 0 when pat is empty or longer than txt.
int horspool(const char *txt, const char *pat)
{
    size_t shift[256];              /* bad-character shift table */
    const size_t m = strlen(pat);   /* pattern length */
    const size_t n = strlen(txt);   /* text length */

    // An empty pattern has no "last character" to align on, and a pattern
    // longer than the text cannot occur; bail out before any indexing.
    if (m == 0 || n < m)
        return 0;

    // preprocessing: by default skip the whole pattern; characters that occur
    // in pat[0..m-2] shift by their distance to the end of the pattern.
    for (size_t c = 0; c < 256; c++)
        shift[c] = m;
    for (size_t i = 0; i + 1 < m; i++)
        shift[(unsigned char)pat[i]] = m - i - 1;

    // searching
    int cnt = 0;                    /* number of occurrences found */
    size_t pos = 0;                 /* start of the current window in txt */
    while (pos <= n - m) {
        // Compare the window against the pattern from right to left.
        size_t j = m;
        while (j > 0 && pat[j - 1] == txt[pos + j - 1])
            j--;
        if (j == 0)
            cnt++;

        // Shift according to the last character of the current window.
        // Every table entry is in 1..m, so pos always advances and never
        // exceeds n.
        pos += shift[(unsigned char)txt[pos + m - 1]];
    }
    return cnt;
}

// Shift-And algorithm: count the occurrences of pattern pat in text txt.
//
// The search state is kept in one unsigned long long (at least 64 bits by
// the C standard), so patterns of 1..64 characters are supported.
// Overlapping occurrences are counted.
// Returns 0 for an empty pattern and -1 when pat is longer than 64 characters.
int shift_and(const char *txt, const char *pat)
{
    enum { MAX_PAT = 64 };              /* bits guaranteed in unsigned long long */
    unsigned long long b[256] = {0};    /* b[c]: bit i set iff pat[i] == c */
    const size_t m = strlen(pat);

    if (m == 0)
        return 0;
    if (m > MAX_PAT)
        return -1;

    // The shifts are done on an unsigned 64-bit one: shifting a plain int
    // by 31 or more positions would be undefined behaviour.
    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << i;

    // Bit m-1 of the state is set exactly when the whole pattern has matched.
    const unsigned long long accept = 1ULL << (m - 1);

    int cnt = 0;
    unsigned long long d = 0;           /* bit i set: pat[0..i] ends at the current char */
    for (const char *s = txt; *s != '\0'; s++) {
        d = ((d << 1) | 1ULL) & b[(unsigned char)*s];
        if (d & accept)
            cnt++;
    }
    return cnt;
}

// BNDM algorithm: count the occurrences of pattern pat in text txt.
//
// The search state is kept in one unsigned long long (at least 64 bits by
// the C standard), so patterns of 1..64 characters are supported.
// Overlapping occurrences are counted.
// Returns 0 for an empty pattern or a pattern longer than txt, and -1 when
// pat is longer than 64 characters.
int BNDM(const char *txt, const char *pat)
{
    enum { MAX_PAT = 64 };              /* bits guaranteed in unsigned long long */
    unsigned long long b[256] = {0};    /* b[c]: bit m-1-i set iff pat[i] == c */
    const size_t m = strlen(pat);
    const size_t n = strlen(txt);

    if (m == 0)
        return 0;
    if (m > MAX_PAT)
        return -1;
    if (n < m)
        return 0;

    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << (m - i - 1);

    const unsigned long long top = 1ULL << (m - 1);     /* "a prefix of pat matched" bit */
    const unsigned long long low_m = top | (top - 1);   /* the low m bits */

    int cnt = 0;
    size_t pos = 0;                     /* start of the current window in txt */
    while (pos <= n - m) {
        size_t j = m;                   /* window characters not yet read */
        size_t shift = m;               /* how far to move the window afterwards */
        unsigned long long d = low_m;

        // Read the window right to left while it is still a factor of pat.
        while (d != 0) {
            d &= b[(unsigned char)txt[pos + j - 1]];
            j--;
            if (d & top) {
                if (j > 0)
                    shift = j;          // a pattern prefix starts at window offset j
                else
                    cnt++;              // the whole window equals the pattern
            }
            // Keep only the low m bits: after the whole window has been read
            // the state becomes 0, so the loop never reads before the window.
            d = (d << 1) & low_m;
        }
        pos += shift;
    }
    return cnt;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航