字符串匹配算法比较
2010-04-10 19:35
288 查看
做了一个很粗糙的实验,比较了几种字符串匹配算法的性能。程序用-O3进行编译优化。以下为待查找的文本长度为434018字节,模式串长度为4时
的典型实验结果。可以看到,horspool算法最快,表现最差的为KMP系的shift_and算法(实验结果与《柔性字符串匹配》一书中的结果一
致)。
strstr(C库函数) time:743 微秒
horspool: time:642 微秒
shift_and: time:1465 微秒
BNDM: time:721 微秒
以下为horspool,shift_and和BNDM算法的实验源码:
/*
 * Horspool algorithm: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually, e.g. "aa" occurs three
 * times in "aaaa".
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0.
 */
int horspool(const char *txt, const char *pat)
{
    size_t m = strlen(pat);   /* pattern length */
    size_t n = strlen(txt);   /* text length */

    /* An empty pattern would produce a zero shift (endless loop) and a
     * pattern longer than the text leaves no window to inspect. */
    if (m == 0 || m > n)
        return 0;

    /* Preprocessing: shift[c] is the distance from the last occurrence of c
     * in pat[0 .. m-2] to the end of the pattern, or m if c does not occur. */
    size_t shift[256];
    for (size_t c = 0; c < 256; c++)
        shift[c] = m;
    for (size_t i = 0; i + 1 < m; i++)
        shift[(unsigned char)pat[i]] = m - i - 1;

    /* Searching: slide a window of m characters over the text. */
    int cnt = 0;
    size_t pos = 0;           /* start of the current window */
    while (pos <= n - m) {
        /* Compare the window against the pattern from right to left. */
        size_t j = m;
        while (j > 0 && pat[j - 1] == txt[pos + j - 1])
            j--;
        if (j == 0)
            cnt++;
        /* The shift depends only on the last character of the window. */
        pos += shift[(unsigned char)txt[pos + m - 1]];
    }
    return cnt;
}
/*
 * Shift-And algorithm: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually. The search state is a
 * bit vector in which bit i is set when pat[0 .. i] matches the text ending
 * at the current character.
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0. Patterns longer than 64 characters do not fit in
 * the bit vector and are counted with a plain memcmp scan instead.
 */
int shift_and(const char *txt, const char *pat)
{
    enum { WORD_BITS = 64 };  /* unsigned long long has at least 64 bits */
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    int cnt = 0;

    if (m == 0 || m > n)
        return 0;

    /* The pattern does not fit in one machine word: fall back to a scan. */
    if (m > (size_t)WORD_BITS) {
        for (size_t i = 0; i + m <= n; i++) {
            if (memcmp(txt + i, pat, m) == 0)
                cnt++;
        }
        return cnt;
    }

    /* b[c] has bit i set iff pat[i] == c. The shifts are done on an
     * unsigned 64-bit-capable type so that every i < m is well defined. */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << i;

    /* Bit m-1 set in the state means the whole pattern has just matched. */
    const unsigned long long accept = 1ULL << (m - 1);
    unsigned long long d = 0;
    for (size_t i = 0; i < n; i++) {
        d = ((d << 1) | 1ULL) & b[(unsigned char)txt[i]];
        if (d & accept)
            cnt++;
    }
    return cnt;
}
/*
 * BNDM (Backward Nondeterministic DAWG Matching): count the occurrences of
 * pat in txt.
 *
 * Overlapping occurrences are counted individually. Each window of m
 * characters is read from right to left while a bit vector tracks which
 * pattern factors are still compatible with the characters read so far.
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0. Patterns longer than 64 characters do not fit in
 * the bit vector and are counted with a plain memcmp scan instead.
 */
int BNDM(const char *txt, const char *pat)
{
    enum { WORD_BITS = 64 };  /* unsigned long long has at least 64 bits */
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    int cnt = 0;

    if (m == 0 || m > n)
        return 0;

    /* The pattern does not fit in one machine word: fall back to a scan. */
    if (m > (size_t)WORD_BITS) {
        for (size_t i = 0; i + m <= n; i++) {
            if (memcmp(txt + i, pat, m) == 0)
                cnt++;
        }
        return cnt;
    }

    /* b[c] has bit m-1-i set iff pat[i] == c (pattern stored reversed). */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << (m - i - 1);

    const unsigned long long top = 1ULL << (m - 1);   /* prefix-found bit */
    const unsigned long long full = top | (top - 1);  /* low m bits set */

    size_t pos = 0;           /* start of the current window */
    while (pos <= n - m) {
        size_t j = m;         /* window characters not yet read */
        size_t last = m;      /* shift to apply after this window */
        unsigned long long d = full;

        while (d != 0) {
            d &= b[(unsigned char)txt[pos + j - 1]];
            j--;
            if (d & top) {
                if (j > 0)
                    last = j; /* a pattern prefix starts at pos + j */
                else
                    cnt++;    /* the whole window equals the pattern */
            }
            /* Keep the state within m bits so it becomes 0 after at most
             * m steps and the loop never reads before the window start. */
            d = (d << 1) & full;
        }
        pos += last;
    }
    return cnt;
}
的典型实验结果。可以看到,horspool算法最快,表现最差的为KMP系的shift_and算法(实验结果与《柔性字符串匹配》一书中的结果一
致)。
strstr(C库函数) time:743 微秒
horspool: time:642 微秒
shift_and: time:1465 微秒
BNDM: time:721 微秒
以下为horspool,shift_and和BNDM算法的实验源码:
/*
 * Horspool algorithm: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually, e.g. "aa" occurs three
 * times in "aaaa".
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0.
 */
int horspool(const char *txt, const char *pat)
{
    size_t m = strlen(pat);   /* pattern length */
    size_t n = strlen(txt);   /* text length */

    /* An empty pattern would produce a zero shift (endless loop) and a
     * pattern longer than the text leaves no window to inspect. */
    if (m == 0 || m > n)
        return 0;

    /* Preprocessing: shift[c] is the distance from the last occurrence of c
     * in pat[0 .. m-2] to the end of the pattern, or m if c does not occur. */
    size_t shift[256];
    for (size_t c = 0; c < 256; c++)
        shift[c] = m;
    for (size_t i = 0; i + 1 < m; i++)
        shift[(unsigned char)pat[i]] = m - i - 1;

    /* Searching: slide a window of m characters over the text. */
    int cnt = 0;
    size_t pos = 0;           /* start of the current window */
    while (pos <= n - m) {
        /* Compare the window against the pattern from right to left. */
        size_t j = m;
        while (j > 0 && pat[j - 1] == txt[pos + j - 1])
            j--;
        if (j == 0)
            cnt++;
        /* The shift depends only on the last character of the window. */
        pos += shift[(unsigned char)txt[pos + m - 1]];
    }
    return cnt;
}
/*
 * Shift-And algorithm: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually. The search state is a
 * bit vector in which bit i is set when pat[0 .. i] matches the text ending
 * at the current character.
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0. Patterns longer than 64 characters do not fit in
 * the bit vector and are counted with a plain memcmp scan instead.
 */
int shift_and(const char *txt, const char *pat)
{
    enum { WORD_BITS = 64 };  /* unsigned long long has at least 64 bits */
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    int cnt = 0;

    if (m == 0 || m > n)
        return 0;

    /* The pattern does not fit in one machine word: fall back to a scan. */
    if (m > (size_t)WORD_BITS) {
        for (size_t i = 0; i + m <= n; i++) {
            if (memcmp(txt + i, pat, m) == 0)
                cnt++;
        }
        return cnt;
    }

    /* b[c] has bit i set iff pat[i] == c. The shifts are done on an
     * unsigned 64-bit-capable type so that every i < m is well defined. */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << i;

    /* Bit m-1 set in the state means the whole pattern has just matched. */
    const unsigned long long accept = 1ULL << (m - 1);
    unsigned long long d = 0;
    for (size_t i = 0; i < n; i++) {
        d = ((d << 1) | 1ULL) & b[(unsigned char)txt[i]];
        if (d & accept)
            cnt++;
    }
    return cnt;
}
/*
 * BNDM (Backward Nondeterministic DAWG Matching): count the occurrences of
 * pat in txt.
 *
 * Overlapping occurrences are counted individually. Each window of m
 * characters is read from right to left while a bit vector tracks which
 * pattern factors are still compatible with the characters read so far.
 *
 * txt: NUL-terminated text to search in.
 * pat: NUL-terminated pattern to search for.
 *
 * Returns the number of occurrences. An empty pattern, or a pattern longer
 * than the text, yields 0. Patterns longer than 64 characters do not fit in
 * the bit vector and are counted with a plain memcmp scan instead.
 */
int BNDM(const char *txt, const char *pat)
{
    enum { WORD_BITS = 64 };  /* unsigned long long has at least 64 bits */
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    int cnt = 0;

    if (m == 0 || m > n)
        return 0;

    /* The pattern does not fit in one machine word: fall back to a scan. */
    if (m > (size_t)WORD_BITS) {
        for (size_t i = 0; i + m <= n; i++) {
            if (memcmp(txt + i, pat, m) == 0)
                cnt++;
        }
        return cnt;
    }

    /* b[c] has bit m-1-i set iff pat[i] == c (pattern stored reversed). */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++)
        b[(unsigned char)pat[i]] |= 1ULL << (m - i - 1);

    const unsigned long long top = 1ULL << (m - 1);   /* prefix-found bit */
    const unsigned long long full = top | (top - 1);  /* low m bits set */

    size_t pos = 0;           /* start of the current window */
    while (pos <= n - m) {
        size_t j = m;         /* window characters not yet read */
        size_t last = m;      /* shift to apply after this window */
        unsigned long long d = full;

        while (d != 0) {
            d &= b[(unsigned char)txt[pos + j - 1]];
            j--;
            if (d & top) {
                if (j > 0)
                    last = j; /* a pattern prefix starts at pos + j */
                else
                    cnt++;    /* the whole window equals the pattern */
            }
            /* Keep the state within m bits so it becomes 0 after at most
             * m steps and the loop never reads before the window start. */
            d = (d << 1) & full;
        }
        pos += last;
    }
    return cnt;
}
相关文章推荐
- 各种字符串匹配算法代码 速度比较
- BF,KMP,BM三种字符串匹配算法性能比较
- 字符串匹配算法比较 http://blog.csdn.net/airfer/article/details/8951802/
- 字符串匹配算法比较
- 字符串匹配算法比较
- [转]字符串匹配算法比较
- 字符串匹配算法:KMP算法与BM算法比较
- 实现一个比较高级的字符匹配算法,即一串很长的字符,要求找到符合要求字符的字符串
- KMP字符串匹配算法模板代码
- 字符串匹配算法之Aho-Corasick
- 字符串匹配算法总结
- 字符串匹配算法之KMP
- sunday 字符串匹配算法的实现(支持二进制匹配)
- 零零散散学算法之再叙字符串匹配
- 字符串匹配自动机的算法原理
- LeetCode Implement strStr()(朴素的字符串匹配,RK算法,KMP算法)
- 算法——字符串匹配之Rabin-Karp算法
- 字符串匹配算法总结
- 一个自己研究出来的字符串匹配算法-k子串算法
- 字符串匹配算法之KMP算法和BM算法