字符串匹配算法之总结
2014-04-25 15:59
369 查看
问题提出:
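给定字符串 text 和 pattern,判断 pattern 是否为 text 的子串;若是,返回最先匹配的位置(从 0 开始的下标),否则返回 -1。
问题解决:
1.暴力搜索:将 pattern 依次对齐到 text 的每个位置并逐字符比较;
2.KMP 算法:需要预先求出 pattern 的部分匹配表(即前缀函数:每个前缀的最长相等真前缀/真后缀的长度,并非后缀数组),以保证每次失配后的有效偏移,加快搜索;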
3.Boyer-Moore算法:详述请参见维基百科;
4.Karp-Rabin 算法:详述请参见维基百科;
实现代码:
#ifndef _PATTERN_SEARCH_H_
#define _PATTERN_SEARCH_H_
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
/*
 * Brute-force substring search.
 *
 * Tries every alignment of pattern against text from left to right and
 * compares the two character by character.
 *
 * text, pattern: non-NULL, NUL-terminated strings (enforced with assert).
 *
 * Returns the zero-based index of the first occurrence of pattern in text,
 * or -1 when pattern does not occur. An empty pattern matches at index 0.
 */
int PatternMatchBrute( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );
    size_t textLen = strlen( text );
    size_t patternLen = strlen( pattern );

    /* textLen - patternLen is unsigned arithmetic: without this guard a
     * pattern longer than the text wraps around to a huge loop bound and the
     * scan reads far past the end of text. */
    if( patternLen > textLen )
    {
        return -1;
    }

    size_t lastStart = textLen - patternLen;
    for( size_t i = 0; i <= lastStart; i++ )
    {
        /* Count how many leading pattern characters match at alignment i. */
        size_t j = 0;
        while( j < patternLen && text[i + j] == pattern[j] )
        {
            j++;
        }
        if( j == patternLen )
        {
            return (int)i;
        }
    }
    return -1;
}
/*
 * Builds the KMP partial-match (failure) table for pattern.
 *
 * Despite the historical name, this is not a suffix array: entry i holds the
 * length of the longest proper prefix of pattern[0..i] that is also a suffix
 * of pattern[0..i].
 *
 * pattern: the pattern characters; the first len of them are read.
 * len:     number of pattern characters to process.
 *
 * Returns an array of len + 1 ints allocated with new[]; entries 0..len-1
 * hold the table and entry len is always 0. The caller owns the array and
 * must release it with delete [].
 */
int* CalcSuffix( const char* pattern, size_t len )
{
    int* suffix = new int[ len + 1 ];
    memset( suffix, 0x00, sizeof(int) * ( len + 1 ) );

    size_t j = 0; /* length of the border currently being extended */
    for( size_t i = 1; i < len; i++ )
    {
        /* Fall back to shorter borders until one can be extended by pattern[i]. */
        while( j > 0 && pattern[i] != pattern[j] )
        {
            j = (size_t)suffix[j - 1];
        }
        if( pattern[i] == pattern[j] )
        {
            suffix[i] = (int)( ++j );
        }
    }
    return suffix;
}
/*
 * Knuth-Morris-Pratt substring search.
 *
 * Scans text once. On a mismatch after `matched` pattern characters, the
 * failure table tells how many of those characters are still a valid prefix
 * match, so the text position never moves backwards and no alignment is
 * skipped.
 *
 * text, pattern: non-NULL, NUL-terminated strings (enforced with assert).
 *
 * Returns the zero-based index of the first occurrence of pattern in text,
 * or -1 when pattern does not occur. An empty pattern matches at index 0.
 */
int PatternMatchKMP( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );
    size_t textLen = strlen( text );
    size_t patternLen = strlen( pattern );

    if( patternLen == 0 )
    {
        return 0;
    }
    /* A longer pattern can never match; returning early also avoids any
     * unsigned wrap-around in length arithmetic. */
    if( patternLen > textLen )
    {
        return -1;
    }

    int* suffix = CalcSuffix( pattern, patternLen );
    int result = -1;
    size_t matched = 0; /* pattern characters matched so far */
    for( size_t i = 0; i < textLen; i++ )
    {
        /* Shrink the current partial match to its longest border that can
         * still be extended by text[i]. */
        while( matched > 0 && text[i] != pattern[matched] )
        {
            matched = (size_t)suffix[matched - 1];
        }
        if( text[i] == pattern[matched] )
        {
            matched++;
        }
        if( matched == patternLen )
        {
            result = (int)( i + 1 - patternLen );
            break;
        }
    }
    delete [] suffix;
    return result;
}
/*
 * Boyer-Moore substring search using only the bad-character rule.
 *
 * For every byte value, the table `right` records the right-most index at
 * which it occurs in pattern (-1 if absent). Each alignment is compared from
 * the pattern's last character backwards; on a mismatch the pattern is
 * shifted so that the offending text byte lines up with its right-most
 * occurrence in the pattern, and always by at least one position.
 *
 * text, pattern: non-NULL, NUL-terminated strings (enforced with assert).
 *
 * Returns the zero-based index of the first occurrence of pattern in text,
 * or -1 when pattern does not occur. An empty pattern matches at index 0.
 */
int PatternMatchBME( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );

    enum { ALPHABET_SIZE = 256 }; /* one slot per possible byte value */
    int right[ALPHABET_SIZE];
    for( int c = 0; c < ALPHABET_SIZE; c++ )
    {
        right[c] = -1;
    }

    int patternLen = (int)strlen( pattern );
    for( int k = 0; k < patternLen; k++ )
    {
        /* Index through unsigned char: plain char may be signed, which would
         * turn bytes >= 0x80 into negative (out-of-bounds) indices. */
        right[(unsigned char)pattern[k]] = k;
    }

    int textLen = (int)strlen( text );
    int i = 0;
    while( i <= textLen - patternLen )
    {
        int skip = 0; /* stays 0 only when every character matched */
        for( int j = patternLen - 1; j >= 0; j-- )
        {
            if( text[i + j] != pattern[j] )
            {
                /* Bad-character shift; never move backwards or stand still. */
                skip = j - right[(unsigned char)text[i + j]];
                if( skip < 1 )
                {
                    skip = 1;
                }
                break;
            }
        }
        if( skip == 0 )
        {
            return i;
        }
        i += skip;
    }
    return -1;
}
/*
 * Helper for the Rabin-Karp rolling hash.
 *
 * Computes base^(len - 1) modulo prime, i.e. the weight of the left-most
 * character in a window of len characters. Returns 1 when len <= 1.
 */
int hashValue( int len, int base, int prime )
{
    int res = 1;
    for( int i = 0; i < len - 1; i++ )
    {
        /* Multiply in a wider type so res * base cannot overflow int. */
        res = (int)( ( (long long)res * base ) % prime );
    }
    return res;
}
/*
 * Rabin-Karp substring search.
 *
 * Keeps a rolling hash (base 256, modulo 101) of the current window of str
 * and compares it with the hash of pat; only when the hashes agree are the
 * characters themselves compared, so hash collisions cannot produce a false
 * match.
 *
 * str, pat: non-NULL, NUL-terminated strings (enforced with assert).
 *
 * Returns the zero-based index of the first occurrence of pat in str, or -1
 * when pat does not occur. An empty pat matches at index 0.
 */
int PatternMatchKRB( const char* str, const char* pat )
{
    assert( str );
    assert( pat );

    const int base = 256;
    const int prime = 101;
    int len = (int)strlen( pat );
    int strLen = (int)strlen( str );

    /* Bail out before hashing: hashing len characters of a shorter str would
     * read past its terminator. */
    if( len > strLen )
    {
        return -1;
    }

    int h = hashValue( len, base, prime );
    int hashStr = 0;
    int hashPat = 0;
    for( int i = 0; i < len; i++ )
    {
        /* Characters are hashed as unsigned bytes so both hashes stay in
         * [0, prime) even for bytes >= 0x80 when plain char is signed. */
        hashStr = ( hashStr * base + (unsigned char)str[i] ) % prime;
        hashPat = ( hashPat * base + (unsigned char)pat[i] ) % prime;
    }

    for( int i = 0; i <= strLen - len; i++ )
    {
        if( hashStr == hashPat && memcmp( str + i, pat, (size_t)len ) == 0 )
        {
            return i;
        }
        if( i < strLen - len )
        {
            /* Slide the window: drop str[i], append str[i + len]. */
            hashStr = ( base * ( hashStr - (unsigned char)str[i] * h )
                        + (unsigned char)str[i + len] ) % prime;
            if( hashStr < 0 )
            {
                hashStr += prime; /* C's % keeps the sign of the dividend */
            }
        }
    }
    return -1;
}
/*
 * Self-test for the pattern-search functions.
 *
 * Uses the standard library's strstr as the reference answer and asserts
 * that the brute-force, KMP, Boyer-Moore and Rabin-Karp implementations all
 * report the same position for a fixed text/pattern pair. Finally blocks on
 * getchar() (presumably so a console window stays open until a key is
 * pressed).
 */
void TestPatternSearch()
{
    const char* text = "acccwdocccwwhowccsiowiowwwccwweioewchcccandccswwveaoiewddddiweoicccchacccwwsfchadchanddsoisndochandischurcccchandchinawitnessbrchandeakoutmiters";
    const char* pattern = "cccwws";

    /* Reference result; the pattern is known to occur in the text. */
    const char* substr = strstr( text, pattern );
    assert( substr );
    int pos = (int)( substr - text );

    int newPos = PatternMatchBrute( text, pattern );
    int kmpPos = PatternMatchKMP( text, pattern );
    int boyerPos = PatternMatchBME( text, pattern );
    int krbPos = PatternMatchKRB( text, pattern );

    assert( newPos == pos );
    assert( kmpPos == pos );
    assert( boyerPos == pos );
    assert( krbPos == pos );

    /* Keep the results "used" when asserts are compiled out with NDEBUG. */
    (void)newPos;
    (void)kmpPos;
    (void)boyerPos;
    (void)krbPos;

    getchar();
}
#endif
相关文章推荐
- Javascript SHA-1:Secure Hash Algorithm
- [转]可视化的数据结构和算法
- 统计文件中不小于某一长度的单词的个数(泛型算法实现)
- 使用他人的MD5编码类,修改形成密码串
- Extracting Structured Data from Web Pages
- (译)Cocos2d_for_iPhone_1_Game_Development_Cookbook:1.13使用CCTexture2DMutable调换调色盘
- Java中3DES加密
- Refactoring Notes-Refactoring Methods(3)
- 图书馆管理程序~~不过貌似功能!!有空再修修
- trainging contest#2(2011成都现场赛)I BY Hyoga
- C/C++头文件包含内容概览
- 堆栈的应用(1) 平衡符号 C++实现
- 程序员编程艺术第一章、左旋转字符串
- 程序员编程艺术:第三章续、Top K算法问题的实现
- 程序员编程艺术:第四章、现场编写类似strstr/strcpy/strpbrk的函数
- 十四、第三章再续:快速选择SELECT算法的深入分析与实现
- 程序员编程艺术:第七章、求连续子数组的最大和
- 程序员编程艺术:第八章、从头至尾漫谈虚函数
- 程序员编程艺术:第九章、闲话链表追赶问题
- 程序员编程艺术:第十章、如何给10^7个数据量的磁盘文件排序