您的位置:首页 > 编程语言 > Go语言

字符串匹配算法之总结

2014-04-25 15:59 369 查看

问题提出:

给定字符串text和pattern,判断pattern是否为text的子串;若是,返回首次匹配的起始位置,否则返回-1。

问题解决:

       1.暴力搜索:逐一尝试text中的每个起始位置,并与pattern逐字符比较;

       2.KMP 算法:需要预先计算pattern的部分匹配表(即前缀函数,对应代码中的suffix数组,并非严格意义上的"后缀数组"),失配时据此决定pattern的回退位置,避免重复比较已匹配的字符,从而加快搜索;

       3.Boyer-Moore算法:详述请参见维基百科;

       4.Karp-Rabin 算法:详述请参见维基百科;

实现代码:

#ifndef _PATTERN_SEARCH_H_
#define _PATTERN_SEARCH_H_

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/*
 * Brute-force substring search.
 *
 * Tries every candidate start offset in `text` and compares `pattern`
 * against it character by character.
 *
 * text:    NUL-terminated string to search in; must not be NULL (asserted).
 * pattern: NUL-terminated string to search for; must not be NULL (asserted).
 *
 * Returns the index of the first occurrence of `pattern` in `text`, or -1
 * when there is none. An empty pattern matches at index 0.
 */
int PatternMatchBrute( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );

    size_t textLen = strlen( text );
    size_t patternLen = strlen( pattern );

    // Both lengths are unsigned: without this guard textLen - patternLen
    // wraps around to a huge value and the scan runs past the end of text.
    if( patternLen > textLen )
    {
        return -1;
    }

    size_t lastStart = textLen - patternLen;
    for( size_t i = 0; i <= lastStart; i++ )
    {
        size_t j = 0;
        while( j < patternLen && text[i + j] == pattern[j] )
        {
            j++;
        }

        // Every pattern character matched at this offset.
        if( j == patternLen )
        {
            return (int)i;
        }
    }

    return -1;
}

/*
 * Builds the KMP failure table (prefix function) for `pattern`.
 *
 * Entry i holds the length of the longest proper prefix of pattern[0..i]
 * that is also a suffix of pattern[0..i]. Despite the function name this
 * is not a suffix array.
 *
 * pattern: pattern characters; only the first `len` are read.
 * len:     number of characters in `pattern`.
 *
 * Returns an array of len + 1 ints allocated with new[]; entries that are
 * not computed (index len, and index 0) are zero. The caller owns the array
 * and must release it with delete [].
 */
int* CalcSuffix( const char* pattern, size_t len )
{
    int* suffix = new int[ len + 1 ];
    memset( suffix, 0x00, sizeof(int) * ( len + 1 ) );

    size_t matched = 0; // length of the border currently being extended
    for( size_t i = 1; i < len; i++ )
    {
        // Fall back to shorter borders until one can be extended by pattern[i].
        while( matched > 0 && pattern[i] != pattern[matched] )
        {
            matched = (size_t)suffix[matched - 1];
        }

        if( pattern[i] == pattern[matched] )
        {
            matched++;
        }

        suffix[i] = (int)matched;
    }

    return suffix;
}

/*
 * Knuth-Morris-Pratt substring search.
 *
 * Scans `text` once, left to right. On a mismatch the number of pattern
 * characters considered matched is reduced via the failure table instead of
 * restarting the comparison, so no text character is examined twice and no
 * candidate alignment is skipped.
 *
 * text:    NUL-terminated string to search in; must not be NULL (asserted).
 * pattern: NUL-terminated string to search for; must not be NULL (asserted).
 *
 * Returns the index of the first occurrence of `pattern` in `text`, or -1
 * when there is none. An empty pattern matches at index 0.
 */
int PatternMatchKMP( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );

    size_t textLen = strlen( text );
    size_t patternLen = strlen( pattern );

    if( patternLen == 0 )
    {
        return 0;
    }

    // Also avoids the unsigned wrap-around of textLen - patternLen.
    if( patternLen > textLen )
    {
        return -1;
    }

    int* suffix = CalcSuffix( pattern, patternLen );

    int result = -1;
    size_t matched = 0; // number of pattern characters matched so far
    for( size_t i = 0; i < textLen; i++ )
    {
        // Mismatch: keep the longest border of the matched part and retry.
        while( matched > 0 && text[i] != pattern[matched] )
        {
            matched = (size_t)suffix[matched - 1];
        }

        if( text[i] == pattern[matched] )
        {
            matched++;
        }

        if( matched == patternLen )
        {
            result = (int)( i + 1 - patternLen );
            break;
        }
    }

    delete [] suffix;
    return result;
}

/*
 * Boyer-Moore substring search using only the bad-character rule.
 *
 * For each byte value the table `right` records the index of its rightmost
 * occurrence in `pattern` (-1 if absent). The pattern is compared against
 * the text right to left; on a mismatch the pattern is shifted so that the
 * mismatching text byte lines up with its rightmost occurrence in the
 * pattern, or by one position if that would not move the pattern forward.
 *
 * text:    NUL-terminated string to search in; must not be NULL (asserted).
 * pattern: NUL-terminated string to search for; must not be NULL (asserted).
 *
 * Returns the index of the first occurrence of `pattern` in `text`, or -1
 * when there is none. An empty pattern matches at index 0.
 */
int PatternMatchBME( const char* text, const char* pattern )
{
    assert( text );
    assert( pattern );

    enum { ALPHABET_SIZE = 256 }; // one table slot per possible byte value

    int right[ALPHABET_SIZE];
    for( int c = 0; c < ALPHABET_SIZE; c++ )
    {
        right[c] = -1;
    }

    size_t patternLen = strlen( pattern );
    size_t textLen = strlen( text );
    if( patternLen > textLen )
    {
        return -1;
    }

    // Index through unsigned char: plain char may be signed, and a byte
    // >= 0x80 would otherwise become a negative array index.
    for( size_t k = 0; k < patternLen; k++ )
    {
        right[(unsigned char)pattern[k]] = (int)k;
    }

    size_t lastStart = textLen - patternLen;
    size_t i = 0;
    while( i <= lastStart )
    {
        size_t skip = 0;
        for( size_t j = patternLen; j-- > 0; )
        {
            if( text[i + j] != pattern[j] )
            {
                // Bad-character shift; never move backwards or stay in place.
                int shift = (int)j - right[(unsigned char)text[i + j]];
                skip = ( shift < 1 ) ? 1 : (size_t)shift;
                break;
            }
        }

        // No mismatch found: the whole pattern matched at offset i.
        if( 0 == skip )
        {
            return (int)i;
        }

        i += skip;
    }

    return -1;
}

/*
 * Helper for the Rabin-Karp rolling hash.
 *
 * Multiplies 1 by `base` (len - 1) times, reducing modulo `prime` after
 * each multiplication, i.e. computes base^(len-1) mod prime for len >= 1.
 * For len <= 1 the loop does not run and 1 is returned unreduced.
 */
int hashValue( int len, int base, int prime )
{
    int res = 1;
    for( int i = 0; i < len - 1; i++ )
    {
        res = ( res * base ) % prime;
    }

    return res;
}

/*
 * Rabin-Karp substring search.
 *
 * Keeps a rolling hash (base 256, modulo 101) of the current window of
 * `str` and compares it with the hash of `pat`; equal hashes are confirmed
 * with a byte-wise comparison, so hash collisions cannot produce a false
 * match.
 *
 * str: NUL-terminated string to search in; must not be NULL (asserted).
 * pat: NUL-terminated string to search for; must not be NULL (asserted).
 *
 * Returns the index of the first occurrence of `pat` in `str`, or -1 when
 * there is none. An empty pattern matches at index 0.
 */
int PatternMatchKRB( const char* str, const char* pat )
{
    assert( str );
    assert( pat );

    const int base = 256;
    const int prime = 101;

    size_t strLen = strlen( str );
    size_t len = strlen( pat );

    // Without this guard the initial hash loop would read past the end of str.
    if( len > strLen )
    {
        return -1;
    }

    // Weight of the window's leading byte, needed to remove it when rolling.
    int h = hashValue( (int)len, base, prime );

    // Bytes are hashed as unsigned char so both hashes stay in [0, prime)
    // regardless of whether plain char is signed.
    int hashStr = 0;
    int hashPat = 0;
    for( size_t i = 0; i < len; i++ )
    {
        hashStr = ( hashStr * base + (unsigned char)str[i] ) % prime;
        hashPat = ( hashPat * base + (unsigned char)pat[i] ) % prime;
    }

    size_t lastStart = strLen - len;
    for( size_t i = 0; i <= lastStart; i++ )
    {
        if( hashStr == hashPat && memcmp( str + i, pat, len ) == 0 )
        {
            return (int)i;
        }

        if( i < lastStart )
        {
            // Drop str[i], shift the window, append str[i + len].
            hashStr = ( base * ( hashStr - (unsigned char)str[i] * h )
                        + (unsigned char)str[i + len] ) % prime;
            // The subtraction can make the remainder negative; renormalise.
            if( hashStr < 0 )
            {
                hashStr += prime;
            }
        }
    }

    return -1;
}

/*
 * Test interface.
 *
 * Searches a fixed text for a fixed pattern with strstr and with each of
 * the four implementations, and asserts that every implementation reports
 * the same position as strstr. Finally waits for a character on stdin via
 * getchar().
 */
void TestPatternSearch()
{
    const char* text = "acccwdocccwwhowccsiowiowwwccwweioewchcccandccswwveaoiewddddiweoicccchacccwwsfchadchanddsoisndochandischurcccchandchinawitnessbrchandeakoutmiters";
    const char* pattern = "cccwws";

    // Reference answer from the standard library; -1 mirrors the
    // "not found" convention of the functions under test.
    const char* substr = strstr( text, pattern );
    int pos = substr ? (int)( substr - text ) : -1;

    int newPos = PatternMatchBrute( text, pattern );
    assert( newPos == pos );

    int kmpPos = PatternMatchKMP( text, pattern );
    assert( kmpPos == pos );

    int boyerPos = PatternMatchBME( text, pattern );
    assert( boyerPos == pos );

    int krbPos = PatternMatchKRB( text, pattern );
    assert( krbPos == pos );

    // Keep the results "used" when assert is compiled out with NDEBUG.
    (void)pos;
    (void)newPos;
    (void)kmpPos;
    (void)boyerPos;
    (void)krbPos;

    getchar();
}

#endif
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  algorithm