您的位置:首页 > 编程语言 > C语言/C++

基于KMP算法的C++字符串帮助类

2017-07-08 11:20 501 查看
利用C++实现的字符串基本查找替换算法类.基于KMP算法. 7月8日第一版,效率不满意,7月9日进行持续优化,提高算法效率,测试场景下大大超过C++标准库中的方法的效率.

7月9日:

对7月8日的程序做了优化,优化的点集中于:

1. KMP算法中需要的next数组(array)是根据pattern生成的,因此在pattern不变、需要反复查询该pattern的情况下,不应该每次都重新生成这个array. (实际应用场景: 类似在word文件中查找某个关键字出现的次数).

2. C++标准库中的string类的某些操作效率较低,不适合在大循环(千万次循环级别)中使用.可以将string先转换成const char*,之后利用指针对const char*操作.

3. 补充了replace, replaceall方法.并利用直接操作memory的方法(memset, memcpy)来实现.

可以看到,在我的测试用例下面,基于KMP的算法的字符串查找的效率比C++自带的方法提升了7倍左右.



优化后的程序:

#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_
#include <iostream>
using namespace std;

// KMP-based search/replace helper bound to one search text and one pattern.
// The constructor copies both strings into buffers owned by the object and
// builds the KMP fallback table once, so repeated queries reuse it.
class StringHelper
{
public:
    // Copies searchSpaceStr and patternStr and precomputes the fallback table.
    StringHelper(const string searchSpaceStr, const string patternStr);
    // Releases the fallback table and both copied buffers.
    ~StringHelper();

    // Reports whether the pattern occurs in the search text.
    bool find();
    // Counts occurrences of the pattern; the count is written to `count`.
    bool find(unsigned int& count);
    // Like find(count), and also reports the start index of the first occurrence.
    bool find(unsigned int& count, unsigned int &firstStartLocation);
    // Replaces the first occurrence with replaceToStr; the text is written to replaceRs.
    bool replace(const string replaceToStr, string& replaceRs);
    // Replaces every occurrence with replaceToStr; the text is written to replaceRs.
    bool replaceAll(const string replaceToStr, string& replaceRs);

private:
    // Not copyable: the object owns raw buffers that the destructor releases,
    // so a member-wise copy would delete them twice. Declared, never defined.
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    // Locates the first occurrence and reports its start index.
    bool findFirst(unsigned int &firstStartLocation);
    // KMP scan from the beginning of pSearchSpaceStr.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);
    // KMP scan starting at index stopLocation; reports the match bounds on success.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);
    // Fills nextArr with the fallback table for pStr.
    void getNext(const char *pStr, int *nextArr);

    int *nextArray;              // fallback table for the pattern
    char* m_SearchSpaceChArray;  // NUL-terminated copy of the search text
    char* m_PatternChArray;      // NUL-terminated copy of the pattern
};

#endif // !_STRING_HELPER_H_


#include "StringHelper.h"

// Binds the helper to one search text and one pattern.
//
// Both strings are copied into NUL-terminated buffers owned by the object,
// and the KMP fallback table for the pattern is computed once here.
//
// searchSpaceStr: text that will be searched.
// patternStr:     pattern to look for.
StringHelper::StringHelper(const string searchSpaceStr, const string patternStr)
{
    nextArray = NULL;
    m_SearchSpaceChArray = NULL;
    m_PatternChArray = NULL;

    const string::size_type searchLen = searchSpaceStr.length();
    m_SearchSpaceChArray = new char[searchLen + 1];
    memcpy(m_SearchSpaceChArray, searchSpaceStr.c_str(), sizeof(char) * searchLen);
    m_SearchSpaceChArray[searchLen] = '\0';

    const string::size_type patternLen = patternStr.length();
    m_PatternChArray = new char[patternLen + 1];
    memcpy(m_PatternChArray, patternStr.c_str(), sizeof(char) * patternLen);
    m_PatternChArray[patternLen] = '\0';

    // getNext() writes one entry per pattern character and always writes
    // entry 0, so the table is sized by the pattern (not the search text)
    // with one spare slot to cover an empty pattern.
    nextArray = new int[patternLen + 1];
    getNext(m_PatternChArray, nextArray);
}

// Frees the fallback table and both copied buffers, then clears the pointers.
StringHelper::~StringHelper()
{
    // delete[] on a null pointer is a no-op, so no guards are required.
    delete[] nextArray;
    nextArray = NULL;

    delete[] m_SearchSpaceChArray;
    m_SearchSpaceChArray = NULL;

    delete[] m_PatternChArray;
    m_PatternChArray = NULL;
}

// Returns true when the stored pattern occurs in the stored search text.
bool StringHelper::find()
{
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray);
}

// Counts the non-overlapping occurrences of the stored pattern.
//
// count: receives the number of occurrences (0 when there are none).
// Returns true when at least one occurrence was found.
bool StringHelper::find(unsigned int& count)
{
    count = 0;

    // An empty pattern "matches" without consuming any text, so the scan
    // would never advance; report it as not found instead of spinning.
    if ('\0' == m_PatternChArray[0])
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    // Each successful scan resumes right after the previous match; the scan
    // itself fails once the remaining text is exhausted.
    while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
    {
        ++count;
    }
    return 0 != count;
}

// Counts the non-overlapping occurrences of the stored pattern and reports
// where the first one starts.
//
// count:              receives the number of occurrences.
// firstStartLocation: receives the start index of the first occurrence
//                     (left at 0 when nothing is found).
// Returns true when at least one occurrence was found.
bool StringHelper::find(unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;

    // An empty pattern "matches" without consuming any text, so the scan
    // would never advance; report it as not found instead of spinning.
    if ('\0' == m_PatternChArray[0])
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
    {
        if (0 == count)
        {
            firstStartLocation = startLocation;
        }
        ++count;
    }
    return 0 != count;
}

// Replaces the first occurrence of the stored pattern with replaceToStr.
//
// replaceToStr: text substituted for the first occurrence.
// replaceRs:    cleared on entry; receives the rewritten text when an
//               occurrence is found, otherwise stays empty.
// Returns the result of findFirst().
bool StringHelper::replace(const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int startLocation = 0;
    if (!findFirst(startLocation))
    {
        return false;
    }

    // Build the result directly in the output string: no scratch buffer to
    // size by hand or to forget to free.
    const size_t stopLocation = startLocation + strlen(m_PatternChArray);
    replaceRs.append(m_SearchSpaceChArray, startLocation);    // text before the match
    replaceRs.append(replaceToStr);                           // replacement
    replaceRs.append(m_SearchSpaceChArray + stopLocation);    // text after the match
    return true;
}

// Replaces every non-overlapping occurrence of the stored pattern with
// replaceToStr.
//
// replaceToStr: text substituted for each occurrence.
// replaceRs:    cleared on entry; receives the rewritten text. When nothing
//               is replaced it receives the search text unchanged.
// Returns true when at least one occurrence was replaced.
bool StringHelper::replaceAll(const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    unsigned int copiedUpTo = 0;   // index of the first text character not yet emitted
    unsigned int count = 0;

    // An empty pattern "matches" without consuming any text, so the scan
    // would never advance; treat it as having no occurrences.
    if ('\0' != m_PatternChArray[0])
    {
        while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
        {
            // Emit the untouched text between the previous match and this one,
            // then the replacement.
            replaceRs.append(m_SearchSpaceChArray + copiedUpTo, startLocation - copiedUpTo);
            replaceRs.append(replaceToStr);
            copiedUpTo = stopLocation;
            ++count;
        }
    }

    // Emit whatever follows the last match (the whole text when none matched).
    replaceRs.append(m_SearchSpaceChArray + copiedUpTo);
    return 0 != count;
}

// Scans the stored text from index 0 for the stored pattern.
//
// firstStartLocation: reset to 0, then set to the start index of the first
//                     occurrence when one is found.
// Returns the result of the underlying KMP scan.
bool StringHelper::findFirst(unsigned int & firstStartLocation)
{
    unsigned int matchEnd = 0;   // scan origin on entry; end of match on success (unused here)
    firstStartLocation = 0;
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray, firstStartLocation, matchEnd);
}

// KMP scan of pSearchSpaceStr for pPatternStr, starting at the first character.
// Uses the precomputed fallback table in nextArray; returns false when the
// table is missing. Returns true when the whole pattern was matched.
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // On a mismatch fall back inside the pattern; -1 means "restart the
        // pattern at the next text character".
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    return patPos == patLen;
}

// KMP scan of pSearchSpaceStr for pPatternStr, starting at index stopLocation.
//
// startLocation: on success, set to the index where the match begins.
// stopLocation:  in: index at which scanning starts;
//                out (on success): index just past the match.
// Both are left untouched when no match is found. Returns false when the
// fallback table (nextArray) is missing or the pattern is not found.
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = stopLocation;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // On a mismatch fall back inside the pattern; -1 means "restart the
        // pattern at the next text character".
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    if (patPos != patLen)
    {
        return false;
    }

    startLocation = textPos - patPos;  // first character of the match
    stopLocation = textPos;            // one past the last matched character
    return true;
}

// Builds the KMP fallback table for pStr: nextArr[i] is the pattern index to
// resume comparing from when the comparison at index i fails (-1 means the
// pattern restarts at the next text character).
// Entry 0 is always written, followed by one entry per remaining pattern
// character, so nextArr needs room for at least max(1, strlen(pStr)) ints.
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(strlen(pStr)) - 1;
    int pos = 0;
    int fallback = -1;
    nextArr[0] = -1;

    while (pos < lastIndex)
    {
        if (fallback != -1 && pStr[pos] != pStr[fallback])
        {
            // Current prefix cannot be extended: retry with a shorter one.
            fallback = nextArr[fallback];
            continue;
        }

        ++pos;
        ++fallback;
        // When both positions hold the same character, falling back to
        // `fallback` would fail the same way, so reuse its own fallback.
        nextArr[pos] = (pStr[pos] == pStr[fallback]) ? nextArr[fallback] : fallback;
    }
}


优化前的代码:

#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_
#include <iostream>
using namespace std;

// KMP-based search/replace helper. Every call receives the search text and
// the pattern; the fallback table is rebuilt for each call and kept in
// nextArray.
class StringHelper
{
public:
    StringHelper();
    // Releases the fallback table.
    ~StringHelper();

    // Reports whether patternStr occurs in searchSpaceStr.
    bool find(const string searchSpaceStr, const string patternStr);
    // Counts occurrences of patternStr; the count is written to `count`.
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count);
    // Like find(count), and also reports the start index of the first occurrence.
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count, unsigned int &firstStartLocation);
    // Replaces the first occurrence of needReplaceStr; the text is written to replaceRs.
    bool replace(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);
    // Replaces every occurrence of needReplaceStr; the text is written to replaceRs.
    bool replaceAll(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);

private:
    // Not copyable: the object owns nextArray and releases it in the
    // destructor, so a member-wise copy would delete it twice.
    // Declared, never defined.
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    // Copies both strings into newly allocated buffers and rebuilds nextArray.
    bool findInit(const string searchSpaceStr, const string patternStr, char **pSearchSpaceChArray, char **pPatternChArray);
    // KMP scan from the beginning of pSearchSpaceStr.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);
    // KMP scan starting at index stopLocation; reports the match bounds on success.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);
    // Fills nextArr with the fallback table for pStr.
    void getNext(const char *pStr, int *nextArr);

    int *nextArray;  // fallback table for the most recently prepared pattern
};

#endif // !_STRING_HELPER_H_


#include "StringHelper.h"

// Starts with no fallback table; one is built on the first search.
StringHelper::StringHelper()
    : nextArray(NULL)
{
}

// Frees the fallback table, if any, and clears the pointer.
StringHelper::~StringHelper()
{
    // delete[] on a null pointer is a no-op, so no guard is required.
    delete[] nextArray;
    nextArray = NULL;
}

// Reports whether patternStr occurs in searchSpaceStr.
// Returns false when either string is empty (findInit rejects them).
bool StringHelper::find(const string searchSpaceStr, const string patternStr)
{
    // findInit() fills these with freshly allocated copies; plain locals are
    // enough to receive them, and they are released unconditionally below.
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    bool result = findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf);
    if (result)
    {
        result = kmpMapping(searchBuf, patternBuf);
    }

    delete[] searchBuf;
    delete[] patternBuf;
    return result;
}

// Counts the non-overlapping occurrences of patternStr in searchSpaceStr.
//
// count: receives the number of occurrences (0 when there are none or when
//        either string is empty).
// Returns true when at least one occurrence was found.
bool StringHelper::find(const string searchSpaceStr, const string patternStr, unsigned int& count)
{
    count = 0;

    // findInit() fills these with freshly allocated copies that this
    // function must release.
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    if (findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        // Each successful scan resumes right after the previous match; the
        // scan itself fails once the remaining text is exhausted.
        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            ++count;
        }
    }

    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

// Counts the non-overlapping occurrences of patternStr in searchSpaceStr and
// reports where the first one starts.
//
// count:              receives the number of occurrences.
// firstStartLocation: receives the start index of the first occurrence
//                     (left at 0 when nothing is found).
// Returns true when at least one occurrence was found.
bool StringHelper::find(const string searchSpaceStr, const string patternStr, unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;

    // findInit() fills these with freshly allocated copies that this
    // function must release.
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    if (findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            if (0 == count)
            {
                firstStartLocation = startLocation;
            }
            ++count;
        }
    }

    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

// Replaces the first occurrence of needReplaceStr in searchSpaceStr with
// replaceToStr.
//
// replaceRs: cleared on entry; receives the rewritten text when an occurrence
//            is found, otherwise stays empty.
// Returns true when an occurrence was found.
bool StringHelper::replace(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int occurrences = 0;
    unsigned int matchBegin = 0;
    if (!find(searchSpaceStr, needReplaceStr, occurrences, matchBegin))
    {
        return false;
    }

    const unsigned int matchEnd = matchBegin + strlen(needReplaceStr.c_str());
    replaceRs += searchSpaceStr.substr(0, matchBegin);   // text before the match
    replaceRs += replaceToStr;                           // replacement
    if (matchEnd < searchSpaceStr.length())
    {
        replaceRs += searchSpaceStr.substr(matchEnd);    // text after the match
    }
    return true;
}

// Replaces every non-overlapping occurrence of needReplaceStr in
// searchSpaceStr with replaceToStr.
//
// replaceRs: cleared on entry; receives the rewritten text. It stays empty
//            when either searchSpaceStr or needReplaceStr is empty.
// Returns true when at least one occurrence was replaced.
bool StringHelper::replaceAll(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    // findInit() fills these with freshly allocated copies that this
    // function must release.
    char *searchBuf = NULL;
    char *patternBuf = NULL;
    unsigned int count = 0;

    if (findInit(searchSpaceStr, needReplaceStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        unsigned int copiedUpTo = 0;   // index of the first text character not yet emitted

        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            // Emit the untouched text between the previous match and this
            // one, then the replacement.
            replaceRs.append(searchSpaceStr, copiedUpTo, startLocation - copiedUpTo);
            replaceRs.append(replaceToStr);
            copiedUpTo = stopLocation;
            ++count;
        }

        // Emit whatever follows the last match (the whole text when none matched).
        if (copiedUpTo < searchSpaceStr.length())
        {
            replaceRs.append(searchSpaceStr, copiedUpTo, string::npos);
        }
    }

    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

// Prepares a search: copies both strings into newly allocated NUL-terminated
// buffers and rebuilds the fallback table for the pattern.
//
// pSearchSpaceChArray / pPatternChArray: receive the new[]-allocated copies.
//     They are allocated on every call, including when false is returned, so
//     the caller is responsible for delete[]-ing both.
// Returns false (leaving nextArray untouched) when either string is empty,
// true otherwise.
bool StringHelper::findInit(const string searchSpaceStr, const string patternStr,
                            char **pSearchSpaceChArray, char **pPatternChArray)
{
    const char *searchSrc = searchSpaceStr.c_str();
    const char *patternSrc = patternStr.c_str();

    *pSearchSpaceChArray = new char[strlen(searchSrc) + 1];
    *pPatternChArray = new char[strlen(patternSrc) + 1];
    strcpy(*pSearchSpaceChArray, searchSrc);
    strcpy(*pPatternChArray, patternSrc);

    if ('\0' == (*pSearchSpaceChArray)[0] || '\0' == (*pPatternChArray)[0])
    {
        return false;
    }

    // Drop any table left over from a previous pattern (delete[] on a null
    // pointer is a no-op) and build a fresh one.
    delete[] nextArray;
    nextArray = NULL;
    nextArray = new int[strlen(*pPatternChArray)];
    getNext(*pPatternChArray, nextArray);
    return true;
}

// KMP scan of pSearchSpaceStr for pPatternStr, starting at the first character.
// Uses the fallback table in nextArray; returns false when the table is
// missing. Returns true when the whole pattern was matched.
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // On a mismatch fall back inside the pattern; -1 means "restart the
        // pattern at the next text character".
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    return patPos == patLen;
}

// KMP scan of pSearchSpaceStr for pPatternStr, starting at index stopLocation.
//
// startLocation: on success, set to the index where the match begins.
// stopLocation:  in: index at which scanning starts;
//                out (on success): index just past the match.
// Both are left untouched when no match is found. Returns false when the
// fallback table (nextArray) is missing or the pattern is not found.
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = stopLocation;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // On a mismatch fall back inside the pattern; -1 means "restart the
        // pattern at the next text character".
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    if (patPos != patLen)
    {
        return false;
    }

    startLocation = textPos - patPos;  // first character of the match
    stopLocation = textPos;            // one past the last matched character
    return true;
}

// Builds the KMP fallback table for pStr: nextArr[i] is the pattern index to
// resume comparing from when the comparison at index i fails (-1 means the
// pattern restarts at the next text character).
// Entry 0 is always written, followed by one entry per remaining pattern
// character, so nextArr needs room for at least max(1, strlen(pStr)) ints.
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(strlen(pStr)) - 1;
    int pos = 0;
    int fallback = -1;
    nextArr[0] = -1;

    while (pos < lastIndex)
    {
        if (fallback != -1 && pStr[pos] != pStr[fallback])
        {
            // Current prefix cannot be extended: retry with a shorter one.
            fallback = nextArr[fallback];
            continue;
        }

        ++pos;
        ++fallback;
        // When both positions hold the same character, falling back to
        // `fallback` would fail the same way, so reuse its own fallback.
        nextArr[pos] = (pStr[pos] == pStr[fallback]) ? nextArr[fallback] : fallback;
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: