基于KMP算法的C++字符串帮助类
2017-07-08 11:20
501 查看
利用C++实现的字符串基本查找、替换算法类,基于KMP算法。7月8日完成第一版,但效率不理想;7月9日进行了持续优化,提高了算法效率,在测试场景下效率大大超过C++标准库中对应方法。
7月9日:
对7月8日的程序做了优化,优化的点集中于:
1. KMP算法中需要的next数组(array)只由pattern决定,因此在pattern不变的情况下反复查询该pattern时,不应该每次都重新生成这个数组. (实际应用场景: 类似在word文件中查找某个关键字出现的次数).
2. C++标准库中的string类的某些操作效率较低,不适合在大循环(千万次循环级别)中使用.可以将string先转换成const char*,之后利用指针对const char*操作.
3. 补充了replace, replaceall方法.并利用直接操作memory的方法(memset, memcpy)来实现.
可以看到,在我的测试用例下面,基于KMP的算法的字符串查找的效率比C++自带的方法提升了7倍左右.
优化后的程序:
优化前的代码:
7月9日:
对7月8日的程序做了优化,优化的点集中于:
1. KMP算法中需要的next数组(array)只由pattern决定,因此在pattern不变的情况下反复查询该pattern时,不应该每次都重新生成这个数组. (实际应用场景: 类似在word文件中查找某个关键字出现的次数).
2. C++标准库中的string类的某些操作效率较低,不适合在大循环(千万次循环级别)中使用.可以将string先转换成const char*,之后利用指针对const char*操作.
3. 补充了replace, replaceall方法.并利用直接操作memory的方法(memset, memcpy)来实现.
可以看到,在我的测试用例下面,基于KMP的算法的字符串查找的效率比C++自带的方法提升了7倍左右.
优化后的程序:
#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_

#include <iostream>
#include <string>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept because the declarations below (and the
// implementation file) spell the type as unqualified `string`.
using namespace std;

/**
 * KMP-based search/replace helper bound to one (text, pattern) pair.
 *
 * The constructor copies both strings into internal C-string buffers and
 * builds the KMP "next" table once, so repeated queries on the same pattern
 * do not rebuild it.
 *
 * The object owns three heap buffers, so copying is disabled: a member-wise
 * copy would make two objects delete the same buffers.
 */
class StringHelper
{
public:
    /**
     * @param searchSpaceStr text that will be searched
     * @param patternStr     pattern to look for in the text
     */
    StringHelper(const string searchSpaceStr, const string patternStr);
    ~StringHelper();

    /** Reports whether the pattern occurs in the text. */
    bool find();

    /**
     * Counts the occurrences of the pattern; each scan resumes right after the
     * previous match, so occurrences do not overlap.
     * @param count receives the number of occurrences (0 when none)
     * @return true when at least one occurrence exists
     */
    bool find(unsigned int& count);

    /**
     * Same as find(count), additionally reporting where the first match starts.
     * @param count              receives the number of occurrences
     * @param firstStartLocation receives the index of the first match (0 when none)
     * @return true when at least one occurrence exists
     */
    bool find(unsigned int& count, unsigned int &firstStartLocation);

    /**
     * Replaces the first occurrence of the pattern.
     * @param replaceToStr replacement text
     * @param replaceRs    cleared, then receives the result when a match exists
     * @return true when an occurrence was found
     */
    bool replace(const string replaceToStr, string& replaceRs);

    /**
     * Replaces every occurrence of the pattern.
     * @param replaceToStr replacement text
     * @param replaceRs    cleared, then receives the rewritten text
     */
    bool replaceAll(const string replaceToStr, string& replaceRs);

private:
    // Declared but never defined: copying is not supported (see class comment).
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    // Locates the first match and reports its start index.
    bool findFirst(unsigned int &firstStartLocation);

    // KMP scan from the beginning of the text; true on a match.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);

    // KMP scan that starts at stopLocation and, on a match, reports the
    // matched range through startLocation / stopLocation.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);

    // Fills nextArr with the KMP fallback table for pStr.
    void getNext(const char *pStr, int *nextArr);

    int *nextArray;              // KMP fallback table for the pattern (owned)
    char* m_SearchSpaceChArray;  // NUL-terminated copy of the text (owned)
    char* m_PatternChArray;      // NUL-terminated copy of the pattern (owned)
};

#endif // !_STRING_HELPER_H_
#include "StringHelper.h"

#include <cstddef>
#include <cstring>
#include <string>

namespace
{

/**
 * Core KMP scan shared by every member function.
 *
 * Lengths are passed in so that callers looping over many matches compute
 * strlen() once instead of once per match.
 *
 * @param text        text to scan
 * @param textLen     length of text
 * @param pattern     pattern to look for
 * @param patternLen  length of pattern
 * @param next        KMP fallback table built by getNext() for pattern
 * @param from        index in text where the scan starts
 * @param matchStart  on success, index of the first matched character
 * @param matchStop   on success, index one past the last matched character
 * @return true when the pattern occurs at or after from. An empty pattern
 *         never matches; otherwise callers that resume at matchStop would
 *         never make progress.
 */
bool kmpScan(const char* text, std::size_t textLen,
             const char* pattern, std::size_t patternLen,
             const int* next, std::size_t from,
             std::size_t& matchStart, std::size_t& matchStop)
{
    if (NULL == next || 0 == patternLen)
    {
        return false;
    }

    std::size_t i = from;
    int j = 0;  // may become -1: the table's "restart at the next text character" marker
    const int patLen = static_cast<int>(patternLen);
    while (i < textLen && j < patLen)
    {
        if (-1 == j || text[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
        }
    }

    if (j != patLen)
    {
        return false;
    }
    matchStart = i - patternLen;
    matchStop = i;
    return true;
}

} // namespace

/**
 * Copies the text and the pattern into owned C-string buffers and builds the
 * KMP table for the pattern once.
 */
StringHelper::StringHelper(const std::string searchSpaceStr, const std::string patternStr)
    : nextArray(NULL), m_SearchSpaceChArray(NULL), m_PatternChArray(NULL)
{
    const std::size_t textLen = searchSpaceStr.length();
    m_SearchSpaceChArray = new char[textLen + 1];
    std::memcpy(m_SearchSpaceChArray, searchSpaceStr.c_str(), textLen + 1);  // +1 copies the terminator

    const std::size_t patternLen = patternStr.length();
    m_PatternChArray = new char[patternLen + 1];
    std::memcpy(m_PatternChArray, patternStr.c_str(), patternLen + 1);

    // The table has one entry per pattern character. getNext() always writes
    // entry 0, so keep at least one slot even for an empty pattern.
    nextArray = new int[patternLen + 1];
    getNext(m_PatternChArray, nextArray);
}

/** Releases the three owned buffers. */
StringHelper::~StringHelper()
{
    delete[] nextArray;
    delete[] m_SearchSpaceChArray;
    delete[] m_PatternChArray;
}

/** @return true when the pattern occurs in the text (an empty pattern never matches). */
bool StringHelper::find()
{
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray);
}

/**
 * Counts non-overlapping occurrences of the pattern.
 * @param count receives the number of occurrences
 * @return true when count is non-zero
 */
bool StringHelper::find(unsigned int& count)
{
    count = 0;
    const std::size_t textLen = std::strlen(m_SearchSpaceChArray);
    const std::size_t patternLen = std::strlen(m_PatternChArray);

    std::size_t matchStart = 0;
    std::size_t matchStop = 0;
    // Each scan resumes right after the previous match.
    while (kmpScan(m_SearchSpaceChArray, textLen, m_PatternChArray, patternLen,
                   nextArray, matchStop, matchStart, matchStop))
    {
        ++count;
    }
    return 0 != count;
}

/**
 * Counts non-overlapping occurrences and reports where the first one starts.
 * @param count              receives the number of occurrences
 * @param firstStartLocation receives the index of the first match, 0 when none
 * @return true when count is non-zero
 */
bool StringHelper::find(unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;
    const std::size_t textLen = std::strlen(m_SearchSpaceChArray);
    const std::size_t patternLen = std::strlen(m_PatternChArray);

    std::size_t matchStart = 0;
    std::size_t matchStop = 0;
    while (kmpScan(m_SearchSpaceChArray, textLen, m_PatternChArray, patternLen,
                   nextArray, matchStop, matchStart, matchStop))
    {
        ++count;
        if (1 == count)
        {
            firstStartLocation = static_cast<unsigned int>(matchStart);
        }
    }
    return 0 != count;
}

/**
 * Replaces the first occurrence of the pattern.
 * @param replaceToStr replacement text
 * @param replaceRs    cleared; receives the rewritten text when a match exists
 * @return true when an occurrence was found and replaced
 */
bool StringHelper::replace(const std::string replaceToStr, std::string& replaceRs)
{
    replaceRs.clear();

    unsigned int matchStart = 0;
    if (!findFirst(matchStart))
    {
        return false;
    }

    const std::size_t textLen = std::strlen(m_SearchSpaceChArray);
    const std::size_t patternLen = std::strlen(m_PatternChArray);
    const std::size_t replacementLen = replaceToStr.length();
    const std::size_t matchStop = matchStart + patternLen;
    const std::size_t tailLen = textLen - matchStop;

    // Size the result once and copy the three pieces straight into it; the
    // string owns the storage, so nothing has to be freed by hand.
    replaceRs.assign(matchStart + replacementLen + tailLen, '\0');
    if (!replaceRs.empty())
    {
        char* out = &replaceRs[0];
        std::memcpy(out, m_SearchSpaceChArray, matchStart);
        std::memcpy(out + matchStart, replaceToStr.data(), replacementLen);
        std::memcpy(out + matchStart + replacementLen, m_SearchSpaceChArray + matchStop, tailLen);
    }
    return true;
}

/**
 * Replaces every non-overlapping occurrence of the pattern.
 * @param replaceToStr replacement text
 * @param replaceRs    receives the rewritten text; when nothing matches it
 *                     receives an unchanged copy of the text
 * @return true when at least one occurrence was replaced
 */
bool StringHelper::replaceAll(const std::string replaceToStr, std::string& replaceRs)
{
    replaceRs.clear();
    const std::size_t textLen = std::strlen(m_SearchSpaceChArray);
    const std::size_t patternLen = std::strlen(m_PatternChArray);
    const std::size_t replacementLen = replaceToStr.length();

    // Pass 1: count the matches so the result can be sized exactly once.
    std::size_t matches = 0;
    std::size_t matchStart = 0;
    std::size_t matchStop = 0;
    while (kmpScan(m_SearchSpaceChArray, textLen, m_PatternChArray, patternLen,
                   nextArray, matchStop, matchStart, matchStop))
    {
        ++matches;
    }
    if (0 == matches)
    {
        replaceRs.assign(m_SearchSpaceChArray, textLen);
        return false;
    }

    replaceRs.assign(textLen - matches * patternLen + matches * replacementLen, '\0');
    if (replaceRs.empty())
    {
        return true;  // the whole text consisted of matches replaced by nothing
    }

    // Pass 2: copy "gap, replacement, gap, replacement, ..., tail".
    char* out = &replaceRs[0];
    std::size_t copiedUpTo = 0;
    matchStop = 0;
    while (kmpScan(m_SearchSpaceChArray, textLen, m_PatternChArray, patternLen,
                   nextArray, matchStop, matchStart, matchStop))
    {
        const std::size_t gapLen = matchStart - copiedUpTo;
        std::memcpy(out, m_SearchSpaceChArray + copiedUpTo, gapLen);
        out += gapLen;
        std::memcpy(out, replaceToStr.data(), replacementLen);
        out += replacementLen;
        copiedUpTo = matchStop;
    }
    std::memcpy(out, m_SearchSpaceChArray + copiedUpTo, textLen - copiedUpTo);
    return true;
}

/**
 * Locates the first occurrence of the pattern.
 * @param firstStartLocation receives the index of the first match, 0 when none
 * @return true when an occurrence exists
 */
bool StringHelper::findFirst(unsigned int & firstStartLocation)
{
    firstStartLocation = 0;
    unsigned int stopLocation = 0;
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray, firstStartLocation, stopLocation);
}

/**
 * KMP scan over the whole text.
 * @return true when pPatternStr occurs in pSearchSpaceStr
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    std::size_t matchStart = 0;
    std::size_t matchStop = 0;
    return kmpScan(pSearchSpaceStr, std::strlen(pSearchSpaceStr),
                   pPatternStr, std::strlen(pPatternStr),
                   nextArray, 0, matchStart, matchStop);
}

/**
 * KMP scan that resumes at stopLocation.
 * @param startLocation on success, receives the index where the match starts
 * @param stopLocation  in: index to start scanning from;
 *                      out (on success): index one past the end of the match
 * @return true on a match; both locations are left untouched otherwise
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    std::size_t matchStart = 0;
    std::size_t matchStop = 0;
    if (!kmpScan(pSearchSpaceStr, std::strlen(pSearchSpaceStr),
                 pPatternStr, std::strlen(pPatternStr),
                 nextArray, stopLocation, matchStart, matchStop))
    {
        return false;
    }
    startLocation = static_cast<unsigned int>(matchStart);
    stopLocation = static_cast<unsigned int>(matchStop);
    return true;
}

/**
 * Builds the KMP fallback table: nextArr[j] is the pattern index to resume
 * comparing at after a mismatch at pattern index j (-1 means "advance to the
 * next text character and restart the pattern").
 *
 * @param pStr    NUL-terminated pattern
 * @param nextArr output table; needs max(strlen(pStr), 1) entries
 */
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(std::strlen(pStr)) - 1;
    int i = 0;
    int k = -1;
    nextArr[0] = -1;
    while (i < lastIndex)
    {
        if (-1 == k || pStr[i] == pStr[k])
        {
            ++i;
            ++k;
            // If falling back to k would compare the same character again and
            // fail again, skip directly to k's own fallback.
            nextArr[i] = (pStr[i] == pStr[k]) ? nextArr[k] : k;
        }
        else
        {
            k = nextArr[k];
        }
    }
}
优化前的代码:
#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_

#include <iostream>
#include <string>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept because the declarations below (and the
// implementation file) spell the type as unqualified `string`.
using namespace std;

/**
 * KMP-based search/replace helper (first, pre-optimization version).
 *
 * Every public call receives the text and the pattern, and rebuilds the KMP
 * "next" table for that pattern before scanning.
 *
 * The object owns the table buffer, so copying is disabled: a member-wise copy
 * would make two objects delete the same buffer.
 */
class StringHelper
{
public:
    StringHelper();
    ~StringHelper();

    /**
     * Reports whether patternStr occurs in searchSpaceStr.
     * @return false when either string is empty or no occurrence exists
     */
    bool find(const string searchSpaceStr, const string patternStr);

    /**
     * Counts the occurrences of patternStr; each scan resumes right after the
     * previous match, so occurrences do not overlap.
     * @param count receives the number of occurrences (0 when none)
     * @return true when at least one occurrence exists
     */
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count);

    /**
     * Same as the counting overload, additionally reporting where the first
     * match starts.
     * @param firstStartLocation receives the index of the first match (0 when none)
     */
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count, unsigned int &firstStartLocation);

    /**
     * Replaces the first occurrence of needReplaceStr with replaceToStr.
     * @param replaceRs cleared, then receives the result when a match exists
     * @return true when an occurrence was found
     */
    bool replace(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);

    /**
     * Replaces every occurrence of needReplaceStr with replaceToStr.
     * @param replaceRs cleared, then receives the rewritten text
     * @return true when at least one occurrence was replaced
     */
    bool replaceAll(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);

private:
    // Declared but never defined: copying is not supported (see class comment).
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    // Allocates C-string copies of both inputs into the two out-pointers and
    // rebuilds nextArray; returns false when either input is empty.
    bool findInit(const string searchSpaceStr, const string patternStr, char **pSearchSpaceChArray, char **pPatternChArray);

    // KMP scan from the beginning of the text; true on a match.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);

    // KMP scan that starts at stopLocation and, on a match, reports the
    // matched range through startLocation / stopLocation.
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);

    // Fills nextArr with the KMP fallback table for pStr.
    void getNext(const char *pStr, int *nextArr);

    int *nextArray;  // KMP fallback table for the most recent pattern (owned)
};

#endif // !_STRING_HELPER_H_
#include "StringHelper.h"

#include <cstddef>
#include <cstring>
#include <string>

namespace
{

/**
 * Holds the two scratch C strings that findInit() allocates and frees them
 * when the holder goes out of scope, so no return path can leak them.
 */
struct ScratchBuffers
{
    char* text;
    char* pattern;

    ScratchBuffers() : text(NULL), pattern(NULL) {}
    ~ScratchBuffers()
    {
        delete[] text;
        delete[] pattern;
    }

private:
    // Not copyable: a copy would free the same buffers twice.
    ScratchBuffers(const ScratchBuffers&);
    ScratchBuffers& operator=(const ScratchBuffers&);
};

} // namespace

/** Starts with no KMP table; findInit() builds one per query. */
StringHelper::StringHelper()
    : nextArray(NULL)
{
}

/** Releases the KMP table. */
StringHelper::~StringHelper()
{
    delete[] nextArray;
    nextArray = NULL;
}

/**
 * Reports whether patternStr occurs in searchSpaceStr.
 * @return false when either string is empty or no occurrence exists
 */
bool StringHelper::find(const std::string searchSpaceStr, const std::string patternStr)
{
    ScratchBuffers buffers;
    if (!findInit(searchSpaceStr, patternStr, &buffers.text, &buffers.pattern))
    {
        return false;
    }
    return kmpMapping(buffers.text, buffers.pattern);
}

/**
 * Counts non-overlapping occurrences of patternStr in searchSpaceStr.
 * @param count receives the number of occurrences
 * @return true when count is non-zero
 */
bool StringHelper::find(const std::string searchSpaceStr, const std::string patternStr, unsigned int& count)
{
    count = 0;
    ScratchBuffers buffers;
    if (!findInit(searchSpaceStr, patternStr, &buffers.text, &buffers.pattern))
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    // Each scan resumes at stopLocation, i.e. right after the previous match.
    while (kmpMapping(buffers.text, buffers.pattern, startLocation, stopLocation))
    {
        ++count;
    }
    return 0 != count;
}

/**
 * Counts non-overlapping occurrences and reports where the first one starts.
 * @param count              receives the number of occurrences
 * @param firstStartLocation receives the index of the first match, 0 when none
 * @return true when count is non-zero
 */
bool StringHelper::find(const std::string searchSpaceStr, const std::string patternStr, unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;
    ScratchBuffers buffers;
    if (!findInit(searchSpaceStr, patternStr, &buffers.text, &buffers.pattern))
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    while (kmpMapping(buffers.text, buffers.pattern, startLocation, stopLocation))
    {
        ++count;
        if (1 == count)
        {
            firstStartLocation = startLocation;
        }
    }
    return 0 != count;
}

/**
 * Replaces the first occurrence of needReplaceStr with replaceToStr.
 * @param replaceRs cleared; receives the rewritten text when a match exists
 * @return true when an occurrence was found and replaced
 */
bool StringHelper::replace(const std::string searchSpaceStr, const std::string needReplaceStr, const std::string replaceToStr, std::string& replaceRs)
{
    replaceRs.clear();

    unsigned int count = 0;
    unsigned int startLocation = 0;
    if (!find(searchSpaceStr, needReplaceStr, count, startLocation))
    {
        return false;
    }

    const std::size_t stopLocation = startLocation + std::strlen(needReplaceStr.c_str());
    replaceRs.append(searchSpaceStr, 0, startLocation);
    replaceRs.append(replaceToStr);
    if (stopLocation < searchSpaceStr.length())
    {
        replaceRs.append(searchSpaceStr, stopLocation, searchSpaceStr.length() - stopLocation);
    }
    return true;
}

/**
 * Replaces every non-overlapping occurrence of needReplaceStr.
 * @param replaceRs receives the rewritten text; when there is nothing to
 *                  replace (no match, or an empty text/pattern) it receives an
 *                  unchanged copy of searchSpaceStr
 * @return true when at least one occurrence was replaced
 */
bool StringHelper::replaceAll(const std::string searchSpaceStr, const std::string needReplaceStr, const std::string replaceToStr, std::string& replaceRs)
{
    replaceRs.clear();
    ScratchBuffers buffers;
    if (!findInit(searchSpaceStr, needReplaceStr, &buffers.text, &buffers.pattern))
    {
        replaceRs = searchSpaceStr;
        return false;
    }

    unsigned int replaced = 0;
    unsigned int lastStopLocation = 0;
    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    while (kmpMapping(buffers.text, buffers.pattern, startLocation, stopLocation))
    {
        // Copy the untouched gap before this match, then the replacement.
        replaceRs.append(searchSpaceStr, lastStopLocation, startLocation - lastStopLocation);
        replaceRs.append(replaceToStr);
        lastStopLocation = stopLocation;
        ++replaced;
    }
    // Copy whatever follows the last match (the whole text when none matched).
    if (lastStopLocation < searchSpaceStr.length())
    {
        replaceRs.append(searchSpaceStr, lastStopLocation, searchSpaceStr.length() - lastStopLocation);
    }
    return 0 != replaced;
}

/**
 * Prepares one query: copies both strings into freshly allocated C strings
 * and rebuilds the KMP table for the pattern.
 *
 * @param pSearchSpaceChArray receives a new[]-allocated copy of the text
 * @param pPatternChArray     receives a new[]-allocated copy of the pattern
 * @return false when either string is empty (the table is then left as is).
 *         The caller owns both copies in every case.
 */
bool StringHelper::findInit(const std::string searchSpaceStr, const std::string patternStr, char **pSearchSpaceChArray, char **pPatternChArray)
{
    *pSearchSpaceChArray = new char[std::strlen(searchSpaceStr.c_str()) + 1];
    *pPatternChArray = new char[std::strlen(patternStr.c_str()) + 1];
    std::strcpy(*pSearchSpaceChArray, searchSpaceStr.c_str());
    std::strcpy(*pPatternChArray, patternStr.c_str());

    const std::size_t patternLen = std::strlen(*pPatternChArray);
    if (0 == std::strlen(*pSearchSpaceChArray) || 0 == patternLen)
    {
        return false;
    }

    // Drop the table of the previous pattern before building the new one;
    // reset the pointer first so a throwing new[] cannot leave it dangling.
    delete[] nextArray;
    nextArray = NULL;
    nextArray = new int[patternLen];
    getNext(*pPatternChArray, nextArray);
    return true;
}

/**
 * KMP scan over the whole text.
 * @return true when pPatternStr occurs in pSearchSpaceStr; false when no
 *         table has been built yet
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int searchSpaceLength = static_cast<int>(std::strlen(pSearchSpaceStr));
    const int patternStrLength = static_cast<int>(std::strlen(pPatternStr));
    int i = 0;
    int j = 0;  // may become -1: the table's "restart at the next text character" marker
    while (i < searchSpaceLength && j < patternStrLength)
    {
        if (-1 == j || pSearchSpaceStr[i] == pPatternStr[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = nextArray[j];
        }
    }
    return j == patternStrLength;
}

/**
 * KMP scan that resumes at stopLocation.
 * @param startLocation on success, receives the index where the match starts
 * @param stopLocation  in: index to start scanning from;
 *                      out (on success): index one past the end of the match
 * @return true on a match; both locations are left untouched otherwise
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    if (NULL == nextArray)
    {
        return false;
    }

    const int searchSpaceLength = static_cast<int>(std::strlen(pSearchSpaceStr));
    const int patternStrLength = static_cast<int>(std::strlen(pPatternStr));
    int i = static_cast<int>(stopLocation);
    int j = 0;
    while (i < searchSpaceLength && j < patternStrLength)
    {
        if (-1 == j || pSearchSpaceStr[i] == pPatternStr[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = nextArray[j];
        }
    }

    if (j != patternStrLength)
    {
        return false;
    }
    startLocation = static_cast<unsigned int>(i - j);  // start of the match
    stopLocation = static_cast<unsigned int>(i);       // one past its end
    return true;
}

/**
 * Builds the KMP fallback table: nextArr[j] is the pattern index to resume
 * comparing at after a mismatch at pattern index j (-1 means "advance to the
 * next text character and restart the pattern").
 *
 * @param pStr    NUL-terminated pattern
 * @param nextArr output table; needs max(strlen(pStr), 1) entries
 */
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(std::strlen(pStr)) - 1;
    int i = 0;
    int k = -1;
    nextArr[0] = -1;
    while (i < lastIndex)
    {
        if (-1 == k || pStr[i] == pStr[k])
        {
            ++i;
            ++k;
            // If falling back to k would compare the same character again and
            // fail again, skip directly to k's own fallback.
            nextArr[i] = (pStr[i] == pStr[k]) ? nextArr[k] : k;
        }
        else
        {
            k = nextArr[k];
        }
    }
}
相关文章推荐
- 基于KMP算法的C++字符串帮助类
- 基于KMP算法的C++字符串帮助类
- 基于KMP算法的C++字符串帮助类
- **KMP算法 Problem A. cal 2016/11/12 字符串 c++
- c++ 数据结构 字符串的自定义类 (文章最后解释了KMP算法)
- C++数据结构之字符串的KMP算法
- c/c++实现字符串模式匹配BM算法和KMP算法
- 基于C++的录入带空格字符串方法总结
- 基于KMP算法的路径下文本查询程序的c++实现2.0版
- [C++]KMP算法匹配字符串
- 基于C++字符串替换函数的使用详解
- 字符串的next特征函数,KMP算法,C++
- 基于c++的统计输入字符串中单词个数程序
- C++字符串完全指南(2) - 各种字符串类- CRT类
- 基于php常用函数总结(数组,字符串,时间,文件操作)
- C++字符串完全指南 - MFC类
- C++字符串完全指引之一 —— Win32 字符编码
- C++字符串完全指南
- C++字符串指南
- Visual C++.NET中 字符串转换方法[2]