您的位置:首页 > 编程语言


2006-03-02 14:22 330 查看
l UString.h
#ifndef __USTRING_H__
#define __USTRING_H__

#include <list>

/*
* 文件名: UString.h
* 创建日期: 2005-12-12
* 创建者: Percy Lee
* 修改列表:
* 说明:
* Unicode string class for C++ (in namespace UStr) with basic operations such as:
* length(): get the length of the string;
* u_str(): get the unicode char array by pointer;
* c_str(): nil (returns NULL);
* resize(Size): set the new capacity (= Size) of the string;
* append(str): append str to the end;
* sub_ustr(): get the sub-string for the given section;
* find(str): find the list of sections where sub-string str occurs;
* find_overlap(str): find sub-string str where occurrences may overlap within the base string;
* find_first(str): find the first position of sub-string str;
* be_first(str): test whether str is the very beginning sub-string or not.
* It also supplies two global functions, US_TO_S and S_TO_US,
* for converting between unicode strings and multibyte strings.
* Copyright (c) the Semean Studio. All rights reserved.
* E-mail: semean@163.com
*/
namespace UStr
{
    // Union: Section
    // An object of this union records either a single position (_begin)
    // or an interval (_sect._begin plus _sect._length). Both views share
    // the same storage.
    union Section
    {
        typedef Section value_type;
        typedef Section* pointer_type;
        typedef Section& reference_type;

        // NOTE(review): declaration reconstructed. The implementation file
        // contains a body that zeroes _begin, _sect._begin and _sect._length,
        // which appears to be this default constructor -- confirm.
        Section();

        size_t _begin;
        struct sect
        {
            unsigned int _begin;
            unsigned int _length;
        } _sect;
    };

    typedef long long Int64;
    typedef std::list<Section> SectionList;
    typedef std::list<Section>::iterator SectListIter;

    // Storage-management type of a UString.
    enum USType
    {
        eUSNormal = 0, // ordinary string
        eUSBuffer      // string used as a buffer
    };

    // Class: UString
    // A UString is one of two kinds of object: an ordinary string, whose
    // internal storage is sized to what the string needs, or a buffer
    // string, whose internal storage is an integer multiple of 1K (see the
    // implementation for the management policy).
    class UString
    {
    public:
        typedef UString value_type;
        typedef UString* pointer_type;
        typedef UString& reference_type;

        UString(USType Ustype = eUSNormal);
        UString(const UString& Str);
        UString(const char* pStr);
        UString(const wchar_t* pStr);
        // NOTE(review): destructor declaration reconstructed; the
        // implementation file releases _pUStr in what appears to be the
        // destructor body -- confirm.
        ~UString();

    public: // public interface
        size_t length(void) const;
        wchar_t* u_str(void) const;
        char* c_str(void) const;
        void resize(size_t Size);
        void append(const UString& Str);
        void append(const char* pStr);
        void append(const wchar_t* pStr);
        void append(const wchar_t* pStr, size_t Begin, size_t Length);
        UString sub_ustr(size_t Begin, size_t Length) const;
        UString sub_ustr(const Section& Sect) const;

        /* The find* family of methods.
         * Notes:
         * (1) find and find_overlap return the list of every position at
         *     which the sub-string occurs (the length is fixed, so the union
         *     stores only the start position). The occurrences reported by
         *     find_overlap are allowed to overlap.
         * (2) Returning a list of all sections is only a convenience, and
         *     the returned list affects performance (emitting the STL list
         *     through a reference parameter instead caused memory-management
         *     exceptions when exported from a DLL). In some situations
         *     find_first can be used to walk all occurrences instead.
         * (3) The algorithms behind these functions still await rigorous,
         *     large-scale testing.
         * percylee 2006/3
         */
        SectionList find(const UString& Str) const;
        SectionList find(wchar_t WCh) const;
        SectionList find(const wchar_t* pStr,size_t Begin, size_t Length) const;
        SectionList find_overlap(const UString& Str) const;
        SectionList find_overlap(const wchar_t* pStr,size_t Begin, size_t Length) const;
        Int64 find_first(const UString& Str,size_t Start) const;
        Int64 find_first(wchar_t WCh, size_t Start) const;
        Int64 find_first(const wchar_t* pStr,size_t Begin, size_t Length, size_t Start) const;
        bool be_first(const UString& Str,size_t Start) const;
        bool be_first(const wchar_t* pStr,size_t Begin,size_t Length,size_t Start) const;

    public: // operator overloads
        UString& operator =(const UString& Str);
        UString& operator =(const char* pStr);
        UString& operator =(const wchar_t* pStr);
        wchar_t& operator [](size_t pos);

        friend bool operator == ( const UString& Str1, const UString& Str2 );
        friend bool operator < ( const UString& Str1, const UString& Str2 );
        friend bool operator <= ( const UString& Str1, const UString& Str2 );
        friend bool operator > ( const UString& Str1, const UString& Str2 );
        friend bool operator >= ( const UString& Str1, const UString& Str2 );

    // NOTE(review): the access specifier for the members below was lost;
    // `protected` is used because it cannot break a derived class that the
    // original might have allowed -- tighten to `private` if confirmed.
    protected:
        wchar_t* _pUStr;
        size_t _length;
        size_t _capacity;
        USType _usType;

        void set_ustring(const UString& Str);
        void set_ustring(const char* pStr);
        void set_ustring(const wchar_t* pStr);
        void default_construct(size_t DefaultSize = 0);
        size_t d_capacity(size_t Size);
    };

    // Namespace-scope declarations of the comparison operators. A friend
    // declaration alone does not make the name visible to the qualified
    // definitions (bool UStr::operator == ...) in the implementation file.
    bool operator == ( const UString& Str1, const UString& Str2 );
    bool operator < ( const UString& Str1, const UString& Str2 );
    bool operator <= ( const UString& Str1, const UString& Str2 );
    bool operator > ( const UString& Str1, const UString& Str2 );
    bool operator >= ( const UString& Str1, const UString& Str2 );

    // Global conversion functions between multibyte streams and unicode
    // strings.
    // For S_TO_US the caller must ensure UStrLen >= StrLen;
    // for US_TO_S the caller must ensure StrLen >= UStrLen*2.
    size_t S_TO_US(char* pStr,size_t StrLen,wchar_t* pUStr,size_t UStrLen);
    size_t US_TO_S(wchar_t* pUStr,size_t UStrLen,char* pStr,size_t StrLen);
}

#endif //__USTRING_H__

l UString.cpp

#include "StdAfx.h"
#include "./ustring.h"
#include <stdlib.h>
#include <locale.h>

// Global variables and global functions used internally by this file.

// NOTE(review): presumably the 1K allocation granularity for buffer-type
// strings (eUSBuffer) mentioned in the header -- confirm against d_capacity.
const unsigned int DEFAULT_CAPACITY = 1024;

// Computes the shift (failure) vector used by KMP pattern matching.
//
// For each index i, entry i holds the length of the longest proper prefix
// of pStr[0..i] that is also a suffix of it.
//
// pStr   - pattern characters; need not be NUL-terminated.
// Length - number of characters of pStr to use.
// Returns a new[]-allocated array of Length entries that the caller must
// release with delete[], or NULL when pStr is NULL or Length is 0.
inline size_t* KMPNext(const wchar_t* pStr, size_t Length)
{
    if( !pStr || Length == 0 )
        return NULL;
    size_t* pN = new size_t[Length];
    if( !pN ) // only reachable on toolchains whose new returns NULL on failure
        return NULL;
    pN[0] = 0;

    for( size_t i = 1; i < Length; ++i )
    {
        // Start from the border of the previous prefix and shrink it until
        // it can be extended by pStr[i] (or becomes empty).
        size_t var = pN[i-1];
        while( var > 0 && pStr[i] != pStr[var] )
            var = pN[var-1];

        if( pStr[i] == pStr[var] )
            pN[i] = var + 1;
        else
            pN[i] = 0;
    }

    return pN;
}

// Convenience overload: computes the KMP shift vector for the whole of Str
// by forwarding its character pointer and length to the pointer overload.
// The result is whatever that overload returns.
inline size_t* KMPNext(const UStr::UString& Str)
{
    return KMPNext(Str.u_str(),Str.length());
}

* UStr名空间内的类实现
_begin = 0;
_sect._begin = 0;
_sect._length = 0;

// Constructs an empty string with the given storage-management type.
// NOTE(review): the default_construct() call is reconstructed; the visible
// body only set _usType and left the buffer members unset -- confirm.
UStr::UString::UString(USType Ustype/* = eUSNormal*/)
{
    _usType = Ustype;
    default_construct();
}

// Copy constructor. The copy is always an ordinary (eUSNormal) string,
// regardless of the storage type of Str.
// NOTE(review): the two calls after the _usType assignment are
// reconstructed (set_ustring reads _capacity and _pUStr, so they are
// initialised first) -- confirm.
UStr::UString::UString(const UString& Str)
{
    _usType = eUSNormal;
    default_construct();
    set_ustring(Str);
}

// Constructs an ordinary (eUSNormal) string from a multibyte C string.
// A NULL pStr yields an empty string.
// NOTE(review): the default_construct() and set_ustring() calls are
// reconstructed around the visible `if( pStr )` -- confirm.
UStr::UString::UString(const char* pStr)
{
    _usType = eUSNormal;
    default_construct();
    if( pStr )
        set_ustring(pStr);
}

// Constructs an ordinary (eUSNormal) string from a wide C string.
// A NULL pStr yields an empty string.
// NOTE(review): the default_construct() and set_ustring() calls are
// reconstructed around the visible `if( pStr )` -- confirm.
UStr::UString::UString(const wchar_t* pStr)
{
    _usType = eUSNormal;
    default_construct();
    if( pStr )
        set_ustring(pStr);
}

delete[] _pUStr;
_length = 0;

size_t UStr::UString::length(void) const
return _length;

wchar_t* UStr::UString::u_str(void) const
return _pUStr;

// Not implemented: always returns NULL.
char* UStr::UString::c_str(void) const
{
    return NULL;
}

void UStr::UString::resize(size_t Size)
delete[] _pUStr;
_length = 0;

void UStr::UString::append(const UString& Str)
size_t len = Str.length();
if( len <= 0 )


void UStr::UString::append(const char* pStr)
if( !pStr )
UString ustr(pStr);

void UStr::UString::append(const wchar_t* pStr)
if( !pStr )
size_t len = wcslen(pStr);
if( len <= 0 )


void UStr::UString::append(const wchar_t* pStr, size_t Begin, size_t Length)
if( !pStr || Length <= 0 )

if( _capacity < _length + Length )
_capacity += d_capacity(Length);
wchar_t* pBuf = new wchar_t[_capacity+1];
for(size_t i = 0; i < Length; i ++)
pBuf[_length+i] = pStr[Begin+i];
_length += Length;
pBuf[_length] = L'/0';

delete[] _pUStr;
_pUStr = pBuf;
pBuf = NULL;
for( size_t i = 0; i < Length; i ++ )
_pUStr[_length+i] = pStr[Begin+i];
_length += Length;
_pUStr[_length] = L'/0';

// Returns the sub-string of Length characters starting at Begin, or an
// empty string when the requested range does not lie inside this string.
// NOTE(review): the append call that fills the result is reconstructed.
UStr::UString UStr::UString::sub_ustr(size_t Begin, size_t Length) const
{
    UString ustr;
    // Written without Begin + Length so that a huge argument cannot wrap
    // around and pass the range check.
    if( Begin > _length || Length > _length - Begin )
        return ustr;

    ustr.append(_pUStr,Begin,Length);
    return ustr;
}

// Returns the sub-string described by the interval view of Sect
// (_sect._begin, _sect._length), or an empty string when that interval
// does not lie inside this string.
UStr::UString UStr::UString::sub_ustr(const UStr::Section& Sect) const
{
    // Widen to size_t before the range check so the two unsigned int
    // fields cannot wrap when added.
    const size_t begin = Sect._sect._begin;
    const size_t length = Sect._sect._length;
    return sub_ustr(begin,length);
}

// Finds every non-overlapping occurrence of Str; forwards to the
// pointer/offset/length overload using the whole of Str.
UStr::SectionList UStr::UString::find(const UString& Str) const
{
    return find(Str.u_str(),0,Str.length());
}

// Returns the positions of every occurrence of the character WCh, in
// ascending order. Each Section stores the position in _begin.
// NOTE(review): the push_back of each hit is reconstructed -- confirm.
UStr::SectionList UStr::UString::find(wchar_t WCh) const
{
    SectionList ustrList;
    Section aSection;
    for( size_t i = 0; i < _length; ++i )
    {
        if( _pUStr[i] == WCh )
        {
            aSection._begin = i;
            ustrList.push_back(aSection);
        }
    }

    return ustrList;
}

// Finds every non-overlapping occurrence of the pattern
// pStr[Begin .. Begin+Length) using KMP matching.
//
// Returns the start positions (stored in Section::_begin) in ascending
// order; the list is empty when pStr is NULL, the pattern is empty or
// longer than this string, or the shift table could not be built.
// NOTE(review): the push_back of each hit is reconstructed -- confirm.
UStr::SectionList UStr::UString::find(const wchar_t* pStr,size_t Begin, size_t Length) const
{
    SectionList ustrList;
    if( !pStr || Length > _length )
        return ustrList;
    const wchar_t* pStrBegin = pStr+Begin;
    size_t* pKMPNext = KMPNext(pStrBegin,Length);
    if( !pKMPNext )
        return ustrList;
    Section aSection;
    size_t strPos = 0; // number of pattern characters matched so far
    for( size_t i = 0; i < _length; ++i )
    {
        while( strPos > 0 && pStrBegin[strPos] != _pUStr[i] )
            strPos = pKMPNext[strPos-1];
        if( pStrBegin[strPos] == _pUStr[i] )
            ++strPos;
        if( strPos == Length )
        {
            aSection._begin = i - Length + 1;
            ustrList.push_back(aSection);
            strPos = 0; // restart from the beginning: matches may not overlap
        }
    }

    delete[] pKMPNext;
    return ustrList;
}

// Finds every occurrence of Str, overlapping ones included; forwards to
// the pointer/offset/length overload using the whole of Str.
UStr::SectionList UStr::UString::find_overlap(const UString& Str) const
{
    return find_overlap(Str.u_str(),0,Str.length());
}

// Finds every occurrence of the pattern pStr[Begin .. Begin+Length),
// including occurrences that overlap one another, using KMP matching.
//
// Returns the start positions (stored in Section::_begin) in ascending
// order; the list is empty when pStr is NULL, the pattern is empty or
// longer than this string, or the shift table could not be built.
// NOTE(review): the push_back of each hit is reconstructed -- confirm.
UStr::SectionList UStr::UString::find_overlap(const wchar_t* pStr,size_t Begin, size_t Length) const
{
    SectionList ustrList;
    if( !pStr || Length > _length )
        return ustrList;
    const wchar_t* pStrBegin = pStr+Begin;
    size_t* pKMPNext = KMPNext(pStrBegin,Length);
    if( !pKMPNext )
        return ustrList;
    Section aSection;
    size_t strPos = 0; // number of pattern characters matched so far
    for( size_t i = 0; i < _length; ++i )
    {
        while( strPos > 0 && pStrBegin[strPos] != _pUStr[i] )
            strPos = pKMPNext[strPos-1];
        if( pStrBegin[strPos] == _pUStr[i] )
            ++strPos;
        if( strPos == Length )
        {
            aSection._begin = i - Length + 1;
            ustrList.push_back(aSection);
            // Fall back to the longest border of the pattern instead of
            // rewinding i: this reports the same overlapping matches
            // without re-scanning text that was already compared.
            strPos = pKMPNext[Length-1];
        }
    }

    delete[] pKMPNext;
    return ustrList;
}

// Returns the position of the first occurrence of Str at or after Start,
// or -1 when there is none or when Str cannot fit in the remaining text.
UStr::Int64 UStr::UString::find_first(const UStr::UString& Str,size_t Start) const
{
    size_t length = Str.length();
    // Written without Start + length so that a huge Start cannot wrap.
    if( Start > _length || length > _length - Start )
        return -1;

    return find_first(Str.u_str(),0,length,Start);
}

// Returns the position of the first occurrence of the character WCh at or
// after Start, or -1 when Start is past the end or WCh does not occur.
UStr::Int64 UStr::UString::find_first(wchar_t WCh, size_t Start) const
{
    if( _length <= Start )
        return -1;

    for( size_t i = Start; i < _length; ++i )
    {
        if( _pUStr[i] == WCh )
            return static_cast<Int64>(i);
    }

    return -1;
}

// Returns the position of the first occurrence, at or after Start, of the
// pattern pStr[Begin .. Begin+Length), found with KMP matching.
//
// Returns -1 when pStr is NULL, the pattern is empty, the pattern cannot
// fit in the text remaining after Start, the shift table could not be
// built, or no occurrence exists.
UStr::Int64 UStr::UString::find_first(const wchar_t* pStr,
                                      size_t Begin,
                                      size_t Length,
                                      size_t Start
                                      ) const
{
    // Written without Start + Length so that a huge argument cannot wrap.
    if( !pStr || Start > _length || Length > _length - Start )
        return -1;
    const wchar_t* pStrBegin = pStr+Begin;
    size_t* pKMPNext = KMPNext(pStrBegin,Length);
    if( !pKMPNext )
        return -1;
    size_t strPos = 0; // number of pattern characters matched so far
    for( size_t i = Start; i < _length; ++i )
    {
        while( strPos > 0 && pStrBegin[strPos] != _pUStr[i] )
            strPos = pKMPNext[strPos-1];
        if( pStrBegin[strPos] == _pUStr[i] )
            ++strPos;
        if( strPos == Length )
        {
            delete[] pKMPNext;
            return static_cast<Int64>(i) - static_cast<Int64>(Length) + 1;
        }
    }

    delete[] pKMPNext;
    return -1;
}

bool UStr::UString::be_first(const UString& Str,size_t Start) const
return be_first(Str.u_str(),0,Str.length(),Start);

bool UStr::UString::be_first(const wchar_t* pStr,
size_t Begin,
size_t Length,
size_t Start
) const
if( _length < Start+Length )
return false;
for( size_t i = 0; i < Length; i ++ )
if( _pUStr[Start+i] != pStr[Begin+i] )
return false;

return true;

// Copy assignment. Self-assignment is a no-op.
// NOTE(review): the set_ustring(Str) call guarded by the self-assignment
// check is reconstructed -- confirm.
UStr::UString& UStr::UString::operator =(const UString& Str)
{
    if( this != &Str )
        set_ustring(Str);
    return *this;
}

// Assigns from a multibyte C string. A NULL pStr resets this object to an
// empty string.
// NOTE(review): the set_ustring call, the else branch around the visible
// delete[], and the default_construct() call are reconstructed -- confirm.
UStr::UString& UStr::UString::operator =(const char* pStr)
{
    if( pStr )
    {
        set_ustring(pStr);
    }
    else
    {
        delete[] _pUStr;
        _pUStr = NULL; // keep the object destructible if the allocation below throws
        default_construct();
    }
    return *this;
}

// Assigns from a wide C string. Assigning this object's own buffer is a
// no-op; a NULL pStr resets this object to an empty string.
// NOTE(review): the set_ustring call and the default_construct() call
// after the visible delete[] are reconstructed -- confirm.
UStr::UString& UStr::UString::operator =(const wchar_t* pStr)
{
    if( pStr && pStr != this->_pUStr )
    {
        set_ustring(pStr);
    }
    else if( !pStr )
    {
        delete[] _pUStr;
        _pUStr = NULL; // keep the object destructible if the allocation below throws
        default_construct();
    }
    return *this;
}

wchar_t& UStr::UString::operator [](size_t pos)
static wchar_t wch;
if( pos >= _length )
return wch;

return _pUStr[pos];

void UStr::UString::set_ustring(const UString& Str)
_length = Str.length();
if( _capacity < _length )
delete[] _pUStr;
_capacity = d_capacity(_length);
_pUStr = new wchar_t[ _capacity+1 ];

_pUStr[_length] = L'/0';

void UStr::UString::set_ustring(const char* pStr)
size_t len = strlen(pStr);
if( _capacity < len )
delete[] _pUStr;
_capacity = d_capacity(len);
_pUStr = new wchar_t[_capacity+1];


_length = mbstowcs(_pUStr,pStr,len*sizeof(char));
_pUStr[ _length ] = L'/0';

void UStr::UString::set_ustring(const wchar_t* pStr)
_length = wcslen(pStr);
if( _capacity < _length )
delete[] _pUStr;
_capacity = d_capacity(_length);
_pUStr = new wchar_t[_capacity+1];

wcsncpy( _pUStr,pStr,_length );
_pUStr[_length] = L'/0';

void UStr::UString::default_construct(size_t DefaultSize/* = 0*/)
_capacity = d_capacity(DefaultSize);
if( _pUStr = new wchar_t[_capacity+1] )
_pUStr[0] = L'/0';
_capacity = 0;

_length = 0;

size_t UStr::UString::d_capacity(size_t Size)
if( _usType == eUSNormal )
return Size+1;
else //if( _usType == eUSBuffer )

* UString友元比较函数

bool UStr::operator == ( const UStr::UString& Str1, const UStr::UString& Str2 )
size_t len = Str1.length();
if( len != Str2.length() )
return false;

wchar_t* pStr1 = Str1.u_str();
wchar_t* pStr2 = Str2.u_str();

if( wcscmp(pStr1,pStr2) != 0 )
return false;

return true;

// Orders by length first; strings of equal length are ordered by wcscmp.
bool UStr::operator < ( const UStr::UString& Str1, const UStr::UString& Str2 )
{
    const size_t length1 = Str1.length();
    const size_t length2 = Str2.length();
    if( length1 != length2 )
        return length1 < length2;

    return wcscmp(Str1.u_str(),Str2.u_str()) < 0;
}

// Orders by length first; strings of equal length are ordered by wcscmp.
bool UStr::operator <= ( const UStr::UString& Str1, const UStr::UString& Str2 )
{
    const size_t length1 = Str1.length();
    const size_t length2 = Str2.length();
    if( length1 != length2 )
        return length1 < length2;

    return wcscmp(Str1.u_str(),Str2.u_str()) <= 0;
}

// Orders by length first; strings of equal length are ordered by wcscmp.
bool UStr::operator > ( const UStr::UString& Str1, const UStr::UString& Str2 )
{
    const size_t length1 = Str1.length();
    const size_t length2 = Str2.length();
    if( length1 != length2 )
        return length1 > length2;

    return wcscmp(Str1.u_str(),Str2.u_str()) > 0;
}

// Orders by length first; strings of equal length are ordered by wcscmp.
bool UStr::operator >= ( const UStr::UString& Str1, const UStr::UString& Str2 )
{
    const size_t length1 = Str1.length();
    const size_t length2 = Str2.length();
    if( length1 != length2 )
        return length1 > length2;

    return wcscmp(Str1.u_str(),Str2.u_str()) >= 0;
}

* UStr名空间内的全局函数的实现
size_t UStr::S_TO_US(char* pStr,size_t StrLen,wchar_t* pUStr,size_t UStrLen)
if( !pStr || !pUStr || UStrLen < StrLen )
return 0;

char* pStr2 = new char[StrLen+1];
pStr2[StrLen] = '/0';


size_t len = mbstowcs(pUStr,pStr2,StrLen*sizeof(char));
pUStr[ len ] = L'/0';

delete[] pStr2;
return len;

// Converts UStrLen wide characters of pUStr into multibyte characters
// stored in pStr.
//
// pUStr   - source wide characters; need not be NUL-terminated.
// UStrLen - number of source wide characters.
// pStr    - destination buffer.
// StrLen  - size of pStr in bytes; must be >= UStrLen*2.
// Returns the number of bytes written, or 0 when an argument is invalid or
// the conversion fails. The result is NUL-terminated only when there is
// room left in pStr.
// NOTE(review): the copy into the temporary buffer was lost from this
// listing and is reconstructed; a setlocale() call may also have been lost
// (this file includes <locale.h>). wcstombs converts according to the
// current C locale -- confirm.
size_t UStr::US_TO_S(wchar_t* pUStr,size_t UStrLen,char* pStr,size_t StrLen)
{
    // Same test as StrLen < 2*UStrLen, written so it cannot overflow.
    if( !pStr || !pUStr || StrLen/2 < UStrLen )
        return 0;

    // wcstombs needs a NUL-terminated source, so work on a terminated copy.
    wchar_t* pUStr2 = new wchar_t[UStrLen+1];
    for( size_t i = 0; i < UStrLen; ++i )
        pUStr2[i] = pUStr[i];
    pUStr2[UStrLen] = L'\0';

    // Never let wcstombs write more bytes than the caller's buffer holds:
    // where wchar_t is wider than 2 bytes, UStrLen*sizeof(wchar_t) can
    // exceed the guaranteed StrLen.
    size_t maxBytes = UStrLen*sizeof(wchar_t);
    if( maxBytes > StrLen )
        maxBytes = StrLen;

    size_t len = wcstombs(pStr,pUStr2,maxBytes);
    delete[] pUStr2;

    // An unconvertible character is reported as (size_t)-1, which must not
    // be used as an index.
    if( len == static_cast<size_t>(-1) )
        len = 0;
    // Terminate only inside the caller's buffer.
    if( len < StrLen )
        pStr[ len ] = '\0';

    return len;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息