您的位置:首页 > 其它

ANSI, UNICODE与UTF8相互转换模板类

2009-03-23 19:06 453 查看
在WIN32开发过程中经常需要在不同的字符集之间进行转换,这可以通过WIN32 API函数WideCharToMultiByte和MultiByteToWideChar来完成,但转换过程涉及空间的分配与回收问题,直接调用API稍显麻烦,故在此将字符集转换功能封装到一个模板类中,以简化转换过程。该类代码如下:

/**************************************************************************
* Copyright (C) 2009 by Linmei,Jiang *
* clough@hqu.edu.cn *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/

#ifndef _C_TRANS_CODE_
#define _C_TRANS_CODE_

/**
 * Lazily converts one source string between ANSI, UTF-16 ("Unicode") and UTF-8.
 *
 * The object stores the T value handed to the constructor; the conversion
 * members cast that value to a character pointer, so T is expected to be (or
 * convert to) a pointer to a NUL-terminated string that outlives this object.
 * Each conversion result is allocated on first request, cached, and freed by
 * Release() or the destructor.
 *
 * Copying is safe: a copy shares only the source handle and encoding and starts
 * with empty caches, so two objects never free the same buffer.
 *
 * @tparam T Type of the source string handle (LPCTSTR by default).
 */
template <typename T = LPCTSTR>
class CTransCode
{
public:
    /** Encoding of the source string. EN_NULL means "pick the build default". */
    enum ECode {EN_NULL, EN_ANSI, EN_UNICODE, EN_UTF8};

public:
    /**
     * @param pSrcStr   Source string handle; stored as given.
     * @param enSrcCode Encoding of pSrcStr. EN_NULL is replaced by EN_UNICODE
     *                  when _UNICODE is defined and by EN_ANSI otherwise.
     */
    CTransCode(T pSrcStr, ECode enSrcCode = EN_NULL)
        : m_pSrcStr(pSrcStr), m_enSrcCode(enSrcCode),
          m_pAnsi(NULL), m_pUtf8(NULL), m_pMulti(NULL)
    {
        if (m_enSrcCode == EN_NULL)
        {
#ifdef _UNICODE
            m_enSrcCode = EN_UNICODE;
#else
            m_enSrcCode = EN_ANSI;
#endif
        }
    }

    /**
     * Copies the source handle and encoding only. The cached buffers are NOT
     * shared; the copy rebuilds its own on demand. Without this, the implicit
     * member-wise copy would leave two objects deleting the same arrays.
     */
    CTransCode(const CTransCode& other)
        : m_pSrcStr(other.m_pSrcStr), m_enSrcCode(other.m_enSrcCode),
          m_pAnsi(NULL), m_pUtf8(NULL), m_pMulti(NULL)
    {
    }

    /**
     * Drops this object's cached buffers and adopts the other object's source
     * handle and encoding. Self-assignment is a no-op so the caches survive it.
     */
    CTransCode& operator=(const CTransCode& other)
    {
        if (this != &other)
        {
            Release();
            m_pSrcStr = other.m_pSrcStr;
            m_enSrcCode = other.m_enSrcCode;
        }
        return *this;
    }

    ~CTransCode(void)
    {
        Release();
    }

public:
    /**
     * Each accessor returns the source itself when it is already in the
     * requested encoding, otherwise a cached buffer owned by this object.
     * Cached buffers become invalid after Release() or destruction.
     */
    const wchar_t* ToUnicode();
    const char* ToAnsi();
    const char* ToUtf8();

    /**
     * Frees every cached conversion buffer. May be called manually at any
     * time; the destructor calls it as well. Later conversion calls simply
     * allocate fresh buffers.
     */
    void Release()
    {
        // delete[] on a null pointer is a no-op, so no guards are needed.
        delete[] m_pAnsi;
        m_pAnsi = NULL;

        delete[] m_pUtf8;
        m_pUtf8 = NULL;

        delete[] m_pMulti;
        m_pMulti = NULL;
    }

private:
    T m_pSrcStr;         // source string handle, as passed to the constructor
    ECode m_enSrcCode;   // encoding of m_pSrcStr (never EN_NULL after construction)
    char* m_pAnsi;       // cached ANSI conversion, or NULL
    char* m_pUtf8;       // cached UTF-8 conversion, or NULL
    wchar_t* m_pMulti;   // cached wide-character conversion, or NULL
};

//////////////////////////////////////////////////////////////////////////
/**
 * Returns the source text as a wide-character string.
 *
 * A source already tagged EN_UNICODE is returned as-is (no allocation).
 * Otherwise the source is treated as a multi-byte string (UTF-8 when tagged
 * EN_UTF8, the ANSI code page for anything else), converted once with
 * MultiByteToWideChar, and the result is cached in m_pMulti for later calls.
 *
 * @return Pointer to the wide string; a cached buffer stays valid until
 *         Release() or destruction.
 */
template <typename T>
const wchar_t* CTransCode<T>::ToUnicode()
{
    // Wide source: nothing to convert, hand the caller's string straight back.
    if (m_enSrcCode == EN_UNICODE)
    {
        return (const wchar_t*)m_pSrcStr;
    }

    if (m_pMulti == NULL)
    {
        LPCSTR pszNarrow = (LPCSTR)m_pSrcStr;

        UINT uSrcCodePage = CP_ACP;
        if (m_enSrcCode == EN_UTF8)
        {
            uSrcCodePage = CP_UTF8;
        }

        // Sizing pass: a zero-length output buffer makes the API report the
        // number of wide characters it needs.
        const int cchWide = MultiByteToWideChar(uSrcCodePage, 0, pszNarrow, -1, NULL, 0);

        // One spare element, and the trailing () zero-fills the array, so the
        // result is always terminated even when the conversion writes nothing.
        m_pMulti = new wchar_t[cchWide + 1]();
        MultiByteToWideChar(uSrcCodePage, 0, pszNarrow, -1, m_pMulti, cchWide);
    }

    return m_pMulti;
}

/**
 * Returns the source text encoded in the ANSI code page (CP_ACP).
 *
 * A source already tagged EN_ANSI is returned as-is. A UTF-8 source is first
 * widened through ToUnicode(); any other source is read directly as a wide
 * string. The converted bytes are cached in m_pAnsi for later calls.
 *
 * @return Pointer to the ANSI string; a cached buffer stays valid until
 *         Release() or destruction.
 */
template <typename T>
const char* CTransCode<T>::ToAnsi()
{
    if (m_enSrcCode == EN_ANSI)
    {
        return (const char*)m_pSrcStr;
    }
    if (m_pAnsi != NULL)
    {
        return m_pAnsi;
    }

    // Pick the wide-character input for the narrowing step.
    const wchar_t* pwszWide;
    if (m_enSrcCode == EN_UTF8)
    {
        // UTF-8 cannot be narrowed directly; go through the cached wide form.
        pwszWide = ToUnicode();
    }
    else
    {
        pwszWide = (const wchar_t*)m_pSrcStr;
    }

    // Sizing pass first, then the real conversion into a zero-filled buffer
    // that has one spare byte so the result is always terminated.
    const int cbAnsi = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)pwszWide, -1, NULL, 0, NULL, NULL);
    m_pAnsi = new char[cbAnsi + 1]();
    WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)pwszWide, -1, m_pAnsi, cbAnsi, NULL, NULL);

    return m_pAnsi;
}

/**
 * Returns the source text encoded as UTF-8.
 *
 * A source already tagged EN_UTF8 is returned as-is. An ANSI source is first
 * widened through ToUnicode(); any other source is read directly as a wide
 * string. The converted bytes are cached in m_pUtf8 for later calls.
 *
 * @return Pointer to the UTF-8 string; a cached buffer stays valid until
 *         Release() or destruction.
 */
template <typename T>
const char* CTransCode<T>::ToUtf8()
{
    if (m_enSrcCode == EN_UTF8)
    {
        return (const char*)m_pSrcStr;
    }
    if (m_pUtf8 != NULL)
    {
        return m_pUtf8;
    }

    // Pick the wide-character input for the encoding step.
    const wchar_t* pwszWide;
    if (m_enSrcCode == EN_ANSI)
    {
        // ANSI has to be widened before it can be re-encoded as UTF-8.
        pwszWide = ToUnicode();
    }
    else
    {
        pwszWide = (const wchar_t*)m_pSrcStr;
    }

    // Sizing pass first, then the real conversion into a zero-filled buffer
    // that has one spare byte so the result is always terminated.
    const int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)pwszWide, -1, NULL, 0, NULL, NULL);
    m_pUtf8 = new char[cbUtf8 + 1]();
    WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)pwszWide, -1, m_pUtf8, cbUtf8, NULL, NULL);

    return m_pUtf8;
}

#endif

使用方式如下:

// With the default template argument the source is a TCHAR string and its
// encoding follows the build: UNICODE when _UNICODE is defined, ANSI otherwise.
CTransCode<> tc(_T("字符集转换"));

// Alternatively, state the source string type and its encoding explicitly, e.g.:

// CTransCode<LPCSTR> tc("字符集转换", CTransCode<LPCSTR>::EN_ANSI);

// The accessors return pointers to const. The memory belongs to tc (or is the
// source string itself when no conversion is needed), so do not free it.
const char* pAnsi = tc.ToAnsi(); // ANSI string

const wchar_t* pUnicode = tc.ToUnicode(); // wide-character (Unicode) string

const char* pUtf8 = tc.ToUtf8(); // UTF-8 string

// When finished, the cached buffers can be freed explicitly with Release().
// Converted pointers obtained above must not be used after this call.

tc.Release();

// Calling Release() is optional; the destructor calls it automatically.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: