utf-7与UNICODE的转换。
2007-11-23 15:29
363 查看
在处理IMAP4时,发现从服务器发送过来的消息,是用UTF7进行编码的。
字符串形如
&XfJT0ZABkK5O9g-
这是IMAP4使用的“修改版UTF-7”编码(见RFC 3501第5.1.3节):它用&代替+作为转义起始符,并在Base64部分用“,”代替“/”。在进行处理时,需要将其首字符&改为+(Base64部分若出现“,”,还需改回“/”),
变成
+XfJT0ZABkK5O9g-
然后进行转换,转换的代码如下。
1、C++代码
// Re-encodes a narrow (byte) string from one Windows code page to another by
// round-tripping through UTF-16.
//
//   str            - input bytes, interpreted in sourceCodepage
//   sourceCodepage - code page of the input  (e.g. CP_UTF7)
//   targetCodepage - code page of the output (e.g. CP_ACP, CP_UTF8)
//
// Returns the re-encoded string, or an empty string if either API call
// reports failure (returns 0).
//
// NOTE(review): str is passed where an LPCSTR is expected, so this appears to
// assume an MBCS (non-UNICODE) build in which CString is a narrow string.
CString Convert(CString str, int sourceCodepage, int targetCodepage)
{
    // Pass 1: ask how many wide characters are needed. A source length of -1
    // means "NUL-terminated", and the returned count includes the terminator.
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, str, -1, NULL, 0);
    if (unicodeLen <= 0)
        return CString();

    wchar_t *pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
    // Pass 2: source code page -> UTF-16,
    // e.g. "a我b" becomes 61 00 11 62 62 00 00 00.
    MultiByteToWideChar(sourceCodepage, 0, str, -1, pUnicode, unicodeLen);

    // Pass 3: ask how many bytes the target encoding needs (terminator included).
    int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, NULL, 0, NULL, NULL);
    if (targetLen <= 0)
    {
        delete[] pUnicode;
        return CString();
    }

    char *pTargetData = new char[targetLen + 1];
    memset(pTargetData, 0, targetLen + 1);
    // Pass 4: UTF-16 -> target code page,
    // e.g. with CP_UTF8 "a我b" becomes 61 e6 88 91 62 00.
    WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, NULL, NULL);

    // Copy into the result before releasing the temporary buffer.
    CString rt(pTargetData);

    // Both buffers were allocated with new[], so they must be released with delete[].
    delete[] pTargetData;
    delete[] pUnicode;
    return rt;
}
2、DELPHI代码
{ Re-encodes a narrow (byte) string from one Windows code page to another by
  round-tripping through UTF-16.

  str            - input bytes, interpreted in sourceCodepage
  sourceCodepage - code page of the input  (e.g. CP_UTF7)
  targetCodepage - code page of the output (e.g. CP_ACP, CP_UTF8)

  Returns the re-encoded string, or '' if either API call reports failure
  (returns 0).

  NOTE(review): str is hard-cast to PAnsiChar, so this appears to assume a
  compiler where string is a single-byte AnsiString - confirm before using it
  where string is a Unicode string. }
function Convert(str: string; sourceCodepage: Cardinal; targetCodepage: Cardinal): string;
var
  UnicodeLen: Integer;
  pUnicode: PWideChar;
  pTargetData: PAnsiChar;
  targetLen: Integer;
begin
  Result := '';

  // Pass 1: ask how many wide characters are needed. A source length of -1
  // means "NUL-terminated", and the returned count includes the terminator.
  UnicodeLen := MultiByteToWideChar(sourceCodepage, 0, PAnsiChar(str), -1, nil, 0);
  if UnicodeLen <= 0 then
    Exit;

  GetMem(pUnicode, (UnicodeLen + 1) * SizeOf(WideChar));
  try
    ZeroMemory(pUnicode, (UnicodeLen + 1) * SizeOf(WideChar));
    // Pass 2: source code page -> UTF-16,
    // e.g. "a我b" becomes 61 00 11 62 62 00 00 00.
    MultiByteToWideChar(sourceCodepage, 0, PAnsiChar(str), -1, pUnicode, UnicodeLen);

    // Pass 3: ask how many bytes the target encoding needs (terminator included).
    targetLen := WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, nil, 0, nil, nil);
    if targetLen <= 0 then
      Exit; // the outer finally still releases pUnicode

    GetMem(pTargetData, targetLen + 1);
    try
      ZeroMemory(pTargetData, targetLen + 1);
      // Pass 4: UTF-16 -> target code page,
      // e.g. with CP_UTF8 "a我b" becomes 61 e6 88 91 62 00.
      WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, nil, nil);
      // Assigning a NUL-terminated pointer copies the bytes up to the terminator.
      Result := pTargetData;
    finally
      FreeMem(pTargetData);
    end;
  finally
    FreeMem(pUnicode);
  end;
end;
字符串形如
&XfJT0ZABkK5O9g-
这是IMAP4使用的“修改版UTF-7”编码(见RFC 3501第5.1.3节):它用&代替+作为转义起始符,并在Base64部分用“,”代替“/”。在进行处理时,需要将其首字符&改为+(Base64部分若出现“,”,还需改回“/”),
变成
+XfJT0ZABkK5O9g-
然后进行转换,转换的代码如下。
1、C++代码
// Re-encodes a narrow (byte) string from one Windows code page to another by
// round-tripping through UTF-16.
//
//   str            - input bytes, interpreted in sourceCodepage
//   sourceCodepage - code page of the input  (e.g. CP_UTF7)
//   targetCodepage - code page of the output (e.g. CP_ACP, CP_UTF8)
//
// Returns the re-encoded string, or an empty string if either API call
// reports failure (returns 0).
//
// NOTE(review): str is passed where an LPCSTR is expected, so this appears to
// assume an MBCS (non-UNICODE) build in which CString is a narrow string.
CString Convert(CString str, int sourceCodepage, int targetCodepage)
{
    // Pass 1: ask how many wide characters are needed. A source length of -1
    // means "NUL-terminated", and the returned count includes the terminator.
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, str, -1, NULL, 0);
    if (unicodeLen <= 0)
        return CString();

    wchar_t *pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
    // Pass 2: source code page -> UTF-16,
    // e.g. "a我b" becomes 61 00 11 62 62 00 00 00.
    MultiByteToWideChar(sourceCodepage, 0, str, -1, pUnicode, unicodeLen);

    // Pass 3: ask how many bytes the target encoding needs (terminator included).
    int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, NULL, 0, NULL, NULL);
    if (targetLen <= 0)
    {
        delete[] pUnicode;
        return CString();
    }

    char *pTargetData = new char[targetLen + 1];
    memset(pTargetData, 0, targetLen + 1);
    // Pass 4: UTF-16 -> target code page,
    // e.g. with CP_UTF8 "a我b" becomes 61 e6 88 91 62 00.
    WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, NULL, NULL);

    // Copy into the result before releasing the temporary buffer.
    CString rt(pTargetData);

    // Both buffers were allocated with new[], so they must be released with delete[].
    delete[] pTargetData;
    delete[] pUnicode;
    return rt;
}
2、DELPHI代码
{ Re-encodes a narrow (byte) string from one Windows code page to another by
  round-tripping through UTF-16.

  str            - input bytes, interpreted in sourceCodepage
  sourceCodepage - code page of the input  (e.g. CP_UTF7)
  targetCodepage - code page of the output (e.g. CP_ACP, CP_UTF8)

  Returns the re-encoded string, or '' if either API call reports failure
  (returns 0).

  NOTE(review): str is hard-cast to PAnsiChar, so this appears to assume a
  compiler where string is a single-byte AnsiString - confirm before using it
  where string is a Unicode string. }
function Convert(str: string; sourceCodepage: Cardinal; targetCodepage: Cardinal): string;
var
  UnicodeLen: Integer;
  pUnicode: PWideChar;
  pTargetData: PAnsiChar;
  targetLen: Integer;
begin
  Result := '';

  // Pass 1: ask how many wide characters are needed. A source length of -1
  // means "NUL-terminated", and the returned count includes the terminator.
  UnicodeLen := MultiByteToWideChar(sourceCodepage, 0, PAnsiChar(str), -1, nil, 0);
  if UnicodeLen <= 0 then
    Exit;

  GetMem(pUnicode, (UnicodeLen + 1) * SizeOf(WideChar));
  try
    ZeroMemory(pUnicode, (UnicodeLen + 1) * SizeOf(WideChar));
    // Pass 2: source code page -> UTF-16,
    // e.g. "a我b" becomes 61 00 11 62 62 00 00 00.
    MultiByteToWideChar(sourceCodepage, 0, PAnsiChar(str), -1, pUnicode, UnicodeLen);

    // Pass 3: ask how many bytes the target encoding needs (terminator included).
    targetLen := WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, nil, 0, nil, nil);
    if targetLen <= 0 then
      Exit; // the outer finally still releases pUnicode

    GetMem(pTargetData, targetLen + 1);
    try
      ZeroMemory(pTargetData, targetLen + 1);
      // Pass 4: UTF-16 -> target code page,
      // e.g. with CP_UTF8 "a我b" becomes 61 e6 88 91 62 00.
      WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, nil, nil);
      // Assigning a NUL-terminated pointer copies the bytes up to the terminator.
      Result := pTargetData;
    finally
      FreeMem(pTargetData);
    end;
  finally
    FreeMem(pUnicode);
  end;
end;
相关文章推荐
- Python编码UNICODE GBK UTF-8字符集转换的正确姿势
- UNICODE(UTF-16)与UTF-8编码的相互转换
- 黑马程序员-Unicode,ISO-8859-1,GBK,UTF-8编码及相互转换
- 多字节MBCS与UTF-8、Unicode之间的转换
- [Python] 中文编码问题:raw_input输入、文件读取、变量比较等str、unicode、utf-8转换问题
- gb2312 和 unicode (UTF-8)间的编码转换
- c++字符集之间转换(UTF-8,UNICODE,Gb2312)
- JAVA字符编码系列二:Unicode,ISO-8859,GBK,UTF-8编码及相互转换
- unicode、utf-8、ansi的故事及其相互转换
- Unicode,ISO-8859,GBK,UTF-8编码及相互转换
- 也谈UTF-8转换为Unicode方法
- 在J2ME实现UTF-8转换为Unicode编码的方法
- UTF-8与Unicode字符的相互转换 与 16进制Unicode转换汉字
- Ansi、Unicode、UTF-8字符串之间的转换和写入文本文件
- 多字节与UTF-8、Unicode之间的转换
- [Python爬虫] 中文编码问题:raw_input输入、文件读取、变量比较等str、unicode、utf-8转换问题
- UTF-8和Unicode之间的转换!
- C++中GB2312、UTF-8、unicode 之间转换
- UTF-8 和 Unicode 的转换(存疑)
- Unicode转换String UTF-8