您的位置:首页 > 其它

utf,unicode,ascii字符串之间的相互转换

2008-04-08 15:06 519 查看
utf,unicode,ascii字符串之间的相互转换



作者:飘飘白云



utf8 转换到 utf16

// Converts a UTF-8 encoded string to UTF-16.
//
// utf8:    NUL-terminated UTF-8 text.
// returns: the UTF-16 equivalent, or an empty string if the conversion fails.
CStringW ConvertUTF8ToUTF16(const CStringA& utf8)
{
	// With a source length of -1 the returned count INCLUDES the terminating NUL.
	const int required = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	if (required <= 0) {
		return CStringW();
	}

	CStringW result;
	WCHAR* buffer = result.GetBuffer(required);

	const int written = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, buffer, required);

	// ReleaseBuffer expects the string length WITHOUT the terminator; passing the
	// raw count would leave a trailing NUL inside the string and make
	// GetLength() one too large.
	result.ReleaseBuffer(written > 0 ? written - 1 : 0);

	return result;
}




unicode 转换到 utf8



// Converts a UTF-16 string to UTF-8.
//
// unicode: NUL-terminated UTF-16 text.
// returns: the UTF-8 equivalent, or an empty string if the conversion fails.
CStringA ConvertUnicodeToUTF8(const CStringW& unicode)
{
	// With a source length of -1 the returned byte count INCLUDES the terminating NUL.
	const int required = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, NULL, 0, NULL, NULL);
	if (required <= 0) {
		return CStringA();
	}

	CStringA result;
	char* buffer = result.GetBuffer(required);

	const int written = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, buffer, required, NULL, NULL);

	// ReleaseBuffer expects the string length WITHOUT the terminator; passing the
	// raw count would leave a trailing NUL inside the string and make
	// GetLength() one too large.
	result.ReleaseBuffer(written > 0 ? written - 1 : 0);

	return result;
}




unicode 转换到 ansi(系统默认代码页 CP_ACP,并非严格意义上的 ascii)



// Converts a UTF-16 string to the ANSI code page selected by CP_ACP.
//
// unicode: NUL-terminated UTF-16 text.
// returns: the converted multi-byte string, or an empty string if the
//          conversion fails.
CStringA ConvertUnicodeToAnsi(const CStringW& unicode)
{
	// With a source length of -1 the returned byte count INCLUDES the terminating NUL.
	const int required = WideCharToMultiByte(CP_ACP, 0, unicode, -1, NULL, 0, NULL, NULL);
	if (required <= 0) {
		return CStringA();
	}

	CStringA result;
	char* buffer = result.GetBuffer(required);

	const int written = WideCharToMultiByte(CP_ACP, 0, unicode, -1, buffer, required, NULL, NULL);

	// ReleaseBuffer expects the string length WITHOUT the terminator; passing the
	// raw count would leave a trailing NUL inside the string and make
	// GetLength() one too large.
	result.ReleaseBuffer(written > 0 ? written - 1 : 0);

	return result;
}






另外还有一个不调用 WideCharToMultiByte、手工实现的 utf16 转换到 utf8 的版本:



// Converts wide-character (UTF-16 / UTF-32) text to UTF-8 without calling
// WideCharToMultiByte.
//
// src:     input code units; need not be NUL-terminated.
// srcLen:  number of code units in src.
// dest:    output buffer that receives the UTF-8 bytes plus a terminating NUL.
// destLen: capacity of dest in bytes, including room for the terminator.
// returns: number of bytes written to dest, NOT counting the terminator.
//          Returns 0 without touching dest if src or dest is null or
//          destLen <= 0.
//
// A valid surrogate pair is combined into one 4-byte sequence. An unpaired
// surrogate or a value above U+10FFFF is replaced by U+FFFD. Conversion stops
// before any character whose complete encoding (plus the terminator) would not
// fit, so dest never ends with a truncated multi-byte sequence.
int ConvertUnicodeToUTF8(const wchar_t* src, int srcLen, unsigned char* dest, int destLen)
{
	if (!src || !dest || destLen <= 0) {
		return 0;
	}

	const unsigned long kReplacement = 0xFFFDUL;
	int outputlen = 0;	/* bytes written so far */

	for (int i = 0; i < srcLen; i++) {
		unsigned long cp = static_cast<unsigned long>(src[i]);
		int extraUnits = 0;	/* additional source units consumed by this character */

		if (cp >= 0xD800UL && cp <= 0xDBFFUL) {
			// High surrogate: must be followed by a low surrogate.
			const unsigned long low =
				(i + 1 < srcLen) ? static_cast<unsigned long>(src[i + 1]) : 0UL;
			if (low >= 0xDC00UL && low <= 0xDFFFUL) {
				cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
				extraUnits = 1;
			} else {
				cp = kReplacement;
			}
		} else if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL) {
			// Stray low surrogate or a value outside the Unicode range.
			cp = kReplacement;
		}

		const int needed = (cp < 0x80UL) ? 1 : (cp < 0x800UL) ? 2 : (cp < 0x10000UL) ? 3 : 4;

		// Always keep one byte free for the terminating NUL.
		if (needed > destLen - 1 - outputlen) {
			break;
		}

		switch (needed) {
		case 1:
			// 0xxxxxxx
			dest[outputlen++] = static_cast<unsigned char>(cp);
			break;
		case 2:
			// 110xxxxx 10xxxxxx
			dest[outputlen++] = static_cast<unsigned char>(0xC0UL | (cp >> 6));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
			break;
		case 3:
			// 1110xxxx 10xxxxxx 10xxxxxx
			dest[outputlen++] = static_cast<unsigned char>(0xE0UL | (cp >> 12));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
			break;
		default:
			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			dest[outputlen++] = static_cast<unsigned char>(0xF0UL | (cp >> 18));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 12) & 0x3FUL));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
			dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
			break;
		}

		i += extraUnits;
	}

	dest[outputlen] = '\0';
	return outputlen;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: