utf_8转Unicode,char*(string)转BSTR,BSTR(unicode)转char*(string)
2017-05-19 14:29
176 查看
在进行上下位机通信的时候经常要和这些字符串类型打交道,这次来个彻底的总结。
1.utf_8转Unicode (但要注意自己机器的大小端问题)
// Converts a NUL-terminated UTF-8 string to a NUL-terminated wide string.
//
// @param szu8  UTF-8 input; may be NULL.
// @return      A buffer allocated with new[] that the caller must release
//              with delete[], or NULL when szu8 is NULL.
//
// Each decoded code point is stored as a whole wchar_t value, so the result
// does not depend on the machine's byte order. When wchar_t is 16 bits wide,
// code points above U+FFFF are written as a UTF-16 surrogate pair. Malformed
// input (stray continuation bytes, truncated or overlong sequences, encoded
// surrogates, values above U+10FFFF) is replaced by U+FFFD instead of being
// read past the terminator or silently cutting the output short.
wchar_t * utf_8ToUnicode(const char* szu8) {
    if (NULL == szu8)
        return NULL;

    const size_t srcLen = strlen(szu8);
    const unsigned char* p = reinterpret_cast<const unsigned char*>(szu8);
    const unsigned char* const end = p + srcLen;

    // Every input byte yields at most one output unit (a 4-byte sequence
    // yields at most two), so srcLen units plus the terminator always fit.
    wchar_t* dBuf = new wchar_t[srcLen + 1];
    wchar_t* des = dBuf;

    while (p < end) {
        const unsigned char lead = *p++;
        unsigned long cp = 0;      // code point being assembled
        unsigned long minCp = 0;   // smallest value legal for this length
        int extra = 0;             // number of continuation bytes expected
        bool ok = true;

        if (lead <= 0x7f) {
            cp = lead;
        } else if (lead >= 0xc2 && lead <= 0xdf) {
            cp = lead & 0x1f;
            minCp = 0x80;
            extra = 1;
        } else if (lead >= 0xe0 && lead <= 0xef) {
            cp = lead & 0x0f;
            minCp = 0x800;
            extra = 2;
        } else if (lead >= 0xf0 && lead <= 0xf4) {
            cp = lead & 0x07;
            minCp = 0x10000;
            extra = 3;
        } else {
            // Stray continuation byte or a lead byte that is never valid.
            ok = false;
        }

        for (int i = 0; ok && i < extra; ++i) {
            // Stop at the end of the input instead of reading beyond it.
            if (p < end && (*p & 0xc0) == 0x80)
                cp = (cp << 6) | (*p++ & 0x3f);
            else
                ok = false;
        }

        if (!ok || cp < minCp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
            cp = 0xFFFD;

        if (cp > 0xFFFF && sizeof(wchar_t) == 2) {
            // 16-bit wchar_t: split into a surrogate pair.
            cp -= 0x10000;
            *des++ = static_cast<wchar_t>(0xD800 + (cp >> 10));
            *des++ = static_cast<wchar_t>(0xDC00 + (cp & 0x3ff));
        } else {
            *des++ = static_cast<wchar_t>(cp);
        }
    }

    *des = 0;
    return dBuf;
}
// In fact, a BSTR is stored the same way as Unicode text: two bytes hold one
// character (an ASCII character or a Chinese character), so lightly wrapping
// the function above is enough to obtain a BSTR.
// Creates a BSTR from a NUL-terminated UTF-8 C string.
//
// @param inputStr  UTF-8 text; may be NULL.
// @return          NULL when inputStr is NULL; otherwise whatever
//                  SysAllocString returns for the converted wide string.
BSTR GetBSTRFromString(const char* inputStr) {
    if (!inputStr) {
        return NULL;
    }

    // utf_8ToUnicode hands back a new[] buffer; SysAllocString is given that
    // buffer and the temporary is released before returning.
    const wchar_t* wideText = utf_8ToUnicode(inputStr);
    BSTR allocated = ::SysAllocString((OLECHAR *) wideText);
    delete[] wideText;

    return allocated;
}
// Creates a BSTR from a UTF-8 std::string.
//
// @param inputStr  UTF-8 text; only the part before the first NUL is used
//                  because the conversion works on c_str().
// @return          NULL when inputStr is empty; otherwise whatever
//                  SysAllocString returns for the converted wide string.
BSTR GetBSTRFromString(std::string inputStr) {
    if (inputStr.empty()) {
        return NULL;
    }

    // The intermediate wide buffer comes from new[] and is freed here once
    // SysAllocString has been called with it.
    const wchar_t* wideText = utf_8ToUnicode(inputStr.c_str());
    BSTR allocated = ::SysAllocString((OLECHAR *) wideText);
    delete[] wideText;

    return allocated;
}
2. BSTR(Unicode、wchar_t*)转char*
// Converts a NUL-terminated UTF-16 string to UTF-8.
// (BSTR may be replaced by wchar_t* here.)
//
// @param bstr  Source string; may be NULL. Conversion stops at the first NUL
//              code unit.
// @return      The UTF-8 encoding of the input; an empty string when bstr is
//              NULL or has no characters.
//
// Code units are read as whole values rather than byte by byte, so the result
// does not depend on byte order. A high/low surrogate pair is combined into a
// single 4-byte sequence; an unpaired surrogate or a value above U+10FFFF is
// replaced by U+FFFD.
std::string BSTRToUtf_8(BSTR bstr) {
    std::string utf8;
    if (NULL == bstr)
        return utf8;

    while (*bstr != 0) {
        unsigned long cp = static_cast<unsigned long>(*bstr++);

        if (cp >= 0xD800 && cp <= 0xDBFF) {
            // High surrogate: only valid when a low surrogate follows. A
            // terminating NUL is outside the low range, so it is never consumed.
            const unsigned long next = static_cast<unsigned long>(*bstr);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (next - 0xDC00);
                ++bstr;
            } else {
                cp = 0xFFFD;
            }
        } else if ((cp >= 0xDC00 && cp <= 0xDFFF) || cp > 0x10FFFF) {
            // Lone low surrogate, or a value that is not a Unicode code point.
            cp = 0xFFFD;
        }

        if (cp <= 0x7F) {
            utf8 += static_cast<char>(cp);
        } else if (cp <= 0x7FF) {
            // 110xxxxx 10xxxxxx
            utf8 += static_cast<char>(0xC0 | (cp >> 6));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        } else if (cp <= 0xFFFF) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            utf8 += static_cast<char>(0xE0 | (cp >> 12));
            utf8 += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        } else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            utf8 += static_cast<char>(0xF0 | (cp >> 18));
            utf8 += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            utf8 += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        }
    }

    return utf8;
}
1.utf_8转Unicode (但要注意自己机器的大小端问题)
// Converts a NUL-terminated UTF-8 string to a NUL-terminated wide string.
//
// @param szu8  UTF-8 input; may be NULL.
// @return      A buffer allocated with new[] that the caller must release
//              with delete[], or NULL when szu8 is NULL.
//
// Each decoded code point is stored as a whole wchar_t value, so the result
// does not depend on the machine's byte order. When wchar_t is 16 bits wide,
// code points above U+FFFF are written as a UTF-16 surrogate pair. Malformed
// input (stray continuation bytes, truncated or overlong sequences, encoded
// surrogates, values above U+10FFFF) is replaced by U+FFFD instead of being
// read past the terminator or silently cutting the output short.
wchar_t * utf_8ToUnicode(const char* szu8) {
    if (NULL == szu8)
        return NULL;

    const size_t srcLen = strlen(szu8);
    const unsigned char* p = reinterpret_cast<const unsigned char*>(szu8);
    const unsigned char* const end = p + srcLen;

    // Every input byte yields at most one output unit (a 4-byte sequence
    // yields at most two), so srcLen units plus the terminator always fit.
    wchar_t* dBuf = new wchar_t[srcLen + 1];
    wchar_t* des = dBuf;

    while (p < end) {
        const unsigned char lead = *p++;
        unsigned long cp = 0;      // code point being assembled
        unsigned long minCp = 0;   // smallest value legal for this length
        int extra = 0;             // number of continuation bytes expected
        bool ok = true;

        if (lead <= 0x7f) {
            cp = lead;
        } else if (lead >= 0xc2 && lead <= 0xdf) {
            cp = lead & 0x1f;
            minCp = 0x80;
            extra = 1;
        } else if (lead >= 0xe0 && lead <= 0xef) {
            cp = lead & 0x0f;
            minCp = 0x800;
            extra = 2;
        } else if (lead >= 0xf0 && lead <= 0xf4) {
            cp = lead & 0x07;
            minCp = 0x10000;
            extra = 3;
        } else {
            // Stray continuation byte or a lead byte that is never valid.
            ok = false;
        }

        for (int i = 0; ok && i < extra; ++i) {
            // Stop at the end of the input instead of reading beyond it.
            if (p < end && (*p & 0xc0) == 0x80)
                cp = (cp << 6) | (*p++ & 0x3f);
            else
                ok = false;
        }

        if (!ok || cp < minCp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
            cp = 0xFFFD;

        if (cp > 0xFFFF && sizeof(wchar_t) == 2) {
            // 16-bit wchar_t: split into a surrogate pair.
            cp -= 0x10000;
            *des++ = static_cast<wchar_t>(0xD800 + (cp >> 10));
            *des++ = static_cast<wchar_t>(0xDC00 + (cp & 0x3ff));
        } else {
            *des++ = static_cast<wchar_t>(cp);
        }
    }

    *des = 0;
    return dBuf;
}
// In fact, a BSTR is stored the same way as Unicode text: two bytes hold one
// character (an ASCII character or a Chinese character), so lightly wrapping
// the function above is enough to obtain a BSTR.
// Creates a BSTR from a NUL-terminated UTF-8 C string.
//
// @param inputStr  UTF-8 text; may be NULL.
// @return          NULL when inputStr is NULL; otherwise whatever
//                  SysAllocString returns for the converted wide string.
BSTR GetBSTRFromString(const char* inputStr) {
    if (!inputStr) {
        return NULL;
    }

    // utf_8ToUnicode hands back a new[] buffer; SysAllocString is given that
    // buffer and the temporary is released before returning.
    const wchar_t* wideText = utf_8ToUnicode(inputStr);
    BSTR allocated = ::SysAllocString((OLECHAR *) wideText);
    delete[] wideText;

    return allocated;
}
// Creates a BSTR from a UTF-8 std::string.
//
// @param inputStr  UTF-8 text; only the part before the first NUL is used
//                  because the conversion works on c_str().
// @return          NULL when inputStr is empty; otherwise whatever
//                  SysAllocString returns for the converted wide string.
BSTR GetBSTRFromString(std::string inputStr) {
    if (inputStr.empty()) {
        return NULL;
    }

    // The intermediate wide buffer comes from new[] and is freed here once
    // SysAllocString has been called with it.
    const wchar_t* wideText = utf_8ToUnicode(inputStr.c_str());
    BSTR allocated = ::SysAllocString((OLECHAR *) wideText);
    delete[] wideText;

    return allocated;
}
2. BSTR(Unicode、wchar_t*)转char*
// Converts a NUL-terminated UTF-16 string to UTF-8.
// (BSTR may be replaced by wchar_t* here.)
//
// @param bstr  Source string; may be NULL. Conversion stops at the first NUL
//              code unit.
// @return      The UTF-8 encoding of the input; an empty string when bstr is
//              NULL or has no characters.
//
// Code units are read as whole values rather than byte by byte, so the result
// does not depend on byte order. A high/low surrogate pair is combined into a
// single 4-byte sequence; an unpaired surrogate or a value above U+10FFFF is
// replaced by U+FFFD.
std::string BSTRToUtf_8(BSTR bstr) {
    std::string utf8;
    if (NULL == bstr)
        return utf8;

    while (*bstr != 0) {
        unsigned long cp = static_cast<unsigned long>(*bstr++);

        if (cp >= 0xD800 && cp <= 0xDBFF) {
            // High surrogate: only valid when a low surrogate follows. A
            // terminating NUL is outside the low range, so it is never consumed.
            const unsigned long next = static_cast<unsigned long>(*bstr);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (next - 0xDC00);
                ++bstr;
            } else {
                cp = 0xFFFD;
            }
        } else if ((cp >= 0xDC00 && cp <= 0xDFFF) || cp > 0x10FFFF) {
            // Lone low surrogate, or a value that is not a Unicode code point.
            cp = 0xFFFD;
        }

        if (cp <= 0x7F) {
            utf8 += static_cast<char>(cp);
        } else if (cp <= 0x7FF) {
            // 110xxxxx 10xxxxxx
            utf8 += static_cast<char>(0xC0 | (cp >> 6));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        } else if (cp <= 0xFFFF) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            utf8 += static_cast<char>(0xE0 | (cp >> 12));
            utf8 += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        } else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            utf8 += static_cast<char>(0xF0 | (cp >> 18));
            utf8 += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            utf8 += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            utf8 += static_cast<char>(0x80 | (cp & 0x3F));
        }
    }

    return utf8;
}
相关文章推荐
- String~char~Unicode~UTF-8
- unicode CString string char 等类型字符串转换
- cocos2d-x类型转换(CCstring int string char UTF-8互转)
- cocos2d-x类型转换(CCstring int string char UTF-8互转)
- 2dx学习笔记-----类型转换(CCstring int string char UTF-8互转)
- C++文件操作——Unicode下有关字符类型转换(CString,string,char*)
- 在Python中使用protobuf2.6.1 string format utf-8 and unicode error
- Unicode下wstring(wchar_t*)和string(char*)互相转换
- cocos2d-x类型转换(CCstring int string char UTF-8互转)以及字符串详解
- Qt QString 中文 char* UTF-8 QByteArray QTextCodec unicode gb2312 GBK 乱码与转码问题
- CString,int,string,char,BSTR之间的转换
- 关于char, wchar_t, string, wstring, unicode编码问题
- cocos2d-x类型转换(CCstring int string char UTF-8互转)
- cocos2d-x类型转换(CCstring int string char UTF-8互转)
- UnicodeToMultiByte,ConvertBSTRToString,std::string,CString的区别
- ring3 char 字符数组 传入ring0后 转化为UNICODE_STRING处理
- CString,int,string,char,BSTR之间的转换
- 如何将String转换为unicode形式的char(即/uxxxx)?
- ANSI与UTF-8编码互相转换,CString,std::string,char*,const char*互相转换
- CString,int,string,char,BSTR之间的转换