您的位置:首页 > 其它

统计UTF-8编码字符串中的字符个数

2016-02-18 16:12 218 查看
方法1:转载自:http://blog.csdn.net/chrisniu1984/article/details/7359908
/**
 * Counts the characters in a UTF-8 encoded byte buffer.
 *
 * A byte is counted when it starts a character: either a single-byte ASCII
 * value (0x00-0x7F) or a multi-byte lead byte (0xC0-0xFD, which also covers
 * the legacy 5- and 6-byte forms). Continuation bytes (0x80-0xBF) and the
 * bytes 0xFE/0xFF are skipped. Sequences are not validated, so a lead byte
 * is counted even if its continuation bytes are missing.
 *
 * @param buffer  Start of the bytes to scan; may be null.
 * @param len     Number of bytes to scan.
 * @return Number of character-starting bytes, or 0 when buffer is null or
 *         len is not positive.
 */
int utf8_wchar_count(char *buffer, int len)
{
    if (!buffer || len <= 0)
    {
        return 0;
    }

    // The result can never exceed len, so an int accumulator is sufficient.
    int count = 0;
    for (const char *p = buffer; p < buffer + len; ++p)
    {
        // Plain char may be signed; classify the byte as an unsigned value.
        const unsigned char byte = static_cast<unsigned char>(*p);
        const bool is_ascii = byte <= 0x7F;
        const bool is_lead = byte >= 0xC0 && byte <= 0xFD;
        if (is_ascii || is_lead)
        {
            ++count;
        }
    }

    return count;
}


方法2:转载自:http://www.tuicool.com/articles/v67jay

/**
 * Counts the characters in a UTF-8 encoded string.
 *
 * The scan reads the first byte of each character, derives the encoded
 * length from it (1-6 bytes, including the legacy 5- and 6-byte forms) and
 * jumps ahead by that many bytes. Continuation bytes are not validated; a
 * stray continuation byte (0x80-0xBF) in lead position is counted as a
 * one-byte character.
 *
 * @param input  UTF-8 encoded text.
 * @return Number of characters found. A sequence truncated at the end of
 *         the string counts as one character.
 */
int get_utf8_count(const std::string &input)
{
    int length = 0;
    // Use `<`, not `!=`: a truncated trailing sequence makes `i` jump past
    // the end, and `!=` would then keep reading out of bounds.
    for (size_t i = 0, len = 0; i < input.length(); i += len)
    {
        const unsigned char byte = static_cast<unsigned char>(input[i]);

        if (byte >= 0xFC)
            len = 6;
        else if (byte >= 0xF8)
            len = 5;
        else if (byte >= 0xF0)
            len = 4;
        else if (byte >= 0xE0)
            len = 3;
        else if (byte >= 0xC0)
            len = 2;
        else
            len = 1;

        ++length;
    }
    return length;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: