利用 libiconv 实现汉字编码 UTF-8 格式与 GBK 格式的相互转换
2016-06-23 11:16
615 查看
参考文章:http://jimmee.iteye.com/blog/2174693
关于windows上编译libiconv的库,请参见:http://www.cnblogs.com/tangxin-blog/p/5608751.html
完整工程demo:http://download.csdn.net/detail/tangxin19930330/9557218
关于windows上编译libiconv的库,请参见:http://www.cnblogs.com/tangxin-blog/p/5608751.html
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include "iconv.h"

#define MAX_BUF_SIZE 1024

/*
 * Convert inlen bytes of inbuf from from_charset to to_charset.
 *
 * The result is written to outbuf, whose capacity is outlen bytes. One byte
 * of that capacity is reserved for the terminator, so outbuf always holds a
 * NUL-terminated string when this function returns (given outlen > 0).
 *
 * Returns 0 on success, -1 on any failure (unsupported charset pair,
 * invalid or incomplete input sequence, output buffer too small).
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    iconv_t cd;
    char *in = inbuf;
    char *out = outbuf;
    size_t in_left = inlen;
    size_t out_left;
    int rc = -1;

    if (outbuf == NULL || outlen == 0) {
        return -1;
    }
    memset(outbuf, 0, outlen);

    /* iconv_open reports failure with (iconv_t)-1, not with a null value. */
    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1) {
        return -1;
    }

    /* Keep the last byte of outbuf free for the terminator. */
    out_left = outlen - 1;

    /* The second call flushes any pending shift state of the target encoding. */
    if (iconv(cd, &in, &in_left, &out, &out_left) != (size_t)-1 &&
        iconv(cd, NULL, NULL, &out, &out_left) != (size_t)-1) {
        rc = 0;
    }

    /* iconv advanced `out` past the converted bytes; terminate right there. */
    *out = '\0';

    /* Release the descriptor on the failure path as well. */
    iconv_close(cd);
    return rc;
}

/* Convert UTF-8 text to GBK; see code_convert for the buffer contract. */
int utf8_to_gbk(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("utf-8", "gbk", inbuf, inlen, outbuf, outlen);
}

/* Convert GBK text to UTF-8; see code_convert for the buffer contract. */
int gbk_to_utf8(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("gbk", "utf-8", inbuf, inlen, outbuf, outlen);
}

/*
 * Read the whole file file_name into buf and NUL-terminate it.
 *
 * buf must have room for max_buf_size bytes; the file has to be strictly
 * smaller than that so the terminator fits. Any failure prints a message to
 * stderr and terminates the process (exit code 1 for open failure or
 * oversized file, 3 for read errors).
 */
void read_file(char buf[], const int32_t max_buf_size, const char *file_name)
{
    FILE *fp = NULL;
    long size;
    size_t got;

#if defined(_MSC_VER)
    if (fopen_s(&fp, file_name, "rb") != 0) {
        fp = NULL;
    }
#else
    fp = fopen(file_name, "rb");
#endif
    if (fp == NULL) {
        fputs("File error\n", stderr);
        exit(1);
    }

    /* Obtain the file size. */
    if (fseek(fp, 0, SEEK_END) != 0) {
        fputs("Reading error\n", stderr);
        fclose(fp);
        exit(3);
    }
    size = ftell(fp);
    if (size < 0) {
        fputs("Reading error\n", stderr);
        fclose(fp);
        exit(3);
    }
    rewind(fp);

    if (size >= max_buf_size) {
        fputs("file too large\n", stderr);
        fclose(fp);
        exit(1);
    }

    got = fread(buf, 1, (size_t)size, fp);
    if (got != (size_t)size) {
        fputs("Reading error\n", stderr);
        fclose(fp);
        exit(3);
    }

    /*
     * Terminate explicitly: callers reuse the same buffer for several files,
     * and leftovers from a longer previous file must not leak into this one.
     */
    buf[size] = '\0';
    fclose(fp);
}

/*
 * Split the GBK-encoded string str into individual characters, print each
 * one with its two-byte code, and report whether the code falls inside the
 * ranges this demo treats as Chinese characters (simplified and traditional).
 * Single-byte characters, and a lead byte with nothing after it, are printed
 * with "no".
 */
void GetToken(const char *str)
{
    /* Work on unsigned bytes so the result does not depend on char signedness. */
    const unsigned char *bytes = (const unsigned char *)str;
    size_t len = strlen(str);
    size_t i;
    unsigned int code;
    char cstr[3];

    for (i = 0; i < len; ++i) {
        if (bytes[i] < 0x80 || i == len - 1) {
            printf("%c >> no\n", str[i]); /* ASCII character */
            continue;
        }

        /* Two-byte code: lead byte in the high 8 bits, trail byte in the low 8. */
        code = ((unsigned int)bytes[i] << 8) | (unsigned int)bytes[i + 1];

        /* Copy the two-byte character so it can be printed as a string. */
        cstr[0] = str[i];
        cstr[1] = str[i + 1];
        cstr[2] = '\0';
        ++i; /* the trail byte has been consumed */

        printf("%s >> 0x%x", cstr, code);
        if ((code >= 0xB0A1 && code <= 0xF7FE) ||
            (code >= 0x8140 && code <= 0xA0FE) ||
            (code >= 0xAA40 && code <= 0xFEA0)) {
            printf(" yes\n");
        } else {
            printf(" no\n");
        }
    }
}

/*
 * Demo driver: dump and tokenise a GBK file, dump and tokenise a UTF-8 file,
 * then convert the UTF-8 text to GBK and tokenise the converted text.
 * Returns 0 on success, 1 when the conversion fails.
 */
int main(int argc, char *argv[])
{
    char in_buf[MAX_BUF_SIZE] = { 0 };
    char out_buf[MAX_BUF_SIZE] = { 0 };
    int status = 0;

    (void)argc;
    (void)argv;

    read_file(in_buf, MAX_BUF_SIZE, "chinese_gbk.txt");
    printf("%s\n", in_buf);
    GetToken(in_buf);

    read_file(in_buf, MAX_BUF_SIZE, "chinese_utf8.txt");
    printf("%s\n", in_buf);
    GetToken(in_buf);

    if (utf8_to_gbk(in_buf, strlen(in_buf), out_buf, MAX_BUF_SIZE) != 0) {
        fputs("utf8_to_gbk failed\n", stderr);
        status = 1;
    } else {
        printf("%s\n", out_buf);
        GetToken(out_buf);
    }

    getchar(); /* keep the console window open until a key is pressed */
    return status;
}
完整工程demo:http://download.csdn.net/detail/tangxin19930330/9557218
相关文章推荐
- iOS--优秀博客记录
- 第二次冲刺阶段08
- java 环境配置 JSP+TOMCAT+MYS…
- 自动配置icon和launchImage不同尺寸的插件RTImageAssets
- java.lang.NoClassDefFoundError: …
- [2011年10月06日] Steve Jobs 1955…
- 2011年09月29日
- 字典树
- 知识管理
- oracle编程、操作不良习惯总结
- Linux下查看文件和文件夹大小
- python学习手册总结1
- 【转】《职来职往》杨石头语…
- MYSQL出错代码
- java enum(枚举)使用详解 + 总结
- ROUTE
- ANT 标签
- 常见水果/蔬菜/植物 英语词汇大全
- SSH 题目整理
- 高仿微博