您的位置:首页 > 其它

utf8与ansi之间的转换

2012-08-16 08:08 344 查看
// file.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <iostream>
#include <string>
#include <vector>
#include <fstream>
#include <windows.h>
#include <boost/filesystem.hpp>

namespace fs = boost::filesystem;

// Base unit for the per-line size limits applied by the conversion routines.
const int COUNT = 1024;
// Status strings returned by utf8_ansi / ansi_utf8 and printed by code_convert.
const std::string ret_success = "success";
const std::string ret_src_open = "error:源文件打开错误"; // source file could not be opened
const std::string ret_dst_open = "error:目标文件打开错误"; // destination file could not be opened
const std::string ret_over_line = "error:文件中某行字符数过大"; // a line in the file is too long
const std::string ret_type_convert = "error:转换类型不正确"; // conversion type is not valid

typedef enum { UTF8_ANSI, ANSI_UTF8} TYPE_CONVERT; // Conversion direction
std::string utf8_ansi(std::string utf8_path, std::string ansi_path); // Convert a UTF-8 file to ANSI
std::string ansi_utf8(std::string ansi_path, std::string utf8_path); // Convert an ANSI file to UTF-8
int get_filenames(const std::string& dir, std::vector<std::string>& filenames); // Collect the names of all files under a directory
void code_convert(const std::string& src_dir,
const std::string& dst_dir,
TYPE_CONVERT type_convert,
std::string expanded_names = ".h.cpp.txt"); // Convert every file under a directory that has one of the listed extensions; the original note says to use "//" in paths

// Console entry point. It performs no conversion itself: it only runs the
// shell "pause" command and then exits with status 0.
int _tmain(int argc, _TCHAR* argv[])
{
    // argc and argv are accepted but never read.
    const int exit_status = 0;

    system("pause");

    return exit_status;
}

// Converts one UTF-8 encoded line to the system ANSI code page (CP_ACP).
// The caller must ensure utf8.size() fits in an int (utf8_ansi enforces a much
// smaller limit before calling). Returns false if a Win32 conversion call fails.
static bool utf8_line_to_ansi(const std::string& utf8, std::string& ansi)
{
    ansi.clear();
    if (utf8.empty())
    {
        return true; // the Win32 conversion calls reject zero-length input
    }

    const int utf8_len = static_cast<int>(utf8.size());

    // Calling with a zero-sized output buffer returns the required length,
    // so no fixed-size buffer (and no overflow) is involved.
    const int wide_len = ::MultiByteToWideChar(CP_UTF8, 0, utf8.data(), utf8_len, NULL, 0);
    if (wide_len <= 0)
    {
        return false;
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wide_len), L'\0');
    if (::MultiByteToWideChar(CP_UTF8, 0, utf8.data(), utf8_len, &wide[0], wide_len) != wide_len)
    {
        return false;
    }

    const int ansi_len = ::WideCharToMultiByte(CP_ACP, 0, wide.data(), wide_len, NULL, 0, NULL, NULL);
    if (ansi_len <= 0)
    {
        return false;
    }
    ansi.resize(static_cast<std::string::size_type>(ansi_len));
    return ::WideCharToMultiByte(CP_ACP, 0, wide.data(), wide_len, &ansi[0], ansi_len, NULL, NULL) == ansi_len;
}

// Converts the UTF-8 text file at utf8_path into an ANSI (CP_ACP) text file at
// ansi_path, line by line. A leading UTF-8 byte-order mark is dropped.
//
// Returns ret_success on success, ret_src_open / ret_dst_open when a file
// cannot be opened, ret_over_line when a line is COUNT<<3 bytes or longer, or
// an "error:" message when a line cannot be converted. On a per-line error the
// lines converted so far remain in the destination file.
std::string utf8_ansi(std::string utf8_path, std::string ansi_path)
{
    std::fstream fsrc(utf8_path.c_str(), std::fstream::in);
    if (!fsrc.is_open())
    {
        return ret_src_open;
    }

    // Open (and truncate) the destination only after the source is known to be readable.
    std::fstream fdst(ansi_path.c_str(), std::fstream::out | std::fstream::trunc);
    if (!fdst.is_open())
    {
        return ret_dst_open;
    }

    // Reported when a Win32 conversion call fails ("encoding conversion failed").
    const std::string ret_convert_fail = "error:编码转换失败";
    const std::string::size_type max_line_bytes = static_cast<std::string::size_type>(COUNT << 3);

    std::string ret = ret_success;
    std::string line;
    std::string converted;
    bool is_first_line = true;

    // Testing getline's result (rather than eof() up front) avoids processing a
    // phantom empty line after the last real one.
    while (std::getline(fsrc, line))
    {
        // eofbit is set when the final line was not terminated by a newline;
        // in that case no newline is added to the output either.
        const bool had_newline = !fsrc.eof();

        if (line.size() >= max_line_bytes)
        {
            ret = ret_over_line;
            break;
        }

        if (is_first_line)
        {
            is_first_line = false;
            // Strip the BOM by its byte signature before converting; checking for
            // '?' after conversion would also eat a genuine leading '?'.
            if (line.size() >= 3 && line.compare(0, 3, "\xEF\xBB\xBF") == 0)
            {
                line.erase(0, 3);
            }
        }

        if (!utf8_line_to_ansi(line, converted))
        {
            ret = ret_convert_fail;
            break;
        }

        fdst.write(converted.data(), static_cast<std::streamsize>(converted.size()));
        if (had_newline)
        {
            fdst.put('\n');
        }
    }

    // Both streams are closed by their destructors.
    return ret;
}

// Converts one line from the system ANSI code page (CP_ACP) to UTF-8.
// The caller must ensure ansi.size() fits in an int (ansi_utf8 enforces a much
// smaller limit before calling). Returns false if a Win32 conversion call fails.
static bool ansi_line_to_utf8(const std::string& ansi, std::string& utf8)
{
    utf8.clear();
    if (ansi.empty())
    {
        return true; // the Win32 conversion calls reject zero-length input
    }

    const int ansi_len = static_cast<int>(ansi.size());

    // Calling with a zero-sized output buffer returns the required length,
    // so the buffers are always exactly large enough.
    const int wide_len = ::MultiByteToWideChar(CP_ACP, 0, ansi.data(), ansi_len, NULL, 0);
    if (wide_len <= 0)
    {
        return false;
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wide_len), L'\0');
    if (::MultiByteToWideChar(CP_ACP, 0, ansi.data(), ansi_len, &wide[0], wide_len) != wide_len)
    {
        return false;
    }

    // One UTF-16 unit can expand to three UTF-8 bytes, so the length is queried
    // instead of assumed.
    const int utf8_len = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wide_len, NULL, 0, NULL, NULL);
    if (utf8_len <= 0)
    {
        return false;
    }
    utf8.resize(static_cast<std::string::size_type>(utf8_len));
    return ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wide_len, &utf8[0], utf8_len, NULL, NULL) == utf8_len;
}

// Converts the ANSI (CP_ACP) text file at ansi_path into a UTF-8 text file at
// utf8_path, line by line. The output always starts with the UTF-8 BOM.
//
// Returns ret_success on success, ret_src_open / ret_dst_open when a file
// cannot be opened, ret_over_line when a line is COUNT bytes or longer, or an
// "error:" message when a line cannot be converted. On a per-line error the
// lines converted so far remain in the destination file.
std::string ansi_utf8(std::string ansi_path, std::string utf8_path)
{
    std::fstream fsrc(ansi_path.c_str(), std::fstream::in);
    if (!fsrc.is_open())
    {
        return ret_src_open;
    }

    // Open (and truncate) the destination only after the source is known to be readable.
    std::fstream fdst(utf8_path.c_str(), std::fstream::out | std::fstream::trunc);
    if (!fdst.is_open())
    {
        return ret_dst_open;
    }

    // UTF-8 byte-order mark written at the start of the output file.
    static const char utf8_bom[] = "\xEF\xBB\xBF";
    fdst.write(utf8_bom, 3);

    // Reported when a Win32 conversion call fails ("encoding conversion failed").
    const std::string ret_convert_fail = "error:编码转换失败";
    const std::string::size_type max_line_bytes = static_cast<std::string::size_type>(COUNT);

    std::string ret = ret_success;
    std::string line;
    std::string converted;

    // Testing getline's result (rather than eof() up front) avoids processing a
    // phantom empty line after the last real one.
    while (std::getline(fsrc, line))
    {
        // eofbit is set when the final line was not terminated by a newline;
        // in that case no newline is added to the output either.
        const bool had_newline = !fsrc.eof();

        if (line.size() >= max_line_bytes)
        {
            ret = ret_over_line;
            break;
        }

        if (!ansi_line_to_utf8(line, converted))
        {
            ret = ret_convert_fail;
            break;
        }

        fdst.write(converted.data(), static_cast<std::streamsize>(converted.size()));
        if (had_newline)
        {
            fdst.put('\n');
        }
    }

    // Both streams are closed by their destructors.
    return ret;
}

int get_filenames(const std::string& dir, std::vector<std::string>& filenames)
{
fs::path path(dir);
if (!fs::exists(path))
{
return -1;
}

fs::directory_iterator end_iter;
for (fs::directory_iterator iter(path); iter!=end_iter; ++iter)
{
if (fs::is_regular_file(iter->status()))
{
filenames.push_back(iter->path().string());
}

if (fs::is_directory(iter->status()))
{
get_filenames(iter->path().string(), filenames);
}
}

return filenames.size();
}

// Converts every file below src_dir whose name ends with one of the listed
// extensions, writing each result to the same relative path under dst_dir.
//
// src_dir        root directory that is scanned recursively for input files
// dst_dir        root directory that receives the converted files
// type_convert   UTF8_ANSI or ANSI_UTF8; any other value prints ret_type_convert
//                and returns
// expanded_names extensions concatenated without separators, e.g. ".h.cpp.txt";
//                text before the first '.' is ignored
//
// For each converted file a numbered "source => destination" line and the
// status string returned by the converter are printed to std::cout.
// NOTE: destination sub-directories are not created here; the converter
// reports ret_dst_open when it cannot open the destination file.
void code_convert(const std::string& src_dir,
                  const std::string& dst_dir,
                  TYPE_CONVERT type_convert,
                  std::string expanded_names)
{
    if (UTF8_ANSI != type_convert && ANSI_UTF8 != type_convert)
    {
        std::cout << ret_type_convert << std::endl;
        return;
    }

    std::vector<std::string> src_filenames;
    if (get_filenames(src_dir, src_filenames) <= 0)
    {
        return; // missing directory or nothing to convert
    }

    // Split ".h.cpp.txt" into {".h", ".cpp", ".txt"}. Positions are kept as
    // size_type so the comparison against npos is exact.
    std::vector<std::string> vs_expnames;
    std::string::size_type first = expanded_names.find('.');
    while (first != std::string::npos)
    {
        const std::string::size_type second = expanded_names.find('.', first + 1);
        if (second != std::string::npos)
        {
            vs_expnames.push_back(expanded_names.substr(first, second - first));
        }
        else
        {
            vs_expnames.push_back(expanded_names.substr(first));
        }
        first = second;
    }

    int num = 0;
    for (std::vector<std::string>::const_iterator file_iter = src_filenames.begin();
         file_iter != src_filenames.end(); ++file_iter)
    {
        // Build the destination path: dst_dir + (source path with src_dir removed).
        const std::string& src_path = *file_iter;
        const std::string::size_type pos = src_path.find(src_dir);
        if (pos == std::string::npos)
        {
            continue; // defensive: no relative part can be derived for this path
        }
        const std::string dst_path = dst_dir + src_path.substr(pos + src_dir.size());

        // Convert the file if its name ends with one of the requested extensions.
        for (std::vector<std::string>::const_iterator ext_iter = vs_expnames.begin();
             ext_iter != vs_expnames.end(); ++ext_iter)
        {
            const std::string& ext = *ext_iter;
            // The length check prevents the unsigned subtraction from wrapping
            // around when the path is shorter than the extension.
            if (dst_path.size() >= ext.size() &&
                dst_path.compare(dst_path.size() - ext.size(), ext.size(), ext) == 0)
            {
                std::cout << ++num << ": " << src_path << " => " << dst_path << std::endl;

                const std::string result = (UTF8_ANSI == type_convert)
                    ? utf8_ansi(src_path, dst_path)
                    : ansi_utf8(src_path, dst_path);
                std::cout << result << std::endl;
                break;
            }
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: