您的位置:首页 > 移动开发 > IOS开发

fstream类读取UTF-8、Unicode和ANSI文本文档乱码问题的解决方案

2011-12-13 23:40 435 查看
 
1、解决读取UTF-8编码文本文档时的中文乱码问题(思路:先将UTF-8转换为Unicode(UTF-16宽字符),再转换为ANSI)

#include <fstream>
#include <iostream>
#include <string>
// #include <afx.h>
#include <Windows.h>

// Forward declaration: the definition appears further down in this file.
// The returned buffer is allocated with new[] and owned by the caller.
char* changeTxtEncoding(char* szU8);

// changeTextFromUtf8ToAnsi reads a UTF-8 encoded text file and saves its
// contents re-encoded in the system ANSI code page.
//
// filename     Path of the UTF-8 input file.
// outFilename  Path of the ANSI output file; defaults to the location the
//              original implementation always wrote to.
//
// If the input cannot be opened, a message is written to std::cerr and no
// output file is created.
void changeTextFromUtf8ToAnsi(const char* filename, const char* outFilename = "I:\\ANSI.txt")
{
    if (filename == NULL || outFilename == NULL)
    {
        return;
    }

    std::ifstream infile(filename);
    if (!infile)
    {
        std::cerr << "changeTextFromUtf8ToAnsi: cannot open input file" << std::endl;
        return;
    }

    // Test the result of getline itself instead of eof(): an eof()-driven loop
    // runs one extra time and appends a spurious empty line.
    std::string utf8Text;
    std::string line;
    while (std::getline(infile, line))
    {
        utf8Text += line;
        // eofbit is set when the line was ended by end-of-file rather than by
        // '\n', so only re-add the newline when one was actually consumed.
        if (!infile.eof())
        {
            utf8Text += '\n';
        }
    }
    infile.close();

    // Drop a leading UTF-8 byte-order mark so it is not converted into a
    // stray character at the start of the ANSI output.
    if (utf8Text.compare(0, 3, "\xEF\xBB\xBF") == 0)
    {
        utf8Text.erase(0, 3);
    }

    // &utf8Text[0] is a writable, NUL-terminated view of the string, so no
    // manual copy into a hand-sized buffer is needed.
    char* ansiBuffer = changeTxtEncoding(&utf8Text[0]);
    const std::string ansiText(ansiBuffer);
    delete[] ansiBuffer;

    std::ofstream outfile(outFilename);
    outfile.write(ansiText.c_str(), ansiText.length());
    outfile.flush();
    outfile.close();
}

// changeTxtEncoding converts a NUL-terminated UTF-8 string to the system ANSI
// code page (CP_ACP), going through UTF-16 as the intermediate form.
//
// szU8     UTF-8 input; a null pointer is treated as an empty string.
// returns  A NUL-terminated buffer allocated with new[]; the caller must
//          release it with delete[]. An empty string is returned when the
//          input is empty or a conversion step fails.
char* changeTxtEncoding(char* szU8){
    const int u8Len = (szU8 != NULL) ? static_cast<int>(strlen(szU8)) : 0;

    // Step 1: UTF-8 -> UTF-16. The first call only measures, the second converts.
    // The flags argument is a DWORD, so pass 0 rather than NULL.
    std::wstring wide;
    if (u8Len > 0)
    {
        const int wcsLen = ::MultiByteToWideChar(CP_UTF8, 0, szU8, u8Len, NULL, 0);
        if (wcsLen > 0)
        {
            wide.resize(wcsLen);
            const int converted = ::MultiByteToWideChar(CP_UTF8, 0, szU8, u8Len, &wide[0], wcsLen);
            wide.resize(converted > 0 ? converted : 0);
        }
    }

    // Step 2: UTF-16 -> ANSI, again measure first, then convert.
    int ansiLen = 0;
    if (!wide.empty())
    {
        ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), static_cast<int>(wide.size()), NULL, 0, NULL, NULL);
        if (ansiLen < 0)
        {
            ansiLen = 0;
        }
    }

    char* szAnsi = new char[ansiLen + 1];
    if (ansiLen > 0)
    {
        // The return value is the number of bytes written (0 on failure), so
        // the terminator below always lands directly after the valid data.
        ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), static_cast<int>(wide.size()), szAnsi, ansiLen, NULL, NULL);
    }
    szAnsi[ansiLen] = '\0';
    return szAnsi;
}


2、解决读取Unicode(带BOM的UTF-16 LE)编码文本文档时的中文乱码问题(经测试,此方法不能用于读取ANSI编码的文本文档)

string ws2s(const std::wstring& ws) 和 string readTxt(char* filename) 两个函数转载自CSDN,其中 readTxt 函数由本人做了一处小的bug修正,以避免末尾字符重复出现的问题。

// fstream中文乱码解决方案.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
string ws2s(const std::wstring& ws);
string readTxt(char* filename);
// Entry point of the UTF-16 demo: converts the sample file with readTxt.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    // readTxt takes a non-const char*, and a string literal cannot legally be
    // converted to char* in C++11 and later, so keep the path in a writable array.
    char szPath[] = "E:\\testUnicode.txt";
    readTxt(szPath);
    return 0;
}

// Converts a wide string to a multibyte string using the "chs" C locale.
//
// The global C locale is switched to "chs" for the duration of the conversion
// and restored afterwards. If "chs" is not available the setlocale call fails
// and the conversion runs in whatever locale was already active.
//
// ws       Wide string to convert; conversion stops at the first embedded L'\0'.
// returns  The converted bytes, or an empty string when wcstombs reports a
//          character that cannot be represented in the active locale.
std::string ws2s(const std::wstring& ws)
{
    // Copy the current locale name: the pointer returned by setlocale may be
    // invalidated by the next setlocale call, and it may be null on failure.
    const char* previousName = setlocale(LC_ALL, NULL);
    const std::string savedLocale = (previousName != NULL) ? previousName : "C";
    setlocale(LC_ALL, "chs");

    // MB_CUR_MAX is the longest multibyte sequence of the locale that is now
    // active, so this capacity holds any result plus the terminator.
    std::string buffer(ws.size() * MB_CUR_MAX + 1, '\0');
    const size_t written = wcstombs(&buffer[0], ws.c_str(), buffer.size());

    setlocale(LC_ALL, savedLocale.c_str());

    // wcstombs returns (size_t)-1 on an unconvertible character; the buffer
    // contents are not trustworthy in that case.
    if (written == static_cast<size_t>(-1))
    {
        return std::string();
    }
    buffer.resize(written);
    return buffer;
}

// Reads a text file stored as 2-byte little-endian code units preceded by a
// 2-byte byte-order mark, converts it with ws2s, prints the result with
// printf and returns it.
//
// Carriage returns (0x000D) are dropped; every other code unit, including the
// line feed that follows a carriage return, is kept.
//
// filename  Path of the file to read.
// returns   The converted text; empty when filename is null or the file
//           cannot be opened.
std::string readTxt(char* filename) {
    std::string strRet;
    if (filename == NULL)
    {
        return strRet;
    }

    std::ifstream fin(filename, std::ios::binary);
    if (!fin)
    {
        // A loop on !eof() would spin forever here, because a failed open
        // sets failbit but never eofbit.
        return strRet;
    }

    // Skip the 2-byte byte-order mark once; after that the stream position
    // advances by itself with every read.
    fin.seekg(2, std::ios::beg);

    std::wstring wstrLine;
    unsigned char unit[2];
    // Use the read result as the loop condition so a failed read never
    // appends a stale code unit (the cause of the duplicated last character).
    while (fin.read(reinterpret_cast<char*>(unit), sizeof(unit)))
    {
        // Assemble the code unit explicitly so the result does not depend on
        // sizeof(wchar_t) being 2.
        const wchar_t wch = static_cast<wchar_t>(unit[0] | (unit[1] << 8));
        if (wch == 0x000D) // carriage return: flush the line collected so far
        {
            strRet += ws2s(wstrLine);
            wstrLine.clear();
        }
        else
        {
            wstrLine.append(1, wch);
        }
    }
    fin.close();

    // Convert whatever follows the last carriage return.
    strRet += ws2s(wstrLine);

    printf("%s", strRet.c_str());
    return strRet;
}


3、解决读取ANSI编码文本文档时的中文乱码问题(经测试,此方法不能用于读取Unicode编码的文本文档)

#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Entry point of the ANSI demo: reads E:\testANSI.txt line by line and prints
// the lines to standard output joined together without line breaks.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string strResult;
    std::ifstream infile("E:\\testANSI.txt");

    // Loop on the result of getline rather than on eof(): a hard read error
    // sets badbit without eofbit, which would make an eof()-driven loop spin
    // forever. If the file could not be opened the loop simply does not run.
    std::string strTemp;
    while (std::getline(infile, strTemp))
    {
        strResult += strTemp;
    }
    infile.close();

    std::cout << strResult;
    return 0;
}

 

 

 

原文:http://hi.baidu.com/shi074185/blog/item/4124976224fb90cf8db10d18.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息