您的位置:首页 > 其它

指定存储文件的编码格式(下)

2015-01-25 20:18 435 查看
指定存储文件编码格式共两篇。

此为下篇。

对于上篇的内容进行了优化,这样大大提高了代码的通用性和重用性。

有兴趣的可以都下载下来对比参考。

// test__EncodeAFileToEncodeBFile.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <windows.h>
#include <string>
#include <iostream>

// Parameter-direction annotation macros. Each one expands to nothing; they
// exist purely as markers in function signatures. Every macro is defined only
// when no earlier header has already defined it.

// Marks a parameter that the function reads.
#ifndef IN
#define IN
#endif

// Marks a parameter that the function writes.
#ifndef OUT
#define OUT
#endif

// Marks a parameter that the function both reads and writes.
#ifndef INOUT
#define INOUT
#endif

// Not referenced by the code visible in this file; presumably marks an
// optional parameter.
#ifndef OPTION
#define OPTION
#endif

// Length in bytes of the UTF-8 byte-order mark (EF BB BF) that the conversion
// routines add or skip.
#define UTF8_SIGN 3
// Not referenced by the code visible in this file; presumably the length in
// bytes of a UTF-16 byte-order mark.
#define UTF16_SIGN 2
// Number of bytes read from the start of a file when sniffing its encoding.
#define FILE_HEADER 6

// Text encodings this file distinguishes. The enumerators are numbered
// consecutively from 0; the values are spelled out so the numbering is
// visible at a glance.
enum FileEncodeType
{
    OTHER          = 0, // anything that is not recognised as one of the below
    UTF8           = 1, // UTF-8 that starts with the EF BB BF byte-order mark
    UTF8_NO_BOM    = 2, // UTF-8 without a byte-order mark
    UTF16LE        = 3, // UTF-16 little-endian
    UTF16LE_NO_BOM = 4, // declared but never produced by the code in this file
    UTF16BE        = 5, // UTF-16 big-endian
    UTF16BE_NO_BOM = 6  // declared but never produced by the code in this file
};

//************************************
// Method:    IsUTF8EncodeText
// FullName:  IsUTF8EncodeText
// Access:    public
// Returns:   BOOL - TRUE when the buffer is structurally valid UTF-8 that
//            contains at least one multi-byte sequence, FALSE otherwise
// Qualifier: Checks whether a byte buffer looks like UTF-8 text (usable for
//            UTF-8 without a BOM). Only the lead-byte / continuation-byte
//            structure is checked; overlong forms and code point ranges are
//            not validated.
// Parameter: BYTE * lpText: bytes to examine
// Parameter: INT cchText: number of bytes in lpText
// Note:      A buffer that consists only of ASCII bytes yields FALSE, and so
//            does a NULL or empty buffer.
//************************************
BOOL IsUTF8EncodeText(IN CONST BYTE* lpText, IN INT cchText)
{
    // Byte patterns accepted (the historical 1-6 byte scheme):
    //   0000 0000 - 0000 007F  0xxxxxxx
    //   0000 0080 - 0000 07FF  110xxxxx 10xxxxxx
    //   0000 0800 - 0000 FFFF  1110xxxx 10xxxxxx 10xxxxxx
    //   0001 0000 - 001F FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    //   0020 0000 - 03FF FFFF  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    //   0400 0000 - 7FFF FFFF  1111110x 10xxxxxx ... 10xxxxxx

    if (NULL == lpText || cchText <= 0)
    {
        return FALSE;
    }

    DWORD nPending = 0;     // continuation bytes still expected for the current sequence
    BOOL bAllAscii = TRUE;  // stays TRUE until a byte with the high bit set is seen

    for (INT i = 0; i < cchText; i++)
    {
        const BYTE current = lpText[i];

        if ((current & 0x80) != 0)
        {
            bAllAscii = FALSE;
        }

        if (nPending > 0)
        {
            // Inside a multi-byte sequence: every further byte must be 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return FALSE;
            }
            nPending--;
            continue;
        }

        if (current < 0x80)
        {
            continue; // plain ASCII byte
        }

        // Lead byte: derive how many continuation bytes must follow.
        if (current >= 0xFE)
        {
            // 0xFE and 0xFF never occur in UTF-8. They used to fall through to
            // the ">= 0xF8" test and were mistaken for 5-byte lead bytes.
            return FALSE;
        }
        else if (current >= 0xFC)
        {
            nPending = 5;
        }
        else if (current >= 0xF8)
        {
            nPending = 4;
        }
        else if (current >= 0xF0)
        {
            nPending = 3;
        }
        else if (current >= 0xE0)
        {
            nPending = 2;
        }
        else if (current >= 0xC0)
        {
            nPending = 1;
        }
        else
        {
            // 10xxxxxx without a preceding lead byte.
            return FALSE;
        }
    }

    if (nPending > 0)
    {
        // The buffer ends in the middle of a multi-byte sequence.
        return FALSE;
    }

    // Pure ASCII is deliberately not reported as UTF-8.
    return bAllAscii ? FALSE : TRUE;
}

//************************************
// Method:    GetEncodeType
// FullName:  GetEncodeType
// Access:    public
// Returns:   FileEncodeType - UTF8, UTF16LE or UTF16BE when the file starts
//            with the matching byte-order mark, UTF8_NO_BOM when the whole
//            file passes IsUTF8EncodeText, OTHER in every other case
//            (including any I/O or allocation failure)
// Qualifier: Identifies the encoding of the given file
// Parameter: IN CONST LPTSTR lpFileName: path of the file to examine
// Note:      Only UTF8 / UTF8 WITHOUT BOM / UTF16LE / UTF16BE are recognised.
//            Failures are reported on stdout.
//************************************
FileEncodeType GetEncodeType(IN CONST LPTSTR lpFileName)
{
    HANDLE hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (INVALID_HANDLE_VALUE == hFile)
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
        return OTHER;
    }

    FileEncodeType lFileEncodeType = OTHER;
    LPBYTE lpFileContent = NULL;

    // Sniff the first bytes for a byte-order mark. The byte count is requested
    // explicitly: ReadFile requires a non-NULL lpNumberOfBytesRead when no
    // OVERLAPPED structure is supplied, and the count tells us how many header
    // bytes are really there for short files.
    BYTE fileHeader[FILE_HEADER] = { 0 };
    DWORD cbHeaderRead = 0;
    if (FALSE == ::ReadFile(hFile, fileHeader, FILE_HEADER, &cbHeaderRead, NULL))
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
    }
    else if (cbHeaderRead >= 3 && 0xef == fileHeader[0] && 0xbb == fileHeader[1] && 0xbf == fileHeader[2])
    {
        lFileEncodeType = UTF8;
    }
    else if (cbHeaderRead >= 2 && 0xff == fileHeader[0] && 0xfe == fileHeader[1])
    {
        // FF FE is the little-endian mark (the two UTF-16 results used to be swapped).
        lFileEncodeType = UTF16LE;
    }
    else if (cbHeaderRead >= 2 && 0xfe == fileHeader[0] && 0xff == fileHeader[1])
    {
        // FE FF is the big-endian mark.
        lFileEncodeType = UTF16BE;
    }
    else
    {
        // No byte-order mark: read the whole file and test it for UTF-8 structure.
        const DWORD cbFile = ::SetFilePointer(hFile, 0, NULL, FILE_END);
        if (INVALID_SET_FILE_POINTER == cbFile ||
            INVALID_SET_FILE_POINTER == ::SetFilePointer(hFile, 0, NULL, FILE_BEGIN))
        {
            int errLogNumber = GetLastError();
            printf_s("error number:%d\n", errLogNumber);
        }
        else if (cbFile > 0 && cbFile <= 0x7FFFFFFF) // empty or oversized files stay OTHER
        {
            lpFileContent = (BYTE *)malloc(cbFile);
            if (NULL == lpFileContent)
            {
                printf_s("malloc error\n");
            }
            else
            {
                DWORD cbContentRead = 0;
                if (FALSE == ::ReadFile(hFile, lpFileContent, cbFile, &cbContentRead, NULL))
                {
                    int errLogNumber = GetLastError();
                    printf_s("error number:%d\n", errLogNumber);
                }
                else if (TRUE == IsUTF8EncodeText(lpFileContent, (INT)cbContentRead))
                {
                    lFileEncodeType = UTF8_NO_BOM;
                }
            }
        }
    }

    // Single exit: the buffer (if any) and the handle are always released.
    free(lpFileContent);
    ::CloseHandle(hFile);
    return lFileEncodeType;
}

//************************************
// Method:    GetEncodeType
// FullName:  GetEncodeType
// Access:    public
// Returns:   FileEncodeType - UTF8, UTF16LE or UTF16BE when the buffer starts
//            with the matching byte-order mark, UTF8_NO_BOM when the buffer
//            passes IsUTF8EncodeText, OTHER otherwise (also for a NULL or
//            empty buffer)
// Qualifier: Identifies the encoding of the given byte stream
// Parameter: IN CONST BYTE * lpBytes: the byte stream
// Parameter: IN CONST INT cchBytes: number of bytes in lpBytes
// Note:      Only UTF8 / UTF8 WITHOUT BOM / UTF16LE / UTF16BE are recognised.
//************************************
FileEncodeType GetEncodeType(IN CONST BYTE *lpBytes, IN CONST INT cchBytes)
{
    if (NULL == lpBytes || cchBytes <= 0)
    {
        return OTHER;
    }

    // Each BOM test first makes sure the buffer is long enough, so short
    // buffers are never read past their end.
    if (cchBytes >= 3 && 0xef == lpBytes[0] && 0xbb == lpBytes[1] && 0xbf == lpBytes[2])
    {
        return UTF8;
    }

    if (cchBytes >= 2)
    {
        if (0xff == lpBytes[0] && 0xfe == lpBytes[1])
        {
            return UTF16LE;
        }
        if (0xfe == lpBytes[0] && 0xff == lpBytes[1])
        {
            return UTF16BE;
        }
    }

    // No byte-order mark: fall back to the structural UTF-8 check.
    if (TRUE == IsUTF8EncodeText(lpBytes, cchBytes))
    {
        return UTF8_NO_BOM;
    }

    return OTHER;
}

//************************************
// Method:    Utf16leAndUtf16beTransfers
// FullName:  Utf16leAndUtf16beTransfers
// Access:    public
// Returns:   BOOL - FALSE for a NULL pointer or negative length, TRUE otherwise
// Qualifier: Converts between UTF16LE and UTF16BE in place by swapping the
//            two bytes of every 16-bit unit
// Parameter: INOUT LPBYTE lpUtf16String: the UTF-16 byte stream, modified in place
// Parameter: IN CONST INT cchUtf16String: length of the byte stream in bytes
// Note:      If the length is odd, the final unpaired byte is left untouched.
//************************************
BOOL Utf16leAndUtf16beTransfers(INOUT LPBYTE lpUtf16String, IN CONST INT cchUtf16String)
{
    if (NULL == lpUtf16String || cchUtf16String < 0)
    {
        return FALSE;
    }

    // "i + 1 < length" keeps the second byte of each pair inside the buffer;
    // the old "i < length" bound touched one byte past the end for odd lengths.
    for (INT i = 0; i + 1 < cchUtf16String; i += 2)
    {
        const BYTE firstByte = lpUtf16String[i];
        lpUtf16String[i] = lpUtf16String[i + 1];
        lpUtf16String[i + 1] = firstByte;
    }

    return TRUE;
}

//************************************
// Method:    Utf8ToUtf8NoBOM
// FullName:  Utf8ToUtf8NoBOM
// Access:    public
// Returns:   BOOL - TRUE when the payload was copied, FALSE on invalid
//            arguments, when the input is shorter than a BOM, or when the
//            output buffer is too small
// Qualifier: Converts UTF8 (with BOM) to UTF8 without BOM by copying
//            everything after the first UTF8_SIGN bytes. The skipped bytes
//            are not inspected.
// Parameter: IN CONST LPSTR lpUtf8String: input, UTF8 text starting with a BOM
// Parameter: IN CONST INT cchUtf8String: length of the input in bytes
// Parameter: INOUT CONST LPSTR lpUtf8NoBOMString: output buffer
// Parameter: IN CONST INT cchUtf8NoBOMString: capacity of the output buffer in bytes
//************************************
BOOL Utf8ToUtf8NoBOM(IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String, INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString)
{
    if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
    {
        return FALSE;
    }

    // An input shorter than the BOM would make the copy length negative,
    // which CopyMemory would interpret as an enormous unsigned size.
    if (cchUtf8String < UTF8_SIGN)
    {
        return FALSE;
    }

    const INT cbPayload = cchUtf8String - UTF8_SIGN;
    if (cchUtf8NoBOMString < cbPayload)
    {
        return FALSE;
    }

    ::CopyMemory(lpUtf8NoBOMString, lpUtf8String + UTF8_SIGN, cbPayload);

    return TRUE;
}
//************************************
// Method:    Utf8NoBOMToUtf8
// FullName:  Utf8NoBOMToUtf8
// Access:    public
// Returns:   BOOL - TRUE when the BOM and the text were written, FALSE on
//            invalid arguments or when the output buffer is too small
// Qualifier: Converts UTF8 without BOM to UTF8 (with BOM): writes EF BB BF
//            followed by a copy of the input
// Parameter: INOUT CONST LPSTR lpUtf8NoBOMString: input, UTF8 text without BOM
//            (only read by this function)
// Parameter: IN CONST INT cchUtf8NoBOMString: length of the input in bytes
// Parameter: IN CONST LPSTR lpUtf8String: output buffer (written by this function)
// Parameter: IN CONST INT cchUtf8String: capacity of the output buffer in
//            bytes; must be at least cchUtf8NoBOMString + UTF8_SIGN
//************************************
BOOL Utf8NoBOMToUtf8(INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString, IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String)
{
    if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
    {
        return FALSE;
    }

    // The output must hold the BOM plus the whole input. The previous test was
    // the wrong way round: it rejected roomy buffers and let too-small ones
    // through. Written as a subtraction so the sum cannot overflow.
    if (cchUtf8String - UTF8_SIGN < cchUtf8NoBOMString)
    {
        return FALSE;
    }

    lpUtf8String[0] = (char)0xef;
    lpUtf8String[1] = (char)0xbb;
    lpUtf8String[2] = (char)0xbf;
    ::CopyMemory(lpUtf8String + UTF8_SIGN, lpUtf8NoBOMString, cchUtf8NoBOMString);

    return TRUE;
}

// Helper for BYTETOUTF8: converts a UTF-16 byte buffer to UTF-8.
//   lpUtf16 / cbUtf16 : input bytes; a trailing unpaired byte is ignored
//   bBigEndian        : TRUE when the input is UTF-16BE (bytes are swapped on a copy)
//   lpText / lpcbText : output buffer and, on entry, its capacity in bytes;
//                       on success *lpcbText receives the number of bytes written
//   bKeepBOM          : FALSE removes a leading EF BB BF from the converted text
// Returns FALSE (leaving *lpcbText unchanged) when the input holds no complete
// 16-bit unit, when an allocation or the conversion fails, or when the result
// does not fit into the output buffer.
static BOOL Utf16BytesToUtf8(const BYTE* lpUtf16, INT cbUtf16, BOOL bBigEndian, LPSTR lpText, LPINT lpcbText, BOOL bKeepBOM)
{
    const INT cchWide = cbUtf16 / (INT)sizeof(WCHAR);
    if (cchWide <= 0)
    {
        return FALSE;
    }
    const INT cbWide = cchWide * (INT)sizeof(WCHAR);

    // Work on a private, WCHAR-aligned copy so the caller's input is never modified.
    LPWSTR lpWide = (WCHAR *)malloc(cbWide);
    if (NULL == lpWide)
    {
        printf_s("malloc error\n");
        return FALSE;
    }
    ::CopyMemory(lpWide, lpUtf16, cbWide);

    BOOL bResult = FALSE;
    LPSTR lpUtf8 = NULL;

    if (!bBigEndian || FALSE != Utf16leAndUtf16beTransfers((LPBYTE)lpWide, cbWide))
    {
        // First call only asks how many UTF-8 bytes the text needs.
        const INT cbUtf8 = ::WideCharToMultiByte(CP_UTF8, 0, lpWide, cchWide, NULL, 0, NULL, NULL);
        if (cbUtf8 > 0)
        {
            lpUtf8 = (CHAR *)malloc(cbUtf8);
            if (NULL == lpUtf8)
            {
                printf_s("malloc error\n");
            }
            else if (0 != ::WideCharToMultiByte(CP_UTF8, 0, lpWide, cchWide, lpUtf8, cbUtf8, NULL, NULL))
            {
                // Drop the BOM only when asked to and only when it is really there,
                // so text without a BOM does not lose its first three bytes.
                INT cbSkip = 0;
                if (!bKeepBOM && cbUtf8 >= UTF8_SIGN &&
                    0xef == (BYTE)lpUtf8[0] && 0xbb == (BYTE)lpUtf8[1] && 0xbf == (BYTE)lpUtf8[2])
                {
                    cbSkip = UTF8_SIGN;
                }

                // Copy out only if the caller's buffer can hold the result.
                const INT cbOutput = cbUtf8 - cbSkip;
                if (cbOutput <= *lpcbText)
                {
                    ::CopyMemory(lpText, lpUtf8 + cbSkip, cbOutput);
                    *lpcbText = cbOutput;
                    bResult = TRUE;
                }
            }
        }
    }

    free(lpUtf8);
    free(lpWide);
    return bResult;
}

//************************************
// Method:    BYTETOUTF8
// FullName:  BYTETOUTF8
// Access:    public
// Returns:   BOOL - TRUE on success; FALSE on invalid arguments, an
//            unsupported source encoding, a too-small output buffer, or a
//            failed conversion/allocation
// Qualifier: Converts a byte stream of the given encoding to UTF8 (with or
//            without BOM)
// Parameter: IN CONST LPBYTE lpBytes: the byte stream to convert
// Parameter: IN CONST INT cbBytes: length of the byte stream in bytes
// Parameter: IN CONST FileEncodeType tpBytes: actual encoding of the byte
//            stream (UTF8, UTF8_NO_BOM, UTF16LE or UTF16BE)
// Parameter: INOUT LPSTR lpText: output buffer
// Parameter: INOUT LPINT lpcbText: on entry the capacity of lpText in bytes;
//            on success the number of bytes written. Unchanged on failure.
// Parameter: IN CONST BOOL bWithBOM: whether the output carries a BOM
// Note:      Requests that would leave the encoding unchanged (UTF8 with
//            bWithBOM TRUE, UTF8_NO_BOM with bWithBOM FALSE) return FALSE.
//            For UTF16 input the BOM is not added, only kept or removed.
//************************************
BOOL BYTETOUTF8(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPSTR lpText, INOUT LPINT lpcbText, IN CONST BOOL bWithBOM = TRUE)
{
    if (NULL == lpBytes || NULL == lpText || NULL == lpcbText || cbBytes < 0)
    {
        return FALSE;
    }

    switch (tpBytes)
    {
    case UTF8_NO_BOM:
        {
            // Only "add a BOM" makes sense for this source encoding.
            if (FALSE == bWithBOM)
            {
                return FALSE;
            }
            // Need room for the BOM plus the text (subtraction avoids overflow).
            if (*lpcbText < UTF8_SIGN || *lpcbText - UTF8_SIGN < cbBytes)
            {
                return FALSE;
            }

            lpText[0] = (char)0xef;
            lpText[1] = (char)0xbb;
            lpText[2] = (char)0xbf;
            ::CopyMemory(lpText + UTF8_SIGN, lpBytes, cbBytes);
            *lpcbText = cbBytes + UTF8_SIGN;
            return TRUE;
        }
    case UTF8:
        {
            // Only "remove the BOM" makes sense for this source encoding.
            if (TRUE == bWithBOM)
            {
                return FALSE;
            }
            // The input must at least contain the BOM that is about to be skipped.
            if (cbBytes < UTF8_SIGN || *lpcbText < cbBytes - UTF8_SIGN)
            {
                return FALSE;
            }

            ::CopyMemory(lpText, lpBytes + UTF8_SIGN, cbBytes - UTF8_SIGN);
            *lpcbText = cbBytes - UTF8_SIGN;
            return TRUE;
        }
    case UTF16LE:
        return Utf16BytesToUtf8(lpBytes, cbBytes, FALSE, lpText, lpcbText, TRUE == bWithBOM);
    case UTF16BE:
        return Utf16BytesToUtf8(lpBytes, cbBytes, TRUE, lpText, lpcbText, TRUE == bWithBOM);
    default:
        return FALSE;
    }
}

// Helper for BYTETOUTF16LE: converts UTF-8 bytes to UTF-16LE via MultiByteToWideChar.
//   lpUtf8 / cbUtf8   : input bytes
//   lpText / lpcbText : output buffer and, on entry, its capacity in bytes;
//                       on success *lpcbText receives the number of bytes written
// Returns FALSE (leaving *lpcbText unchanged) for empty input, a failed
// conversion, or an output buffer that is too small.
static BOOL Utf8BytesToUtf16Le(const char* lpUtf8, INT cbUtf8, LPWSTR lpText, LPINT lpcbText)
{
    if (cbUtf8 <= 0)
    {
        return FALSE;
    }

    // First call only asks how many wide characters the text needs.
    const INT cchWide = ::MultiByteToWideChar(CP_UTF8, 0, lpUtf8, cbUtf8, NULL, 0);
    if (cchWide <= 0)
    {
        return FALSE;
    }

    // The capacity is given in bytes, the requirement in wide characters.
    if (*lpcbText / (INT)sizeof(WCHAR) < cchWide)
    {
        return FALSE;
    }

    if (0 == ::MultiByteToWideChar(CP_UTF8, 0, lpUtf8, cbUtf8, lpText, cchWide))
    {
        return FALSE;
    }

    *lpcbText = cchWide * (INT)sizeof(WCHAR);
    return TRUE;
}

//************************************
// Method:    BYTETOUTF16LE
// FullName:  BYTETOUTF16LE
// Access:    public
// Returns:   BOOL - TRUE on success; FALSE on invalid arguments, an
//            unsupported source encoding, a too-small output buffer, or a
//            failed conversion/allocation
// Qualifier: Converts a byte stream of the given encoding to UTF16LE
// Parameter: IN CONST LPBYTE lpBytes: the byte stream to convert
// Parameter: IN CONST INT cbBytes: length of the byte stream in bytes
// Parameter: IN CONST FileEncodeType tpBytes: actual encoding of the byte
//            stream (UTF8, UTF8_NO_BOM, UTF16LE or UTF16BE)
// Parameter: INOUT LPWSTR lpText: output buffer
// Parameter: INOUT LPINT lpcbText: on entry the capacity of lpText in bytes;
//            on success the number of bytes written. Unchanged on failure.
//************************************
BOOL BYTETOUTF16LE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
{
    if (NULL == lpBytes || NULL == lpText || NULL == lpcbText || cbBytes < 0)
    {
        return FALSE;
    }

    switch (tpBytes)
    {
    case UTF8_NO_BOM:
        {
            // Put a UTF-8 BOM in front of the text before converting, as the
            // original implementation did, so the converter sees BOM + text.
            if (cbBytes > 0x7FFFFFFF - UTF8_SIGN)
            {
                return FALSE;
            }
            const INT cbWithBOM = cbBytes + UTF8_SIGN;
            LPSTR lpWithBOM = (CHAR *)malloc(cbWithBOM);
            if (NULL == lpWithBOM)
            {
                printf_s("malloc error\n");
                return FALSE;
            }
            lpWithBOM[0] = (char)0xef;
            lpWithBOM[1] = (char)0xbb;
            lpWithBOM[2] = (char)0xbf;
            ::CopyMemory(lpWithBOM + UTF8_SIGN, lpBytes, cbBytes);

            const BOOL bConverted = Utf8BytesToUtf16Le(lpWithBOM, cbWithBOM, lpText, lpcbText);
            free(lpWithBOM);
            return bConverted;
        }
    case UTF8:
        // The input (BOM included) is handed to the converter unchanged.
        return Utf8BytesToUtf16Le((const char *)lpBytes, cbBytes, lpText, lpcbText);
    case UTF16LE:
        {
            // Already the target encoding: plain copy, but never past the output capacity.
            if (*lpcbText < cbBytes)
            {
                return FALSE;
            }
            ::CopyMemory(lpText, lpBytes, cbBytes);
            *lpcbText = cbBytes;
            return TRUE;
        }
    case UTF16BE:
        {
            if (*lpcbText < cbBytes)
            {
                return FALSE;
            }
            // Copy first, then swap the byte pairs directly in the output buffer.
            // Only the even-length prefix is swapped; an unpaired final byte is
            // copied through unchanged.
            ::CopyMemory(lpText, lpBytes, cbBytes);
            if (FALSE == Utf16leAndUtf16beTransfers((LPBYTE)lpText, cbBytes - (cbBytes % 2)))
            {
                return FALSE;
            }
            *lpcbText = cbBytes;
            return TRUE;
        }
    default:
        return FALSE;
    }
}
//************************************
// Method:    BYTETOUTF16BE
// FullName:  BYTETOUTF16BE
// Access:    public
// Returns:   BOOL - FALSE when the UTF16LE conversion fails, otherwise the
//            result of the byte-swapping step
// Qualifier: Converts a byte stream of the given encoding to UTF16BE by
//            converting to UTF16LE first and then swapping each byte pair
// Parameter: IN CONST LPBYTE lpBytes: the byte stream to convert
// Parameter: IN CONST INT cbBytes: length of the byte stream in bytes
// Parameter: IN CONST FileEncodeType tpBytes: actual encoding of the byte stream
// Parameter: INOUT LPWSTR lpText: output buffer
// Parameter: INOUT LPINT lpcbText: length of the output in bytes
//************************************
BOOL BYTETOUTF16BE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
{
    // Step 1: produce little-endian output in lpText.
    const BOOL bLittleEndianReady = BYTETOUTF16LE(lpBytes, cbBytes, tpBytes, lpText, lpcbText);
    if (!bLittleEndianReady)
    {
        return FALSE;
    }

    // Step 2: flip the byte order of what step 1 wrote.
    LPBYTE lpOutputBytes = (LPBYTE)lpText;
    return Utf16leAndUtf16beTransfers(lpOutputBytes, *lpcbText);
}

// Helper for FileAToFileB: reads a whole file into a malloc'ed buffer.
// On success *lppBytes owns the buffer (release with free) and *lpcbBytes is
// the number of bytes read. The buffer is allocated a few bytes larger than
// the file and zero-filled, so code that peeks at the first three bytes of a
// very short file stays inside the allocation. Failures are reported on
// stdout and leave *lppBytes NULL.
static BOOL ReadEntireFile(LPTSTR lpFileName, LPBYTE* lppBytes, LPINT lpcbBytes)
{
    // Keeps every later size computation (up to about 3x the input) inside an INT.
    const DWORD cbMaxInput = 0x7FFFFFFF / 4;

    *lppBytes = NULL;
    *lpcbBytes = 0;

    HANDLE hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (INVALID_HANDLE_VALUE == hFile)
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
        return FALSE;
    }

    BOOL bResult = FALSE;
    const DWORD cbFile = ::SetFilePointer(hFile, 0, NULL, FILE_END);
    if (INVALID_SET_FILE_POINTER == cbFile ||
        INVALID_SET_FILE_POINTER == ::SetFilePointer(hFile, 0, NULL, FILE_BEGIN))
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
    }
    else if (cbFile > cbMaxInput)
    {
        printf_s("file too large\n");
    }
    else
    {
        const DWORD cbAllocated = cbFile + 4;
        LPBYTE lpBuffer = (BYTE *)malloc(cbAllocated);
        if (NULL == lpBuffer)
        {
            printf_s("malloc error\n");
        }
        else
        {
            ZeroMemory(lpBuffer, cbAllocated);

            // A byte-count pointer is mandatory when no OVERLAPPED is passed.
            DWORD cbRead = 0;
            if (FALSE == ::ReadFile(hFile, lpBuffer, cbFile, &cbRead, NULL))
            {
                int errLogNumber = GetLastError();
                printf_s("error number:%d\n", errLogNumber);
                free(lpBuffer);
            }
            else
            {
                *lppBytes = lpBuffer;
                *lpcbBytes = (INT)cbRead;
                bResult = TRUE;
            }
        }
    }

    ::CloseHandle(hFile);
    return bResult;
}

// Helper for FileAToFileB: creates (or truncates) lpFileName and writes
// cbData bytes to it. Returns TRUE only when every byte was written; the
// handle is closed on every path.
static BOOL WriteEntireFile(LPTSTR lpFileName, const void* lpData, INT cbData)
{
    HANDLE hFile = ::CreateFile(lpFileName, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (INVALID_HANDLE_VALUE == hFile)
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
        return FALSE;
    }

    BOOL bResult = TRUE;
    DWORD cbWritten = 0;
    if (FALSE == ::WriteFile(hFile, lpData, (DWORD)cbData, &cbWritten, NULL))
    {
        int errLogNumber = GetLastError();
        printf_s("error number:%d\n", errLogNumber);
        bResult = FALSE;
    }
    else if (cbWritten != (DWORD)cbData)
    {
        bResult = FALSE;
    }

    ::CloseHandle(hFile);
    return bResult;
}

//************************************
// Method:    FileAToFileB
// FullName:  FileAToFileB
// Access:    public
// Returns:   BOOL - TRUE when file A was read, converted and written to file B;
//            FALSE on any failure, when A's encoding cannot be identified, or
//            when A already has the requested encoding
// Qualifier: Converts the content of file A to the requested encoding and
//            stores it in file B
// Parameter: CONST LPTSTR lpFileA: input file A (opened read-only)
// Parameter: CONST LPTSTR lpFileB: output file B (always created/overwritten,
//            but only once the conversion has succeeded)
// Parameter: FileEncodeType tpFileB: encoding of the output file
//            (UTF8 / UTF8_NO_BOM / UTF16LE / UTF16BE)
// Note:      Only UTF8 / UTF8 WITHOUT BOM / UTF16LE / UTF16BE are recognised
//            as input encodings. Failures are reported on stdout.
//************************************
BOOL FileAToFileB(IN CONST LPTSTR lpFileA, IN CONST LPTSTR lpFileB, FileEncodeType tpFileB)
{
    LPBYTE lpReadFileBytes = NULL;
    INT cbReadFileBytes = 0;
    if (FALSE == ReadEntireFile(lpFileA, &lpReadFileBytes, &cbReadFileBytes))
    {
        return FALSE;
    }

    if (!(UTF8 == tpFileB || UTF8_NO_BOM == tpFileB || UTF16LE == tpFileB || UTF16BE == tpFileB))
    {
        printf_s("Unable to identify type error\n");
        free(lpReadFileBytes);
        return FALSE;
    }

    const FileEncodeType tpFileA = GetEncodeType(lpReadFileBytes, cbReadFileBytes);
    if (!(UTF8 == tpFileA || UTF8_NO_BOM == tpFileA || UTF16LE == tpFileA || UTF16BE == tpFileA))
    {
        printf("file encode unable to identify.\n");
        free(lpReadFileBytes);
        return FALSE;
    }

    // Same encoding on both sides: no conversion exists for that, report failure.
    if (tpFileA == tpFileB)
    {
        free(lpReadFileBytes);
        return FALSE;
    }

    // Size the output buffer for the chosen conversion.
    const BOOL bSourceIsUtf16 = (UTF16LE == tpFileA || UTF16BE == tpFileA);
    const BOOL bTargetIsUtf8 = (UTF8 == tpFileB || UTF8_NO_BOM == tpFileB);
    INT cbOutput = 0;
    if (bTargetIsUtf8)
    {
        if (bSourceIsUtf16)
        {
            // One 16-bit unit can expand to three UTF-8 bytes.
            cbOutput = (cbReadFileBytes / 2 + 1) * 3;
        }
        else if (UTF8 == tpFileA)
        {
            cbOutput = cbReadFileBytes - UTF8_SIGN; // BOM is removed
        }
        else
        {
            cbOutput = cbReadFileBytes + UTF8_SIGN; // BOM is added
        }
    }
    else
    {
        if (bSourceIsUtf16)
        {
            cbOutput = cbReadFileBytes;             // byte swap only
        }
        else
        {
            // One UTF-8 byte yields at most one 16-bit unit; +BOM for the no-BOM source.
            cbOutput = (cbReadFileBytes + UTF8_SIGN) * (INT)sizeof(WCHAR);
        }
    }

    LPBYTE lpOutput = (BYTE *)malloc(cbOutput > 0 ? cbOutput : 1);
    if (NULL == lpOutput)
    {
        printf_s("malloc error\n");
        free(lpReadFileBytes);
        return FALSE;
    }
    if (cbOutput > 0)
    {
        ZeroMemory(lpOutput, cbOutput);
    }

    // The target encoding alone selects the converter; each converter switches
    // on the source encoding itself and updates cbOutput to the bytes produced.
    BOOL bResult = FALSE;
    switch (tpFileB)
    {
    case UTF8:
        bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, tpFileA, (LPSTR)lpOutput, &cbOutput, TRUE);
        break;
    case UTF8_NO_BOM:
        bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, tpFileA, (LPSTR)lpOutput, &cbOutput, FALSE);
        break;
    case UTF16LE:
        bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, tpFileA, (LPWSTR)lpOutput, &cbOutput);
        break;
    default: // UTF16BE, the only value left after the validation above
        bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, tpFileA, (LPWSTR)lpOutput, &cbOutput);
        break;
    }
    free(lpReadFileBytes);
    lpReadFileBytes = NULL;

    // File B is touched only after a successful conversion, so a failed run
    // neither truncates an existing file nor writes unconverted data.
    if (FALSE != bResult)
    {
        bResult = WriteEntireFile(lpFileB, lpOutput, cbOutput);
    }

    free(lpOutput);
    return bResult;
}

int _tmain(int argc, _TCHAR* argv[])
{
LPTSTR lpFileA_utf8 = TEXT("Input-utf8.txt");
LPTSTR lpFileA_utf8_no_bom = TEXT("Input-utf8-no-bom.txt");
LPTSTR lpFileA_utf16le = TEXT("Input-utf16le.txt");
LPTSTR lpFileA_utf16be = TEXT("Input-utf16be.txt");

LPTSTR lpFileB_utf8 = TEXT("Output-utf8.txt");
LPTSTR lpFileB_utf8_no_bom = TEXT("Output-utf8-no-bom.txt");
LPTSTR lpFileB_utf16le = TEXT("Output-utf16le.txt");
LPTSTR lpFileB_utf16be = TEXT("Output-utf16be.txt");

//FileAToFileB(lpFileA_utf8, lpFileB_utf8_no_bom, UTF8_NO_BOM);
//FileAToFileB(lpFileA_utf8, lpFileB_utf16le, UTF16LE);
//FileAToFileB(lpFileA_utf8, lpFileB_utf16be, UTF16BE);

FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf8, UTF8);
//FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16le, UTF16LE);
//FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16be, UTF16BE);

//FileAToFileB(lpFileA_utf16le, lpFileB_utf8, UTF8);
//FileAToFileB(lpFileA_utf16le, lpFileB_utf8_no_bom, UTF8_NO_BOM);
//FileAToFileB(lpFileA_utf16le, lpFileB_utf16be, UTF16BE);

//FileAToFileB(lpFileA_utf16be, lpFileB_utf8, UTF8);
//FileAToFileB(lpFileA_utf16be, lpFileB_utf8_no_bom, UTF8_NO_BOM);
//FileAToFileB(lpFileA_utf16be, lpFileB_utf16le, UTF16LE);

return 0;
}


*注:源码下载地址,请点击这里
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: