指定存储文件的编码格式(下)
2015-01-25 20:18
435 查看
指定存储文件编码格式共两篇。
此为下篇。
对于上篇的内容进行了优化,这样大大提高了代码的通用性和重用性。
有兴趣的可以都下载下来对比参考。
*注:源码下载地址,请点击这里。
此为下篇。
对于上篇的内容进行了优化,这样大大提高了代码的通用性和重用性。
有兴趣的可以都下载下来对比参考。
// test__EncodeAFileToEncodeBFile.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include <windows.h> #include <string> #include <iostream> #ifndef IN #define IN #endif #ifndef OUT #define OUT #endif #ifndef INOUT #define INOUT #endif #ifndef OPTION #define OPTION #endif #define UTF8_SIGN 3 #define UTF16_SIGN 2 #define FILE_HEADER 6 enum FileEncodeType { OTHER = 0, UTF8, UTF8_NO_BOM, UTF16LE, UTF16LE_NO_BOM, UTF16BE, UTF16BE_NO_BOM }; //************************************ // Method: IsUTF8EncodeText // FullName: IsUTF8EncodeText // Access: public // Returns: BOOL // Qualifier:判断输入内容是否时UTF8编码格式(可以判断不带BOM的UTF8编码) // Parameter: BYTE * lpText:判断是否时UTF8的内容 // Parameter: INT cchText:UTF8的内容长度(按个数) //************************************ BOOL IsUTF8EncodeText(IN CONST BYTE* lpText, IN INT cchText) { // UTF8判断规则 // 0000 0000 - 0000 007F 0xxxxxxx // 0000 0080 - 0000 07FF 110xxxxx 10xxxxxx // 0000 0800 - 0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx // 0001 0000 - 001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // 0020 0000 - 03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx // 0400 0000 - 7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx int i; DWORD nBytes = 0;//UFT8可用1-6个字节编码,ASCII用一个字节 BYTE lpByte; BOOL bAllAscii = TRUE; //如果全部都是ASCII, 说明不是UTF-8 for (i = 0; i < cchText; i++) { lpByte = *(lpText + i); if ((lpByte & 0x80) != 0) // 判断是否ASCII编码,如果不是,说明有可能是UTF-8,ASCII用7位编码,但用一个字节存,最高位标记为0,o0xxxxxxx bAllAscii = FALSE; if (nBytes == 0) //如果不是ASCII码,应该是多字节符,计算字节数 { if (lpByte >= 0x80) { if (lpByte >= 0xFC && lpByte <= 0xFD) nBytes = 6; else if (lpByte >= 0xF8) nBytes = 5; else if (lpByte >= 0xF0) nBytes = 4; else if (lpByte >= 0xE0) nBytes = 3; else if (lpByte >= 0xC0) nBytes = 2; else { return FALSE; } nBytes--; } } else //多字节符的非首字节,应为 10xxxxxx { if ((lpByte & 0xC0) != 0x80) { return FALSE; } nBytes--; } } if (nBytes > 0) //违返规则 { return FALSE; } if (bAllAscii) //如果全部都是ASCII, 说明不是UTF-8 { return FALSE; } return TRUE; } //************************************ // Method: GetEncodeType // FullName: GetEncodeType // Access: public // Returns: FileEncodeType // Qualifier:识别指定文件编码 // Parameter: IN CONST LPTSTR lpFileName:指定文件名称 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE //************************************ FileEncodeType GetEncodeType(IN CONST LPTSTR lpFileName) { FileEncodeType lFileEncodeType = OTHER; HANDLE hFile = NULL; LPBYTE lpFileHeader = NULL; INT cbFileHeader = FILE_HEADER; INT cchFileHeader = FILE_HEADER; hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (INVALID_HANDLE_VALUE == hFile) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); return lFileEncodeType; } lpFileHeader = (BYTE *)malloc(cbFileHeader); if (NULL == lpFileHeader) { printf_s("malloc error\n"); ::CloseHandle(hFile); return lFileEncodeType; } ZeroMemory(lpFileHeader, cbFileHeader); SetFilePointer(hFile, 0, NULL, FILE_BEGIN); if (FALSE == ReadFile(hFile, lpFileHeader, FILE_HEADER, NULL, NULL)) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType; } if (0xef == lpFileHeader[0] && 0xbb == lpFileHeader[1] && 0xbf == lpFileHeader[2]) { free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType = UTF8; } else if (0xff == lpFileHeader[0] && 0xfe == lpFileHeader[1]) { free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType = UTF16BE; } else if (0xfe == lpFileHeader[0] && 0xff == lpFileHeader[1]) { free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType = UTF16LE; } else { free(lpFileHeader); cbFileHeader = SetFilePointer(hFile, 0, NULL, FILE_END); if (INVALID_SET_FILE_POINTER == cbFileHeader) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); ::CloseHandle(hFile); return lFileEncodeType = OTHER; } lpFileHeader = (BYTE *)malloc(cbFileHeader); if (NULL == lpFileHeader) { printf_s("malloc error\n"); ::CloseHandle(hFile); return lFileEncodeType = OTHER; } ZeroMemory(lpFileHeader, cbFileHeader); SetFilePointer(hFile, 0, NULL, FILE_BEGIN); if (FALSE == ReadFile(hFile, lpFileHeader, cbFileHeader, NULL, NULL)) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType = OTHER; } if (TRUE == IsUTF8EncodeText(lpFileHeader, cbFileHeader)) { free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType = UTF8_NO_BOM; } lFileEncodeType = OTHER; } free(lpFileHeader); ::CloseHandle(hFile); return lFileEncodeType; } //************************************ // Method: GetEncodeType // FullName: GetEncodeType // Access: public // Returns: FileEncodeType // Qualifier:识别指定字节流编码 // Parameter: IN CONST BYTE * lpBytes:指定字节流 // Parameter: IN CONST INT cchBytes:指定字节流长度 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE //************************************ FileEncodeType GetEncodeType(IN CONST BYTE *lpBytes, IN CONST INT cchBytes) { if (0xef == lpBytes[0] && 0xbb == lpBytes[1] && 0xbf == lpBytes[2]) { return UTF8; } else if (0xff == lpBytes[0] && 0xfe == lpBytes[1]) { return UTF16LE; } else if (0xfe == lpBytes[0] && 0xff == lpBytes[1]) { return UTF16BE; } else { if (TRUE == IsUTF8EncodeText(lpBytes, cchBytes)) { return UTF8_NO_BOM; } } return OTHER; } //************************************ // Method: Utf16leAndUtf16beTransfers // FullName: Utf16leAndUtf16beTransfers // Access: public // Returns: BOOL // Qualifier:相互转换UTF16LE 和 UTF16BE // Parameter: INOUT LPBYTE lpUtf16String:指定UTF16编码字节流 // Parameter: IN CONST INT cchUtf16String:指定UTF16编码字节流长度 //************************************ BOOL Utf16leAndUtf16beTransfers(INOUT LPBYTE lpUtf16String, IN CONST INT cchUtf16String) { if (NULL == lpUtf16String || cchUtf16String < 0) { return FALSE; } for (INT i = 0; i < cchUtf16String; i += 2)//每两值交换 { lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1]; lpUtf16String[i + 1] = lpUtf16String[i + 1] ^ lpUtf16String[i]; lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1]; } return TRUE; } //************************************ // Method: Utf8ToUtf8NoBOM // FullName: Utf8ToUtf8NoBOM // Access: public // Returns: BOOL // Qualifier:UTF8编码转换到UTF8 without BOM编码 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度 //************************************ BOOL Utf8ToUtf8NoBOM(IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String, INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString) { if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0) { return FALSE; } if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN) { return FALSE; } ::CopyMemory(lpUtf8NoBOMString, lpUtf8String + UTF8_SIGN, cchUtf8String - UTF8_SIGN); return TRUE; } //************************************ // Method: Utf8NoBOMToUtf8 // FullName: Utf8NoBOMToUtf8 // Access: public // Returns: BOOL // Qualifier:UTF8 without BOM编码转换到UTF8编码 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度 //************************************ BOOL Utf8NoBOMToUtf8(INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString, IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String) { if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0) { return FALSE; } if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN) { return FALSE; } lpUtf8String[0] = 0xef; lpUtf8String[1] = 0xbb; lpUtf8String[2] = 0xbf; ::CopyMemory(lpUtf8String + UTF8_SIGN, lpUtf8NoBOMString, cchUtf8NoBOMString); return TRUE; } //************************************ // Method: BYTETOUTF8 // FullName: BYTETOUTF8 // Access: public // Returns: BOOL // Qualifier:指定编码转换为UTF8(或without BOM)编码 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的字节流 // Parameter: IN CONST INT cbBytes:指定需要转换的字节流长度(字节单位) // Parameter: IN CONST FileEncodeType tpBytes:指定需要转换的字节流的实际编码格式 // Parameter: INOUT LPSTR lpText:指定输出的编码流 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位) // Parameter: IN CONST BOOL bWithBOM:指定输出的编码是否有BOM //************************************ BOOL BYTETOUTF8(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPSTR lpText, INOUT LPINT lpcbText, IN CONST BOOL bWithBOM = TRUE) { BOOL bResult = FALSE; LPSTR lpSourceA = NULL; INT cbSourceA = 0; INT cchSourceA = 0; LPWSTR lpSourceW = NULL; INT cbSourceW = 0; INT cchSourceW = 0; if (NULL == lpBytes || NULL == lpText || !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) || cbBytes < 0 || NULL == lpcbText) { return FALSE; } switch (tpBytes) { case UTF8_NO_BOM: { if (FALSE == bWithBOM) { return FALSE; } if (*lpcbText < cbBytes + UTF8_SIGN) { return FALSE; } cbSourceA = cbBytes; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); bResult = Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpText, *lpcbText); free(lpSourceA); } break; case UTF8: { if (TRUE == bWithBOM) { return FALSE; } if (*lpcbText < cbBytes - UTF8_SIGN) { return FALSE; } cbSourceA = cbBytes; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); free(lpSourceA); } break; case UTF16LE: { cbSourceW = cbBytes; cchSourceW = cbSourceW / sizeof(WCHAR); lpSourceW = (WCHAR *)malloc(cbSourceW); if (NULL == lpSourceW) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceW, cbSourceW); ::CopyMemory(lpSourceW, lpBytes, cbBytes); *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小 if (TRUE == bWithBOM) { if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL)) { free(lpSourceW); return FALSE; } bResult = TRUE; } else { cbSourceA = *lpcbText; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); free(lpSourceW); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL)) { free(lpSourceW); free(lpSourceA); return FALSE; } bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); *lpcbText -= UTF8_SIGN; free(lpSourceA); } free(lpSourceW); } break; case UTF16BE: { LPBYTE lpBytesTemp = NULL; INT cbBytesTemp = 0; INT cchBytesTemp = 0; cbBytesTemp = cbBytes; lpBytesTemp = (BYTE *)malloc(cbBytesTemp); if (NULL == lpBytesTemp) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpBytesTemp, cbBytesTemp); ::CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp); if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp)) { free(lpBytesTemp); return FALSE; } cbSourceW = cbBytes; cchSourceW = cbSourceW / sizeof(WCHAR); lpSourceW = (WCHAR *)malloc(cbSourceW); if (NULL == lpSourceW) { printf_s("malloc error\n"); free(lpBytesTemp); return FALSE; } ZeroMemory(lpSourceW, cbSourceW); ::CopyMemory(lpSourceW, lpBytesTemp, cbSourceW); free(lpBytesTemp); lpBytesTemp = NULL; *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小 if (TRUE == bWithBOM) { if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL)) { free(lpSourceW); return FALSE; } bResult = TRUE; } else { cbSourceA = *lpcbText; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); free(lpSourceW); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL)) { free(lpSourceW); free(lpSourceA); return FALSE; } bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); *lpcbText -= UTF8_SIGN; free(lpSourceA); } free(lpSourceW); } break; default: return FALSE; } return bResult; } //************************************ // Method: BYTETOUTF16LE // FullName: BYTETOUTF16LE // Access: public // Returns: BOOL // Qualifier:指定编码流转换为UTF16LE编码格式 // Parameter: IN CONST LPBYTE lpBytes:指定的编码流 // Parameter: IN CONST INT cbBytes:指定的编码流长度(字节单位) // Parameter: IN CONST FileEncodeType tpBytes:指定的编码流的实际编码格式 // Parameter: INOUT LPWSTR lpText:指定输出的编码流 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位) //************************************ BOOL BYTETOUTF16LE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText) { BOOL bResult = FALSE; LPSTR lpSourceA = NULL; INT cbSourceA = 0; INT cchSourceA = 0; LPWSTR lpSourceW = NULL; INT cbSourceW = 0; INT cchSourceW = 0; if (NULL == lpBytes || NULL == lpText || !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) || cbBytes < 0 || NULL == lpcbText) { return FALSE; } switch (tpBytes) { case UTF8_NO_BOM: { LPSTR lpSourceATemp = NULL; INT cbSourceATemp = 0; INT cchSourceATemp = 0; if (*lpcbText < cbBytes + UTF8_SIGN) { return FALSE; } cbSourceA = cbBytes; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); cbSourceATemp = cbBytes + UTF8_SIGN; cchSourceATemp = cbSourceATemp; lpSourceATemp = (CHAR *)malloc(cbSourceATemp); if (NULL == lpSourceATemp) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceATemp, cbSourceATemp); if (FALSE == Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpSourceATemp, cchSourceATemp)) { free(lpSourceA); free(lpSourceATemp); return FALSE; } free(lpSourceA); lpSourceA = NULL; *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, 0); if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, *lpcbText)) { free(lpSourceATemp); return FALSE; } *lpcbText *= sizeof(WCHAR); free(lpSourceATemp); bResult = TRUE; } break; case UTF8: { if (*lpcbText < cbBytes + UTF8_SIGN) { return FALSE; } cbSourceA = cbBytes; cchSourceA = cbSourceA; lpSourceA = (CHAR *)malloc(cbSourceA); if (NULL == lpSourceA) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpSourceA, cbSourceA); CopyMemory(lpSourceA, lpBytes, cbSourceA); *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, 0); if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, *lpcbText)) { free(lpSourceA); return FALSE; } *lpcbText *= sizeof(WCHAR); free(lpSourceA); bResult = TRUE; } break; case UTF16LE: { CopyMemory(lpText, lpBytes, cbBytes); *lpcbText = cbBytes; return TRUE; } break; case UTF16BE: { if (*lpcbText < cbBytes) { return FALSE; } LPBYTE lpBytesTemp = NULL; INT cbBytesTemp = 0; cbBytesTemp = cbBytes; lpBytesTemp = (BYTE *)malloc(cbBytesTemp); if (NULL == lpBytesTemp) { printf_s("malloc error\n"); return FALSE; } ZeroMemory(lpBytesTemp, cbBytesTemp); CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp); if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp)) { free(lpSourceW); return FALSE; } CopyMemory(lpText, lpBytesTemp, cbBytesTemp); *lpcbText = cbBytesTemp; free(lpBytesTemp); bResult = TRUE; } break; default: return FALSE; } return bResult; } //************************************ // Method: BYTETOUTF16BE // FullName: BYTETOUTF16BE // Access: public // Returns: BOOL // Qualifier:指定编码流转换为UTF16BE编码格式 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的编码流 // Parameter: IN CONST INT cbBytes:指定需要转换的编码流长度(字节单位) // Parameter: IN CONST FileEncodeType tpBytes::指定需要转换的编码流的实际编码格式 // Parameter: INOUT LPWSTR lpText:指定输出流 // Parameter: INOUT LPINT lpcbText:指定输出流的长度(字节单位) //************************************ BOOL BYTETOUTF16BE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText) { if (FALSE == BYTETOUTF16LE(lpBytes, cbBytes, tpBytes, lpText, lpcbText)) { return FALSE; } return Utf16leAndUtf16beTransfers((LPBYTE)lpText, *lpcbText); } //************************************ // Method: FileAToFileB // FullName: FileAToFileB // Access: public // Returns: BOOL // Qualifier:指定文件A中内容转换为指定编码存入指定文件B中 // Parameter: CONST LPTSTR lpFileA:输入文件A(只读文件) // Parameter: CONST LPSTR lpFileB:输出文件B(总是创建文件) // Parameter: FileEncodeType tpFileB:指定输出文件编码(UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE) // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE //************************************ BOOL FileAToFileB(IN CONST LPTSTR lpFileA, IN CONST LPTSTR lpFileB, FileEncodeType tpFileB) { BOOL bResult = FALSE; FileEncodeType tpFileA = OTHER; HANDLE hFileA = NULL; HANDLE hFileB = NULL; LPBYTE lpReadFileBytes = NULL; INT cbReadFileBytes = 0; INT cchReadFileBytes = 0; LPSTR lpWriteFileString = NULL; INT cbWriteFileString = 0; INT cchWriteFileString = 0; LPWSTR lpWriteFileWString = NULL; INT cbWriteFileWString = 0; INT cchWriteFileWString = 0; hFileA = ::CreateFile(lpFileA, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (INVALID_HANDLE_VALUE == hFileA) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); return FALSE; } cbReadFileBytes = SetFilePointer(hFileA, 0, NULL, FILE_END); if (INVALID_SET_FILE_POINTER == cbReadFileBytes) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); ::CloseHandle(hFileA); return FALSE; } if (1 != sizeof(BYTE)) { printf_s("byte cell width error\n"); return FALSE; } lpReadFileBytes = (BYTE *)malloc(cbReadFileBytes); if (NULL == lpReadFileBytes) { printf_s("malloc error\n"); ::CloseHandle(hFileA); return FALSE; } ZeroMemory(lpReadFileBytes, cbReadFileBytes); SetFilePointer(hFileA, 0, NULL, FILE_BEGIN); if (FALSE == ReadFile(hFileA, lpReadFileBytes, cbReadFileBytes, NULL, NULL)) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); free(lpReadFileBytes); ::CloseHandle(hFileA); return FALSE; } ::CloseHandle(hFileA); hFileA = NULL; if (!(UTF8 == tpFileB || UTF8_NO_BOM == tpFileB || UTF16LE == tpFileB || UTF16BE == tpFileB)) { printf_s("Unable to identify type error\n"); free(lpReadFileBytes); return FALSE; } hFileB = ::CreateFile(lpFileB, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); if (INVALID_HANDLE_VALUE == hFileB) { int errLogNumber = GetLastError(); printf_s("error number:%d\n", errLogNumber); free(lpReadFileBytes); return FALSE; } tpFileA = GetEncodeType(lpReadFileBytes, cbReadFileBytes); switch (tpFileA) { case UTF8: { switch (tpFileB) { case UTF8_NO_BOM: cbWriteFileString = cbReadFileBytes - UTF8_SIGN; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileString, &cbWriteFileString, FALSE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16LE: cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16BE: cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; default:; } } break; case UTF8_NO_BOM: { switch (tpFileB) { case UTF8: cbWriteFileString = cbReadFileBytes + UTF8_SIGN; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileString, &cbWriteFileString, TRUE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16LE: cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR); lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16BE: cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR); lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; default:; } } break; case UTF16LE: { switch (tpFileB) { case UTF8: cbWriteFileString = cbReadFileBytes; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, TRUE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF8_NO_BOM: cbWriteFileString = cbReadFileBytes; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, FALSE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16BE: cbWriteFileWString = cbReadFileBytes; lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; default:; } } break; case UTF16BE: { switch (tpFileB) { case UTF8: cbWriteFileString = cbReadFileBytes; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, TRUE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF8_NO_BOM: cbWriteFileString = cbReadFileBytes; lpWriteFileString = (CHAR *)malloc(cbWriteFileString); ZeroMemory(lpWriteFileString, cbWriteFileString); bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, FALSE); free(lpReadFileBytes); lpReadFileBytes = NULL; break; case UTF16LE: cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); ZeroMemory(lpWriteFileWString, cbWriteFileWString); bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileWString, &cbWriteFileWString); free(lpReadFileBytes); lpReadFileBytes = NULL; break; default:; } } break; default: printf("file encode unable to identify.\n"); free(lpReadFileBytes); return FALSE; } if (NULL != lpWriteFileString) { if (FALSE == WriteFile(hFileB, lpWriteFileString, cbWriteFileString, NULL, NULL)) { free(lpWriteFileString); return FALSE; } free(lpWriteFileString); lpWriteFileString = NULL; } if (NULL != lpWriteFileWString) { if (FALSE == WriteFile(hFileB, lpWriteFileWString, cbWriteFileWString, NULL, NULL)) { free(lpWriteFileWString); return FALSE; } free(lpWriteFileWString); lpWriteFileWString = NULL; } ::CloseHandle(hFileB); hFileB = NULL; return bResult; } int _tmain(int argc, _TCHAR* argv[]) { LPTSTR lpFileA_utf8 = TEXT("Input-utf8.txt"); LPTSTR lpFileA_utf8_no_bom = TEXT("Input-utf8-no-bom.txt"); LPTSTR lpFileA_utf16le = TEXT("Input-utf16le.txt"); LPTSTR lpFileA_utf16be = TEXT("Input-utf16be.txt"); LPTSTR lpFileB_utf8 = TEXT("Output-utf8.txt"); LPTSTR lpFileB_utf8_no_bom = TEXT("Output-utf8-no-bom.txt"); LPTSTR lpFileB_utf16le = TEXT("Output-utf16le.txt"); LPTSTR lpFileB_utf16be = TEXT("Output-utf16be.txt"); //FileAToFileB(lpFileA_utf8, lpFileB_utf8_no_bom, UTF8_NO_BOM); //FileAToFileB(lpFileA_utf8, lpFileB_utf16le, UTF16LE); //FileAToFileB(lpFileA_utf8, lpFileB_utf16be, UTF16BE); FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf8, UTF8); //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16le, UTF16LE); //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16be, UTF16BE); //FileAToFileB(lpFileA_utf16le, lpFileB_utf8, UTF8); //FileAToFileB(lpFileA_utf16le, lpFileB_utf8_no_bom, UTF8_NO_BOM); //FileAToFileB(lpFileA_utf16le, lpFileB_utf16be, UTF16BE); //FileAToFileB(lpFileA_utf16be, lpFileB_utf8, UTF8); //FileAToFileB(lpFileA_utf16be, lpFileB_utf8_no_bom, UTF8_NO_BOM); //FileAToFileB(lpFileA_utf16be, lpFileB_utf16le, UTF16LE); return 0; }
*注:源码下载地址,请点击这里。
相关文章推荐
- 指定存储文件的编码格式(上)
- java文件读写操作指定编码格式
- java文件读写操作指定编码格式
- myeclipse下如何设置指定类型文件的编码格式
- c#读取并异步写入文件,简单版,指定编码,保持原格式。
- Java(Android)写文件时指定编码格式的问题
- Eclipse 对项目中特定文件使用指定编码格式
- php文件存储时的编码格式--utf8无BOM
- java文件读写操作指定编码格式[转]
- java文件读写操作指定编码格式
- Python write 写文件编码格式指定为utf-8
- java文件读写操作指定编码格式[转]
- Heritrix使用UTF-8编码格式存储文件
- 解决python ConfigParser文件编码问题(按指定格式存储文件(txt))
- 小时代5-Perl创建指定文件编码格式(如utf-8)
- 使用Perl创建指定编码格式(如utf-8)文件的实现代码
- java文件读写操作指定编码格式
- python指定文件编码格式
- java IO 指定输出文件的编码格式
- java文件读写操作指定编码格式