
A Comparison of I/O Performance: FileMapping vs. Direct File Access with a Private Buffer

2017-12-01 11:27
I have recently been reading Chapter 17 ("Memory-Mapped Files") of Windows via C/C++.

The author recommends file mapping (FileMapping) for reading and writing large files. The file is mapped directly into the process address space, and the operating system takes care of the complex backing work, such as caching and paging; the programmer simply treats the file contents as a range of memory addresses.
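
For readers unfamiliar with the pattern, its core looks roughly like this (a minimal sketch with error handling omitted; the file name is a placeholder):

HANDLE hFile = CreateFile(TEXT("sample.dat"), GENERIC_READ,
    FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
HANDLE hMap  = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
PBYTE  pView = (PBYTE)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); // map the whole file
// The file now behaves like ordinary memory: pView[0] is its first byte,
// and the OS pages data in on demand.
UnmapViewOfFile(pView);
CloseHandle(hMap);
CloseHandle(hFile);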

However, while this does make the coding simpler, I found that handing all of the caching over to the operating system is not necessarily a good strategy, especially for large-file I/O. So I rewrote one of the book's examples to compare the two approaches; the rewrite throttles outstanding blocks with a semaphore so that at most nThreads blocks are in flight at once. The rewritten code is below, and it still supports the book's original logic as well.

Complete code:

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <tchar.h>
#include <windows.h>

//////////////////////////////////////////////////////////////////////////
class CStopwatch {
public:
    CStopwatch() { QueryPerformanceFrequency(&m_liPerfFreq); Start(); }

    void Start() { QueryPerformanceCounter(&m_liPerfStart); }

    __int64 Now() const {        // Returns # of milliseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000)
            / m_liPerfFreq.QuadPart);
    }

    __int64 NowInMicro() const { // Returns # of microseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000000)
            / m_liPerfFreq.QuadPart);
    }

private:
    LARGE_INTEGER m_liPerfFreq;  // Counts per second
    LARGE_INTEGER m_liPerfStart; // Starting count
};

CStopwatch stopwatch;
//////////////////////////////////////////////////////////////////////////

typedef struct {
    PVOID pvAddr;
    DWORD dwBlockSize;
    DWORD dwCount;
    volatile bool bIsFinished;  // written by the pool thread, polled by the main thread
} WORK_DATA, *PWORK_DATA;

typedef struct {
    WORK_DATA wDATA;
    PTP_WORK pWork;
} WORKITEM_INFO, *PWORKITEM_INFO;

#define MAX_THREADS 4
PWORKITEM_INFO g_WORKITEM[MAX_THREADS] = { 0 };
PBYTE          g_BUFF[MAX_THREADS]     = { 0 };

HANDLE hSemp = NULL;    // throttles the number of in-flight blocks
//////////////////////////////////////////////////////////////////////////

VOID CALLBACK WorkCallback(
    PTP_CALLBACK_INSTANCE Instance,
    PVOID Context,
    PTP_WORK Work) {

    PWORK_DATA pData = (PWORK_DATA)Context;

    // Count the number of 0s in this block.
    PBYTE pFile = (PBYTE)pData->pvAddr;
    for (DWORD dwByte = 0; dwByte < pData->dwBlockSize; dwByte++) {
        if (pFile[dwByte] == 0)
            pData->dwCount++;
    }
    pData->bIsFinished = true;

    // Give one slot back to the producer once this callback returns.
    ReleaseSemaphoreWhenCallbackReturns(Instance, hSemp, 1);
}

__int64 CountWithFileMapMultiThreads(HANDLE hFileMap, __int64 qwFileSize, DWORD dwBlockSize, DWORD nThreads) {

    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);

    __int64 qwFileOffset = 0, qwNumOfZeros = 0;

    // The semaphore count limits the number of views in flight to nThreads.
    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate the work-item slots.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = NULL;
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);   // wait until at least one slot is free.

        // Harvest every finished item and release its view.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {    // there is a work-item slot.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) { // this item has finished.
                    // Collect its result.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;

                    // Unmap the view.
                    UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);

                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);

                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Find a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL) // free slot found.
                break;
        }

        // A slot (and therefore a thread) is available.
        if (nSlotID < nThreads)
        {
            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ?
                (DWORD)qwFileSize : dwBlockSize;

            // Map this block of the file.
            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);

            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.pvAddr = pbFile;
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;

            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(
                WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA,
                NULL);

            // Submit the work item to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock; // advance the offset.
            qwFileSize -= dwBytesInBlock;   // reduce the remaining size.
        }
    }

    // Wait for all outstanding work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {    // there is a work-item memory block.
            if (g_WORKITEM[i]->pWork) // there is an active work item in the pool.
            {
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE); // wait for it.

                // Collect the result of the active work item.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;

                // Unmap the view.
                UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);

                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);

                g_WORKITEM[i]->pWork = NULL;
            }

            // Free the work-item memory.
            free(g_WORKITEM[i]);
            g_WORKITEM[i] = NULL;
        }
    }

    CloseHandle(hSemp);
    hSemp = NULL;

    return qwNumOfZeros;
}

// Count directly, reading each block from the file into a private buffer.
__int64 CountWithFileMultiThread(HANDLE hFile, __int64 qwFileSize, DWORD dwBlockSize, DWORD nThreads) {
    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);

    __int64 qwFileOffset = 0, qwNumOfZeros = 0;

    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate one read buffer per slot.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_BUFF[i] = (PBYTE)malloc(dwBlockSize);

        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = g_BUFF[i];
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);   // wait until at least one slot is free.

        // Harvest every finished item.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {    // there is a work-item slot.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) { // this item has finished.
                    // Collect its result.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;

                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);

                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Find a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL) // free slot found.
                break;
        }

        // A slot (and therefore a thread) is available.
        if (nSlotID < nThreads)
        {
            // Determine the number of bytes to read for this block.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ?
                (DWORD)qwFileSize : dwBlockSize;

            // Read the next block sequentially into this slot's buffer.
            DWORD dwRead = 0;
            ReadFile(hFile, g_BUFF[nSlotID], dwBytesInBlock, &dwRead, NULL);

            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;

            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(
                WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA,
                NULL);

            // Submit the work item to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock; // advance the offset.
            qwFileSize -= dwBytesInBlock;   // reduce the remaining size.
        }
    }

    // Wait for all outstanding work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {    // there is a work-item slot.
            if (g_WORKITEM[i]->pWork) // there is an active work item in the pool.
            {
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE); // wait for it.

                // Collect the result.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;

                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);

                g_WORKITEM[i]->pWork = NULL;
            }
        }
    }

    // Clean up the buffers and work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        free(g_BUFF[i]);
        g_BUFF[i] = NULL;

        free(g_WORKITEM[i]);
        g_WORKITEM[i] = NULL;
    }

    CloseHandle(hSemp);
    hSemp = NULL;

    return qwNumOfZeros;
}

__int64 CountZeros(LPCTSTR szFileName, DWORD dwBlockSize, DWORD nThreads = 1, bool bUseFileMapp = true) {

    // Views must always start on a multiple of the allocation granularity.
    SYSTEM_INFO sinf;
    GetSystemInfo(&sinf);

    // Open the data file.
    HANDLE hFile = CreateFile(szFileName, GENERIC_READ,
        FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
    if (hFile == INVALID_HANDLE_VALUE)
        return -1;

    DWORD dwFileSizeHigh;
    __int64 qwFileSize = GetFileSize(hFile, &dwFileSizeHigh);
    qwFileSize += (((__int64)dwFileSizeHigh) << 32);

    // Round the block size up to a multiple of the allocation granularity (ceiling).
    DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1) / sinf.dwAllocationGranularity;
    dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount;

    if (!bUseFileMapp) {
        LONGLONG lLastTimeStamp = stopwatch.Now();
        _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
            dwBlockSize, szFileName, qwFileSize);

        __int64 qwNumOfZeros = CountWithFileMultiThread(hFile, qwFileSize, dwBlockSize, nThreads);

        LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
        _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);
        CloseHandle(hFile);
        return qwNumOfZeros;
    }

    // Create the file-mapping object.
    HANDLE hFileMapping = CreateFileMapping(hFile, NULL,
        PAGE_READONLY, 0, 0, NULL);

    // We no longer need access to the file object's handle.
    CloseHandle(hFile);

    __int64 qwFileOffset = 0, qwNumOfZeros = 0;

    LONGLONG lLastTimeStamp = stopwatch.Now();
    _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
        dwBlockSize, szFileName, qwFileSize);

    if (nThreads > 1) // multiple threads
    {
        qwNumOfZeros = CountWithFileMapMultiThreads(hFileMapping, qwFileSize, dwBlockSize, nThreads);
    }
    else {
        while (qwFileSize > 0) {

            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = dwBlockSize;
            if (qwFileSize < dwBlockSize)
                dwBytesInBlock = (DWORD)qwFileSize;

            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMapping, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);

            // Count the number of 0s in this block.
            for (DWORD dwByte = 0; dwByte < dwBytesInBlock; dwByte++) {
                if (pbFile[dwByte] == 0)
                    qwNumOfZeros++;
            }

            // Unmap the view; we don't want multiple views in our address space.
            UnmapViewOfFile(pbFile);

            // Skip to the next set of bytes in the file.
            qwFileOffset += dwBytesInBlock;
            qwFileSize -= dwBytesInBlock;
        }
    }

    LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
    _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);

    CloseHandle(hFileMapping);
    return qwNumOfZeros;
}

void testcase1() {
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase2() {
    // block 512KB, 1 thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase3() {
    // block 512KB, 1 thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);
}

int _tmain(int argc, TCHAR* argv[], TCHAR* env[])
{
    testcase2();
    return 0;
}
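
For reference, the listing builds as-is with the Visual C++ command-line compiler; assuming the source is saved as CountZeros.cpp (a file name chosen here for illustration), something like the following should work:

cl /EHsc CountZeros.cpp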


I extended the original example: it supports multi-threaded concurrency, a configurable block size that is rounded up to the allocation granularity (64 KB by default on my machine), and a non-file-mapping mode that reads the file directly into private buffers.
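
The block size must be a multiple of the allocation granularity because MapViewOfFile requires each view's file offset to be granularity-aligned. As a worked example of the ceiling computation in CountZeros (the numbers assume the common 64 KB granularity and are illustrative, not measured):

SYSTEM_INFO sinf;
GetSystemInfo(&sinf);              // dwAllocationGranularity is typically 65536 (64 KB)
DWORD dwBlockSize  = 100 * 1024;   // caller asks for 100 KB
DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1)
                     / sinf.dwAllocationGranularity;       // (102400 + 65535) / 65536 = 2
dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount; // 131072, i.e. 128 KB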

Test 1: measure single-threaded, multi-threaded, and multi-threaded non-FileMap performance, with every process that might generate I/O shut down (a best-case comparison under ideal conditions).

1) First, operate on a file close to 3 GB in size, single-threaded throughout, using file mapping, with block sizes of 64 KB, 512 KB, and 4 MB.

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[])
{
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    return 0;
}


Results:



2) Next, operate on the same file of close to 3 GB, using one, two, and four threads, still through file mapping, with the block size fixed at 512 KB.

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[])
{
    // block 512KB, 1 thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    return 0;
}


Results:



3) Finally, operate on the same file of close to 3 GB, using one, two, and four threads, this time reading the file directly instead of mapping it, with the block size fixed at 512 KB.

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[])
{
    // block 512KB, 1 thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);

    _tprintf(TEXT("result : %I64d\n"), nCount);

    return 0;
}


Results:





When I/O bandwidth is sufficient, multi-threading improves performance: while the workers count zeros in blocks already in memory, the I/O for the next block proceeds in parallel.

Test 2: the same single-threaded, multi-threaded, and multi-threaded non-FileMap measurements, but with software that generates interfering I/O running (QQ and the Chrome browser).

The interference turned out to be unstable and the confounding factors too numerous, so this test will be completed in a later update...


