您的位置:首页 > 其它

[菜鸟每天来段CUDA_C]CUDA实现简单热传导动态模拟

2013-12-03 15:14 375 查看
本文利用CUDA实现简单二维平面上的热传导模拟。假设有一个矩形房间,将其分成一个网格,在网格中随机散布一些热源,热源有不同的固定温度,然后就可以计算网格中每个单元的温度随时间的变化情况。本文将热传导模型做如下简化:某单元的新时刻温度等于原有温度加其与邻接单元的温差,邻接单元取上下左右四个单元。

温度更新算法:

Step1: 给定一个包含初始输入温度的网格,将其作为热源温度值复制到网格对应的单元;

Step2: 规定一个输入温度网格,根据热传导模型更新温度到输出网格;

Step3: 交换输入温度网格和输出温度网格的数据,转step1.

实验结果如下:下图显示按先后顺序的四个不同时刻的热量分布及变化情况。



主要程序代码如下:

/********************************************************************
*  heatTransfer.cu
*********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include "CPUAnim.h"

#define DIM 1024
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
#define SPEED 0.25f

struct DataBlock
{
unsigned char	*outputBitmap;
float			*dev_inSrc;
float			*dev_outSrc;
float			*dev_constSrc;
CPUAnimBitmap	*bitmap;

cudaEvent_t		start, stop;
float			totalTime;
float			frames;
};

template< typename T >
void swap( T& a, T& b ) {
T t = a;
a = b;
b = t;
}

/************************************************************************/
/* Init CUDA                                                            */
/************************************************************************/
bool InitCUDA(void)
{
......
}

/************************************************************************/
__global__ void copyConstKernel(float *iPtr, const float *cPtr)
{
int x = threadIdx.x + blockIdx.x + blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;

if (cPtr[offset] != 0)
{
iPtr[offset] = cPtr[offset];
}
}

__global__ void blendKernel(float* outSrc, const float* inSrc)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;

int left = offset - 1;
int right = offset + 1;
if (x == 0)
{
left++;
}
if (x == DIM - 1)
{
right--;
}

int top = offset - DIM;
int bottom = offset + DIM;
if (y == 0)
{
top += DIM;
}
if (y == DIM -1)
{
bottom -= DIM;
}

outSrc[offset] = inSrc[offset] + SPEED * (inSrc[top] + inSrc[bottom]
+ inSrc[left] + inSrc[right] - 4 * inSrc[offset]);
}

__device__ unsigned char value( float n1, float n2, int hue )
{
......
}
__global__ void floatToColor( unsigned char *optr,
const float *outSrc )
{
......
}

void animGPU(DataBlock *d, int ticks)
{
cutilSafeCall(cudaEventRecord(d->start, 0));
dim3	blocks(DIM/16, DIM/16);
dim3	threads(16, 16);
CPUAnimBitmap	*bitmap = d->bitmap;

for (int i=0; i<90; i++)
{
copyConstKernel<<<blocks, threads>>>(d->dev_inSrc, d->dev_constSrc);
blendKernel<<<blocks, threads>>>(d->dev_outSrc, d->dev_inSrc);
swap(d->dev_inSrc, d->dev_outSrc);
}
floatToColor<<<blocks, threads>>>(d->outputBitmap, d->dev_inSrc);

cudaMemcpy(bitmap->get_ptr(), d->outputBitmap, bitmap->image_size(), cudaMemcpyDeviceToHost);
cutilSafeCall(cudaEventRecord(d->stop, 0));
cutilSafeCall(cudaEventSynchronize(d->stop));

float elapsedTime;
cutilSafeCall(cudaEventElapsedTime(&elapsedTime, d->start, d->stop));
d->totalTime += elapsedTime;
++d->frames;

printf("Average Time per frame: %3.1f ms\n", d->totalTime/d->frames);
}

void animExit(DataBlock *d)
{
cudaFree(d->dev_inSrc);
cudaFree(d->dev_outSrc);
cudaFree(d->dev_constSrc);

cutilSafeCall(cudaEventDestroy(d->start));
cutilSafeCall(cudaEventDestroy(d->stop));
}
/************************************************************************/

int main(int argc, char* argv[])
{

if(!InitCUDA()) {
return 0;
}

DataBlock	data;
CPUAnimBitmap bitmap(DIM, DIM, &data);
data.bitmap = &bitmap;
data.totalTime = 0;
data.frames = 0;

cutilSafeCall(cudaEventCreate(&data.start));
cutilSafeCall(cudaEventCreate(&data.stop));

cutilSafeCall(cudaMalloc((void**)&data.outputBitmap, data.bitmap->image_size()));

cutilSafeCall(cudaMalloc((void**)&data.dev_inSrc, data.bitmap->image_size()));
cutilSafeCall(cudaMalloc((void**)&data.dev_outSrc, data.bitmap->image_size()));
cutilSafeCall(cudaMalloc((void**)&data.dev_constSrc, data.bitmap->image_size()));

float *temp = (float*)malloc(bitmap.image_size());
for (int i=0; i<DIM*DIM; i++)
{
temp[i] = 0;
int x = i % DIM;
int y = i / DIM;

if ((x>300) && (x<600) && (y>301) && (y<601))
{
temp[i] = MAX_TEMP;
}
}
temp[DIM*100+100] = (MAX_TEMP + MIN_TEMP) / 2;
temp[DIM*700+100] = MIN_TEMP;
temp[DIM*300+300] = MIN_TEMP;
temp[DIM*200+700] = MIN_TEMP;

for (int y=800; y<900; y++)
{
for (int x=400; x<500; x++)
{
temp[x+y*DIM] = MIN_TEMP;
}
}

cutilSafeCall(cudaMemcpy(data.dev_constSrc, temp, bitmap.image_size(), cudaMemcpyHostToDevice));

for(int y=800; y<DIM; y++)
{
for (int x=0; x<200; x++)
{
temp[x+y*DIM] = MAX_TEMP;
}
}
cutilSafeCall(cudaMemcpy(data.dev_inSrc, temp, bitmap.image_size(), cudaMemcpyHostToDevice));
free(temp);
bitmap.anim_and_exit((void(*)(void*, int))animGPU, (void(*)(void*))animExit);
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: