// 您的位置:首页 > 其它

// CUDA之旅:矩阵相加

// 2017-06-07 18:43 141 查看
// 矩阵相加CUDA实现

//矩阵相加的CUDA程序实现
//Author: Eric Lv
//Email: Eric2014_Lv@sjtu.edu.cn
//Date: 6/7/2017

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
//#include <cuda.h>

#define N 32

/*
 * matrix_add - element-wise sum of two N x N integer matrices: c = a + b.
 *
 * a, b : input matrices, each a flat array of N * N ints (read only).
 * c    : output matrix, a flat array of N * N ints.
 * All three pointers are dereferenced on the device.
 *
 * Launch layout: 2D blocks (optionally in a 2D grid), one thread per
 * element. The x index selects the column and the y index the row, and
 * element (row, col) lives at flat offset row * N + col, so threads that
 * are adjacent in x touch adjacent addresses. Threads whose (row, col)
 * falls outside the N x N extent do nothing. No shared memory is used.
 */
__global__ void matrix_add(const int a[], const int b[], int c[])
{
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the tail so a launch that over-covers the matrix stays in bounds.
    if (row < N && col < N)
    {
        // The parameters are plain 1D pointers, so index them with a flat offset.
        int i = row * N + col;
        c[i] = a[i] + b[i];
    }
}

/*
 * Host driver for matrix_add.
 *
 * Allocates three N x N int matrices on the host and on the device, fills
 * a and b with a known pattern, runs matrix_add on the GPU, copies c back
 * and verifies every element against a + b computed on the host.
 *
 * Returns 0 when every element matches, -1 on any allocation, copy, launch
 * or verification failure. All host and device buffers are released on
 * every exit path.
 */
int main(void)
{
    const size_t bytes = sizeof(int) * N * N;
    int i;
    int status = -1; // stays -1 until every step has succeeded
    int *dev_a = NULL, *dev_b = NULL, *dev_c = NULL;
    int *host_a = NULL, *host_b = NULL, *host_c = NULL;
    cudaError_t err = cudaSuccess;

    // Thread layout inside the single block: N x N threads, one per element.
    // N * N must not exceed the device's per-block thread limit; an oversized
    // block is reported by the launch check below.
    dim3 threads_in_block(N, N);

    host_a = (int *)malloc(bytes);
    host_b = (int *)malloc(bytes);
    host_c = (int *)malloc(bytes);
    if (host_a == NULL || host_b == NULL || host_c == NULL)
    {
        printf("malloc on host is failed!\n");
        goto cleanup;
    }

    err = cudaMalloc((void **)&dev_a, bytes);
    if (err != cudaSuccess)
    {
        printf("cudaMalloc (a) is failed!\n");
        goto cleanup;
    }
    err = cudaMalloc((void **)&dev_b, bytes);
    if (err != cudaSuccess)
    {
        printf("cudaMalloc (b) is failed!\n");
        goto cleanup;
    }
    err = cudaMalloc((void **)&dev_c, bytes);
    if (err != cudaSuccess)
    {
        printf("cudaMalloc (c) is failed!\n");
        goto cleanup;
    }

    // Deterministic input pattern so the result can be checked on the host.
    for (i = 0; i < N * N; i++)
    {
        host_a[i] = 2 * i + 1;
        host_b[i] = 3 * i - 1;
    }

    err = cudaMemcpy(dev_a, host_a, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        printf("Host to device (a) is failed!\n");
        goto cleanup;
    }
    err = cudaMemcpy(dev_b, host_b, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        printf("Host to device (b) is failed!\n");
        goto cleanup;
    }

    // Launch the kernel on the GPU.
    matrix_add<<<1, threads_in_block>>>(dev_a, dev_b, dev_c);

    // A kernel launch returns nothing; configuration errors (for example too
    // many threads per block) only surface through cudaGetLastError().
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        printf("Kernel launch is failed: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Blocking copy: waits for the kernel to finish, so errors raised while
    // the kernel was executing are reported here as well.
    err = cudaMemcpy(host_c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        printf("Device to host (c) is failed!\n");
        goto cleanup;
    }

    for (i = 0; i < N * N; i++)
    {
        if (host_a[i] + host_b[i] != host_c[i])
        {
            printf("a[%d]%d + b[%d]%d != c[%d]%d.\n", i, host_a[i], i, host_b[i], i, host_c[i]);
            goto cleanup;
        }
    }
    printf("Congratulations! All entris are correct! You have finished the CUDA code!\n");
    status = 0;

cleanup:
    // free(NULL) and cudaFree(NULL) are no-ops, so this is safe from any
    // point of failure above.
    free(host_a);
    free(host_b);
    free(host_c);

    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);

    return status;
}
// 内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
// 标签: