您的位置:首页 > 其它

cuda 简单数组运算

2014-12-29 11:07 183 查看
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#define data_size 1026
#define thread_num 256
using namespace std;
/*
 * Terminates the program with a readable message when a CUDA runtime call
 * did not return cudaSuccess.
 *
 *   status - return code of the CUDA runtime call being checked
 *   what   - short label of the operation, printed in the error message
 */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Element-wise square: d_odata[i] = d_idata[i] * d_idata[i] for every
 * i in [0, data_size). Despite its name the kernel does not reduce; the
 * summation of the squares is done by the host in main().
 *
 *   d_idata - device buffer holding data_size input ints
 *   d_odata - device buffer receiving data_size squared ints
 *
 * Launch layout: any 1D grid of 1D blocks. The loop advances by the total
 * number of launched threads, so every element is covered regardless of how
 * many blocks/threads are used, and the i < data_size bound keeps surplus
 * threads in the last block from touching memory past the buffers.
 * No shared memory is used.
 */
__global__ static void sumOfSquares(int *d_idata, int *d_odata)
{
    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < data_size; i += stride)
    {
        d_odata[i] = d_idata[i] * d_idata[i];
    }
}

/*
 * Fills an array with 0..data_size-1, squares it on the GPU, sums the squares
 * on the host, and prints that sum next to a pure-CPU reference sum.
 */
int main()
{
    int h_idata[data_size];
    for (int i = 0; i < data_size; i++)
    {
        h_idata[i] = i; // deterministic input; rand()%10 is an alternative
    }

    const size_t bytes = sizeof(int) * data_size;

    int *d_idata = NULL;
    int *d_odata = NULL;
    checkCuda(cudaMalloc((void **)&d_idata, bytes), "cudaMalloc(d_idata)");
    checkCuda(cudaMalloc((void **)&d_odata, bytes), "cudaMalloc(d_odata)");

    checkCuda(cudaMemcpy(d_idata, h_idata, bytes, cudaMemcpyHostToDevice),
              "host-to-device copy");

    // A single block may hold at most 1024 threads, so data_size (1026)
    // threads cannot go into one block. Use thread_num threads per block and
    // round the block count up so the tail elements are covered as well.
    const int block_count = (data_size + thread_num - 1) / thread_num;
    sumOfSquares<<<block_count, thread_num>>>(d_idata, d_odata);
    // Launch-configuration errors are not returned by the launch itself.
    checkCuda(cudaGetLastError(), "sumOfSquares launch");

    int gpu_sum[data_size];
    // Blocking copy on the default stream: it waits for the kernel to finish
    // and reports any error raised while the kernel was executing.
    checkCuda(cudaMemcpy(gpu_sum, d_odata, bytes, cudaMemcpyDeviceToHost),
              "device-to-host copy");
    checkCuda(cudaFree(d_idata), "cudaFree(d_idata)");
    checkCuda(cudaFree(d_odata), "cudaFree(d_odata)");

    int final_gpu_sum = 0;
    for (int i = 0; i < data_size; i++)
    {
        final_gpu_sum += gpu_sum[i];
    }
    printf("final_gpu_sum=%d\n", final_gpu_sum);

    // CPU reference for comparison with the GPU result.
    int cpu_sum = 0;
    for (int i = 0; i < data_size; i++)
    {
        cpu_sum += h_idata[i] * h_idata[i];
    }
    printf("cpu_sum: %d\n", cpu_sum);

    // Wait for a key press so the console window stays open.
    std::cin.get();
    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: