CUDA:内存变量定义
2016-03-24 16:13
309 查看
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdlib.h>
#include <conio.h>
using namespace std;
// Forward declarations of the kernel and the device helper defined further down.
__global__ void func1(int* data1, int* data2, int* data3);
__device__ void func2(int *data2, int i);
// Number of int elements in each host array and each device buffer.
#define ARRAY_SIZE 10
#define ARRAY_SIZE_IN_BYTES (sizeof(int)*(ARRAY_SIZE)) // size in bytes of an int array holding ARRAY_SIZE elements
// Host-side destination arrays for the three device buffers; main copies the
// device results into them and prints them.
int cpu_data1[ARRAY_SIZE];
int cpu_data2[ARRAY_SIZE];
int cpu_data3[ARRAY_SIZE];
// Host entry point.
//
// Allocates three device buffers of ARRAY_SIZE ints, zero-fills them, launches
// func1 with a single block of ARRAY_SIZE threads, copies the buffers back into
// cpu_data1 / cpu_data2 / cpu_data3 and prints one line per element.
//
// Returns 0 on success. If any CUDA runtime call or the kernel launch fails,
// the error string is written to stderr and 1 is returned.
int main()
{
    const int num_blocks = 1;
    const int num_threads = ARRAY_SIZE / num_blocks;

    // Start from NULL so the unconditional cudaFree calls below are safe even
    // when an allocation failed (cudaFree of a null pointer is a no-op).
    int *gpu_data1 = NULL;
    int *gpu_data2 = NULL;
    int *gpu_data3 = NULL;

    cudaError_t status = cudaMalloc((void **)&gpu_data1, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)&gpu_data2, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)&gpu_data3, ARRAY_SIZE_IN_BYTES);

    // cudaMalloc does not clear memory. func1 writes only the odd-indexed
    // elements and read-modify-writes data3, so without this zero fill the
    // values printed below would be indeterminate.
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data1, 0, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data2, 0, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data3, 0, ARRAY_SIZE_IN_BYTES);

    if (status == cudaSuccess)
    {
        func1<<<num_blocks, num_threads>>>(gpu_data1, gpu_data2, gpu_data3);
        // A kernel launch returns nothing; configuration errors are reported here.
        status = cudaGetLastError();
    }

    // Blocking copies on the default stream wait for the kernel, so errors
    // raised while the kernel ran are reported by these calls.
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data1, gpu_data1, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data2, gpu_data2, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data3, gpu_data3, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);

    // Release the buffers on every path, successful or not.
    cudaFree(gpu_data1);
    cudaFree(gpu_data2);
    cudaFree(gpu_data3);

    if (status != cudaSuccess)
    {
        cerr << "CUDA error: " << cudaGetErrorString(status) << endl;
        cin.get();
        return 1;
    }

    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        cout << cpu_data1[i] << " " << cpu_data2[i] << " " << cpu_data3[i] << endl;
    }
    // Wait for a key press so the console window stays open.
    cin.get();
    return 0;
}
// Variables in three different device memory spaces, all used by func1.
// A: __device__ variable, initialised to 10 and updated by the kernel.
__device__ int A = 10;
// C: __constant__ variable, only read by the kernel.
__constant__ int static C = 10;
// B: __shared__ variable. It carries no initialiser, so func1 zeroes it before
// any thread reads it.
__shared__ unsigned int B;

// Kernel demonstrating access to __device__, __shared__ and __constant__ variables.
//
// Launch layout: 1D grid of 1D blocks, one thread per element. Threads whose
// global index is >= ARRAY_SIZE do nothing, so each buffer must hold at least
// ARRAY_SIZE ints.
//
// For every odd global index i:
//   data1[i] receives the value of A after this thread's increment by 1,
//   data2[i] receives the value of B after this thread's increment by 10,
//   data3[i] is AND-ed in place with C (the caller must initialise data3,
//            because its previous content is read).
// Even-indexed elements are left untouched.
//
// The increments use atomicAdd so concurrent threads do not lose updates; the
// order in which threads receive their values is unspecified.
__global__ void func1(int* data1, int* data2, int* data3)
{
    int i = (blockIdx.x * blockDim.x) + threadIdx.x;

    // One thread per block gives B a defined starting value; the barrier
    // makes that write visible before any thread updates B. Both statements
    // sit outside the index test so every thread in the block reaches the barrier.
    if (threadIdx.x == 0)
    {
        B = 0;
    }
    __syncthreads();

    if (i < ARRAY_SIZE && (i % 2))
    {
        // atomicAdd returns the old value; add the increment to get the new one.
        data1[i] = atomicAdd(&A, 1) + 1;
        data2[i] = atomicAdd(&B, 10u) + 10u;
        data3[i] &= C;
    }
}
// Device-side helper stub. The body is empty, so a call has no effect and
// neither parameter is read. Nothing in the visible code calls it.
__device__ void func2(int *data2, int i)
{
}
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdlib.h>
#include <conio.h>
using namespace std;
// Forward declarations of the kernel and the device helper defined further down.
__global__ void func1(int* data1, int* data2, int* data3);
__device__ void func2(int *data2, int i);
// Number of int elements in each host array and each device buffer.
#define ARRAY_SIZE 10
#define ARRAY_SIZE_IN_BYTES (sizeof(int)*(ARRAY_SIZE)) // size in bytes of an int array holding ARRAY_SIZE elements
// Host-side destination arrays for the three device buffers; main copies the
// device results into them and prints them.
int cpu_data1[ARRAY_SIZE];
int cpu_data2[ARRAY_SIZE];
int cpu_data3[ARRAY_SIZE];
// Host entry point.
//
// Allocates three device buffers of ARRAY_SIZE ints, zero-fills them, launches
// func1 with a single block of ARRAY_SIZE threads, copies the buffers back into
// cpu_data1 / cpu_data2 / cpu_data3 and prints one line per element.
//
// Returns 0 on success. If any CUDA runtime call or the kernel launch fails,
// the error string is written to stderr and 1 is returned.
int main()
{
    const int num_blocks = 1;
    const int num_threads = ARRAY_SIZE / num_blocks;

    // Start from NULL so the unconditional cudaFree calls below are safe even
    // when an allocation failed (cudaFree of a null pointer is a no-op).
    int *gpu_data1 = NULL;
    int *gpu_data2 = NULL;
    int *gpu_data3 = NULL;

    cudaError_t status = cudaMalloc((void **)&gpu_data1, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)&gpu_data2, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMalloc((void **)&gpu_data3, ARRAY_SIZE_IN_BYTES);

    // cudaMalloc does not clear memory. func1 writes only the odd-indexed
    // elements and read-modify-writes data3, so without this zero fill the
    // values printed below would be indeterminate.
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data1, 0, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data2, 0, ARRAY_SIZE_IN_BYTES);
    if (status == cudaSuccess)
        status = cudaMemset(gpu_data3, 0, ARRAY_SIZE_IN_BYTES);

    if (status == cudaSuccess)
    {
        func1<<<num_blocks, num_threads>>>(gpu_data1, gpu_data2, gpu_data3);
        // A kernel launch returns nothing; configuration errors are reported here.
        status = cudaGetLastError();
    }

    // Blocking copies on the default stream wait for the kernel, so errors
    // raised while the kernel ran are reported by these calls.
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data1, gpu_data1, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data2, gpu_data2, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    if (status == cudaSuccess)
        status = cudaMemcpy(cpu_data3, gpu_data3, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);

    // Release the buffers on every path, successful or not.
    cudaFree(gpu_data1);
    cudaFree(gpu_data2);
    cudaFree(gpu_data3);

    if (status != cudaSuccess)
    {
        cerr << "CUDA error: " << cudaGetErrorString(status) << endl;
        cin.get();
        return 1;
    }

    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        cout << cpu_data1[i] << " " << cpu_data2[i] << " " << cpu_data3[i] << endl;
    }
    // Wait for a key press so the console window stays open.
    cin.get();
    return 0;
}
// Variables in three different device memory spaces, all used by func1.
// A: __device__ variable, initialised to 10 and updated by the kernel.
__device__ int A = 10;
// C: __constant__ variable, only read by the kernel.
__constant__ int static C = 10;
// B: __shared__ variable. It carries no initialiser, so func1 zeroes it before
// any thread reads it.
__shared__ unsigned int B;

// Kernel demonstrating access to __device__, __shared__ and __constant__ variables.
//
// Launch layout: 1D grid of 1D blocks, one thread per element. Threads whose
// global index is >= ARRAY_SIZE do nothing, so each buffer must hold at least
// ARRAY_SIZE ints.
//
// For every odd global index i:
//   data1[i] receives the value of A after this thread's increment by 1,
//   data2[i] receives the value of B after this thread's increment by 10,
//   data3[i] is AND-ed in place with C (the caller must initialise data3,
//            because its previous content is read).
// Even-indexed elements are left untouched.
//
// The increments use atomicAdd so concurrent threads do not lose updates; the
// order in which threads receive their values is unspecified.
__global__ void func1(int* data1, int* data2, int* data3)
{
    int i = (blockIdx.x * blockDim.x) + threadIdx.x;

    // One thread per block gives B a defined starting value; the barrier
    // makes that write visible before any thread updates B. Both statements
    // sit outside the index test so every thread in the block reaches the barrier.
    if (threadIdx.x == 0)
    {
        B = 0;
    }
    __syncthreads();

    if (i < ARRAY_SIZE && (i % 2))
    {
        // atomicAdd returns the old value; add the increment to get the new one.
        data1[i] = atomicAdd(&A, 1) + 1;
        data2[i] = atomicAdd(&B, 10u) + 10u;
        data3[i] &= C;
    }
}
// Device-side helper stub. The body is empty, so a call has no effect and
// neither parameter is read. Nothing in the visible code calls it.
__device__ void func2(int *data2, int i)
{
}
相关文章推荐
- 如果说人生是自我编写的程序
- 基于ubuntu13.04搜狗输入法安装方法
- 七种排序算法---Java实现
- java 遍历的4种方法
- digit image process --- python-skimage
- IntelliJ IDEA 14.x 与 Tomcat 集成,创建并运行Java Web项目
- VS2013 编译ffmpeg
- 对象的创建
- 动态分析maillog日志,把恶意链接直接用防火墙禁止
- 实现自己的字符串类String
- 即使被拖库,也可以保证密码不泄露
- zoj 2587 kmp的应用
- 百度前端技术学院-task1.8源代码
- [Awt]——图片获取方式
- ftp连接不上的问题
- 多态的详细介绍
- JVM内存划分
- Json 和 Jsonlib 的使用
- 安卓应用反编译(三)-结束
- KMP算法