您的位置:首页 > 其它

cuda 学习(三) Page-Locked Host Memory

2016-09-08 20:14 363 查看
一、cudaHostAlloc的使用

#include <iostream>
#include <numeric>
#include <stdlib.h>

// Kernel: every thread adds its own threadIdx.x to the element of `input`
// at that same index.
//
// Only threadIdx.x is used (no blockIdx), so the indexing is meaningful for a
// single 1-D block. There is no bounds check: the caller must make sure
// `input` holds at least blockDim.x floats and is addressable from the device.
__global__ void add1(float* input){
    const int tid = threadIdx.x;
    float* slot = input + tid;
    *slot += tid;
}
// Prints a diagnostic on stderr when a CUDA runtime call did not succeed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo: allocate page-locked host memory with cudaHostAlloc, let the kernel
// update it in place, then print the result from the host.
//
// Returns 0 on success and 1 if any CUDA call fails.
int main(void)
{
    const int count = 12;
    const size_t bytes = sizeof(float) * count;

    float* temp = NULL;
    if (!cudaOk(cudaHostAlloc(&temp, bytes, cudaHostAllocDefault), "cudaHostAlloc")) {
        return 1;
    }

    for (int i = 0; i < count; ++i) {
        temp[i] = static_cast<float>(i);
    }

    // NOTE(review): the host pointer is handed straight to the kernel. That
    // relies on unified virtual addressing making cudaHostAlloc memory directly
    // usable from the device; without UVA the buffer would need
    // cudaHostAllocMapped plus cudaHostGetDevicePointer -- confirm for the target.
    add1<<<1, count>>>(temp);

    // A launch is asynchronous: check the launch itself, then wait for the
    // kernel to finish before the host reads the buffer it writes to.
    bool ok = cudaOk(cudaGetLastError(), "add1 launch")
           && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    if (ok) {
        for (int i = 0; i < count; ++i) {
            std::cout << temp[i] << std::endl;
        }
    }

    if (!cudaOk(cudaFreeHost(temp), "cudaFreeHost")) {
        ok = false;
    }
    return ok ? 0 : 1;
}


二、cudaHostRegister与cudaHostGetDevicePointer使用

#include <iostream>
#include <numeric>
#include <stdlib.h>

// Kernel: adds each thread's threadIdx.x to the float stored at that index of
// `input`.
//
// The index comes from threadIdx.x alone, so this is intended for a single
// 1-D block; it performs no bounds check, so `input` must provide at least
// blockDim.x device-accessible floats.
__global__ void add1(float* input){
    const int lane = threadIdx.x;
    const float updated = input[lane] + lane;
    input[lane] = updated;
}
// Prints a diagnostic on stderr when a CUDA runtime call did not succeed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo: page-lock an ordinary malloc'd buffer with cudaHostRegister, obtain
// its device-side alias with cudaHostGetDevicePointer, let the kernel update
// it in place, then print the result through the host pointer.
//
// Returns 0 on success and 1 if the allocation or any CUDA call fails.
int main(void)
{
    const int count = 12;
    const size_t bytes = sizeof(float) * count;

    float* temp = (float*)malloc(bytes);
    if (temp == NULL) {
        std::cerr << "malloc failed" << std::endl;
        return 1;
    }

    if (!cudaOk(cudaHostRegister(temp, bytes, cudaHostRegisterMapped), "cudaHostRegister")) {
        free(temp);
        return 1;
    }

    for (int i = 0; i < count; ++i) {
        temp[i] = static_cast<float>(i);
    }

    float* device = NULL;
    bool ok = cudaOk(cudaHostGetDevicePointer(&device, temp, 0), "cudaHostGetDevicePointer");

    if (ok) {
        add1<<<1, count>>>(device);
        // A launch is asynchronous: check the launch itself, then wait for the
        // kernel to finish before the host reads the buffer it writes to.
        ok = cudaOk(cudaGetLastError(), "add1 launch")
          && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (ok) {
        for (int i = 0; i < count; ++i) {
            std::cout << temp[i] << std::endl;
        }
    }

    // Unregistering only removes the page-lock/mapping; the malloc'd storage
    // still has to be released with free().
    if (!cudaOk(cudaHostUnregister(temp), "cudaHostUnregister")) {
        ok = false;
    }
    free(temp);
    return ok ? 0 : 1;
}


运行结果是:0 2 4 6 8 10 12 14 16 18 20 22

这两个函数有一个参数是flag,对应的含义为:

Portable Memory: a block of page-locked memory can be used in conjunction with any device in the
system
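Write-Combining Memory: by default, page-locked host memory is allocated as cacheable. It can instead be allocated as write-combining (flag cudaHostAllocWriteCombined), which frees the host's cache resources and is not snooped during PCI Express transfers, so it can improve transfer performance from CPU to GPU. Reading write-combining memory from the host is very slow, so it should only be used for buffers the host only writes to.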
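Mapped Memory: a block of page-locked host memory can be mapped into the address space of the device
(flag cudaHostAllocMapped or cudaHostRegisterMapped), so that kernels can access it directly through the device pointer returned by cudaHostGetDevicePointer, without an explicit copy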
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: