
Caffe source code analysis: the SyncedMemory class

Reposted from http://blog.csdn.net/lingerlanlan/article/details/24379607

Data members

private:
  void to_cpu();             // bring the data to the CPU
  void to_gpu();             // bring the data to the GPU
  void* cpu_ptr_;            // pointer to the data in CPU memory
  void* gpu_ptr_;            // pointer to the data in GPU memory
  size_t size_;              // size of the data in bytes
  SyncedHead head_;          // current state of the data, one of four values:
                             // enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  bool own_cpu_data_;        // whether this object owns the CPU data
  bool cpu_malloc_use_cuda_; // whether the CPU data was allocated with CUDA (pinned memory)
  bool own_gpu_data_;        // whether this object owns the GPU data
  int gpu_device_;           // id of the GPU device holding gpu_ptr_


Member functions

Constructors
SyncedMemory()
    : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
      own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
      gpu_device_(-1) {}
explicit SyncedMemory(size_t size)  // only records the size; nothing is allocated yet
    : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
      own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
      gpu_device_(-1) {}
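
As a minimal usage sketch (the variable name is just for illustration): constructing a SyncedMemory only records the size, and no host or device memory is allocated until the data is first accessed.

SyncedMemory data(20 * sizeof(float));  // head_ == UNINITIALIZED, no buffer allocated yet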

Destructor
SyncedMemory::~SyncedMemory() {
  if (cpu_ptr_ && own_cpu_data_) {
    CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
  }
// The following block is compiled only when CPU_ONLY is not defined (up to the matching #endif).
#ifndef CPU_ONLY
  if (gpu_ptr_ && own_gpu_data_) {
    int initial_device;
    cudaGetDevice(&initial_device);
    if (gpu_device_ != -1) {
      // Switch to the device the memory was allocated on before freeing it.
      CUDA_CHECK(cudaSetDevice(gpu_device_));
    }
    CUDA_CHECK(cudaFree(gpu_ptr_));
    cudaSetDevice(initial_device);
  }
#endif  // CPU_ONLY
}

Other functions
SyncedHead head() { return head_; }
size_t size() { return size_; }

inline void SyncedMemory::to_cpu() {
  switch (head_) {
  case UNINITIALIZED:  // nothing has been allocated yet
    CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);  // allocate CPU storage
    caffe_memset(size_, 0, cpu_ptr_);
    head_ = HEAD_AT_CPU;  // the data now lives on the CPU first
    own_cpu_data_ = true;
    break;
  case HEAD_AT_GPU:  // the data currently lives on the GPU
#ifndef CPU_ONLY
    if (cpu_ptr_ == NULL) {
      CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
      own_cpu_data_ = true;
    }
    caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
    head_ = SYNCED;  // the CPU and GPU copies now match
#else
    NO_GPU;
#endif
    break;
  case HEAD_AT_CPU:
  case SYNCED:
    break;
  }
}  // after to_cpu(), head_ can only be HEAD_AT_CPU or SYNCED

inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  switch (head_) {
  case UNINITIALIZED:
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;  // the data now lives on the GPU first
    own_gpu_data_ = true;
    break;
  case HEAD_AT_CPU:
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
    break;
  case HEAD_AT_GPU:
  case SYNCED:
    break;
  }
#else
  NO_GPU;
#endif
}  // after to_gpu(), head_ can only be HEAD_AT_GPU or SYNCED
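
Taken together, to_cpu() and to_gpu() implement a small state machine over head_. A summary sketch of the transitions they perform (not actual Caffe code, just a condensed reading of the two functions above):

// UNINITIALIZED --to_cpu()--> HEAD_AT_CPU   (CPU buffer allocated and zero-filled)
// UNINITIALIZED --to_gpu()--> HEAD_AT_GPU   (GPU buffer allocated and zero-filled)
// HEAD_AT_CPU   --to_gpu()--> SYNCED        (data copied CPU -> GPU)
// HEAD_AT_GPU   --to_cpu()--> SYNCED        (data copied GPU -> CPU)
// Any state where the data is already on the requested side is left unchanged.
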
const void* SyncedMemory::cpu_data() {  // sync the data to the CPU and return the CPU pointer
  to_cpu();
  return (const void*)cpu_ptr_;
}  // read-only access: unlike mutable_cpu_data(), it does not force head_ to HEAD_AT_CPU

void SyncedMemory::set_cpu_data(void* data) {
  CHECK(data);
  if (own_cpu_data_) {
    CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);  // free the previously owned CPU data
  }
  cpu_ptr_ = data;
  head_ = HEAD_AT_CPU;
  own_cpu_data_ = false;  // the external buffer is not owned by this object
}
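
A minimal sketch of how set_cpu_data() can wrap an externally managed buffer (the names are hypothetical, and the surrounding includes are assumed); because own_cpu_data_ ends up false, the destructor will not free the caller's buffer:

std::vector<float> buf(100, 0.f);                 // buffer owned by the caller
SyncedMemory mem(buf.size() * sizeof(float));
mem.set_cpu_data(buf.data());                     // mem now points at buf
// head_ == HEAD_AT_CPU, own_cpu_data_ == false, so ~SyncedMemory leaves buf alone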

const void* SyncedMemory::gpu_data() {  // sync the data to the GPU and return the GPU pointer
#ifndef CPU_ONLY
  to_gpu();
  return (const void*)gpu_ptr_;
#else
  NO_GPU;
  return NULL;
#endif
}  // read-only access: unlike mutable_gpu_data(), it does not force head_ to HEAD_AT_GPU

void SyncedMemory::set_gpu_data(void* data) {
#ifndef CPU_ONLY
  CHECK(data);
  if (own_gpu_data_) {
    int initial_device;
    cudaGetDevice(&initial_device);
    if (gpu_device_ != -1) {
      CUDA_CHECK(cudaSetDevice(gpu_device_));
    }
    CUDA_CHECK(cudaFree(gpu_ptr_));  // free the previously owned GPU data
    cudaSetDevice(initial_device);
  }
  gpu_ptr_ = data;
  head_ = HEAD_AT_GPU;
  own_gpu_data_ = false;  // the external buffer is not owned by this object
#else
  NO_GPU;
#endif
}

void* SyncedMemory::mutable_cpu_data() {
  to_cpu();
  head_ = HEAD_AT_CPU;  // after mutable_cpu_data(), head_ is always HEAD_AT_CPU
  return cpu_ptr_;
}

void* SyncedMemory::mutable_gpu_data() {
#ifndef CPU_ONLY
  to_gpu();
  head_ = HEAD_AT_GPU;  // after mutable_gpu_data(), head_ is always HEAD_AT_GPU
  return gpu_ptr_;
#else
  NO_GPU;
  return NULL;
#endif
}
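
Putting the accessors together, here is a hedged sketch of the usual read/write pattern (assumes a GPU build; the variable names are only for illustration):

SyncedMemory mem(100 * sizeof(float));
float* host = static_cast<float*>(mem.mutable_cpu_data());     // head_ -> HEAD_AT_CPU
host[0] = 1.f;                                                  // write on the CPU
const float* dev = static_cast<const float*>(mem.gpu_data());   // copies CPU -> GPU, head_ -> SYNCED
float* dev_w = static_cast<float*>(mem.mutable_gpu_data());     // head_ -> HEAD_AT_GPU, CPU copy now stale
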
#ifndef CPU_ONLY
void SyncedMemory::async_gpu_push(const cudaStream_t& stream) {
  CHECK(head_ == HEAD_AT_CPU);
  if (gpu_ptr_ == NULL) {
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    own_gpu_data_ = true;
  }
  const cudaMemcpyKind put = cudaMemcpyHostToDevice;
  CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream));
  // Assume caller will synchronize on the stream before use
  head_ = SYNCED;
}
#endif
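
A minimal usage sketch for async_gpu_push() (GPU build only; the stream handling shown here is an assumption about how a caller might drive it, not code taken from Caffe):

cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
mem.mutable_cpu_data();                      // ensure head_ == HEAD_AT_CPU before the push
mem.async_gpu_push(stream);                  // enqueue the host-to-device copy
// ... do other work while the copy is in flight ...
CUDA_CHECK(cudaStreamSynchronize(stream));   // required before using gpu_data()
CUDA_CHECK(cudaStreamDestroy(stream));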

Memory management
inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    // In GPU mode, allocate pinned (page-locked) host memory for faster transfers.
    CUDA_CHECK(cudaMallocHost(ptr, size));
    *use_cuda = true;
    return;
  }
#endif
  *ptr = malloc(size);
  *use_cuda = false;
  CHECK(*ptr) << "host allocation of size " << size << " failed";
}

inline void CaffeFreeHost(void* ptr, bool use_cuda) {
#ifndef CPU_ONLY
  if (use_cuda) {
    CUDA_CHECK(cudaFreeHost(ptr));
    return;
  }
#endif
  free(ptr);
}
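
In GPU mode CaffeMallocHost() uses cudaMallocHost(), so host buffers are pinned (page-locked), which speeds up host/device copies and makes asynchronous transfers possible; the use_cuda flag recorded at allocation time tells CaffeFreeHost() which deallocator matches. A small illustrative sketch of the pairing:

void* ptr = NULL;
bool use_cuda = false;
CaffeMallocHost(&ptr, 256, &use_cuda);  // pinned allocation if Caffe::mode() == Caffe::GPU
// ... use the 256-byte host buffer ...
CaffeFreeHost(ptr, use_cuda);           // calls cudaFreeHost() or free(), matching the allocation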