您的位置:首页 > 运维架构

OpenCL实现向量加的简单例子

2011-01-06 21:33 429 查看
看了两天OpenCL的入门书,对OpenCL编程的流程有了大概的了解。下面是一个实现向量加的例子,从网上拿过来的,刚开始有些问题,修改以后可以跑通。代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

/* Number of elements in a true array (not a pointer); the argument is
 * parenthesized so that any array-valued expression expands safely. */
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))

/*
 * OpenCL C source of the vec_add kernel, one string per source line.
 * Each work-item adds one pair of elements: c[gid] = a[gid] + b[gid].
 * Every line must end in a real newline escape ("\n"); the previous "/n"
 * was literal text that got passed to the OpenCL compiler.
 */
const char* src[] = {
    "__kernel void vec_add(__global const float *a, __global const float *b, __global float *c)\n",
    "{\n",
    "\tint gid = get_global_id(0);\n",
    "\tc[gid] = a[gid] + b[gid];\n",
    "}\n"
};

int main()
{

cl_uint numPlatforms;
cl_platform_id platform = NULL;
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
printf("error: getting platforms/n");
exit(1);
}
cl_platform_id *platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
printf("error: getting platform IDs/n");
exit(1);
}
platform = platforms[0];
cl_context_properties cps[3] ={
CL_CONTEXT_PLATFORM,
(cl_context_properties) platform, 0};
cl_context_properties *cprops =
(NULL == platform) ? NULL :cps;
//create OpenCL context
cl_context context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
if(status != CL_SUCCESS)
{
printf("create context error/n");
exit(1);
}

//get device id from context
size_t cb;
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
if(status != CL_SUCCESS)
{
printf("get context info 1 error/n");
exit(1);
}
cl_device_id *devices= malloc(cb);
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
if(status != CL_SUCCESS)
{
printf("get context info 2 error/n");
exit(1);
}
//create a command queue
cl_command_queue cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL);

//create kernel
cl_program program = clCreateProgramWithSource(context, LEN(src),src, NULL, NULL);
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "vec_add", NULL);

//host initialize
size_t n = 5;
float srcA[] = {1, 2, 3, 4, 5};
float srcB[] = {5, 4, 3, 2, 1};
float dst
;

//set the input and output arguments of kernel
cl_mem memobjs[3];
memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*n, srcA, NULL);
memobjs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*n, srcB, NULL);
memobjs[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*n, NULL, NULL);

//set "a", "b", "c" vector argument
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]);
status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&memobjs[2]);

size_t global_work_size[1] = {n};

//execute kernel
status = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);

//read output array
status = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 0, n*sizeof(cl_float), dst, 0, NULL, NULL);

for(int i=0; i<n; ++i)
{
printf("-> %.2f/n", dst[i]);
}
return 0;
}


从以上的代码可以基本得出OpenCL编程的流程:

1、获得平台,clGetPlatformIDs

2、创建上下文,clCreateContextFromType,这个函数的第一个参数为NULL时在AMD的平台上运行不能通过,必须为该函数指定第一个参数。

3、通过上下文得到设备信息,clGetContextInfo

4、为相应设备创建commandQueue, clCreateCommandQueue

5、创建源程序,build源程序,生成kernel

6、分配buffer空间,逐个设置程序参数。

7、执行kernel,clEnqueueNDRangeKernel

8、从buffer读回数据,clEnqueueReadBuffer
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: