OpenCL 优化后的卷积代码
2016-01-07 19:49
197 查看
自己修改过的 可以直接运行 卷积模板
/*
 * 3x3 weighted convolution ("blur") over a 2D image.
 *
 * The kernel runs with a fixed 8x4 work-group.  Each group first stages a
 * 10x6 tile of source texels in local memory: its own 8x4 pixels plus a
 * one-texel border on every side.  Every work-item then reads its 3x3
 * neighbourhood from that tile instead of from the image.
 *
 * Tile layout shared by all functions in this file:
 *   rgb[x + 1][y + 1] holds the texel at offset (x, y) from the
 *   work-group's first pixel, with x in [-1, 8] and y in [-1, 4].
 */

/*
 * Reference prefetch: one work-item loads all 60 texels of the tile.
 *
 *   src, s  source image and the sampler passed to every read_imagef call
 *   rgb     local-memory tile to fill (layout described above)
 *   gid     image coordinate of the work-group's first pixel
 *   lid     local id of the calling work-item
 *
 * The function ends with a work-group barrier, so it has to be reached by
 * every work-item of the group, not only by the one that loads.
 */
void prefetch_texture_samples_8x4(image2d_t src, sampler_t s,
                                  __local float4 rgb[10][6],
                                  int2 gid, int2 lid)
{
    /*
     * Both components must be zero.  Testing lid.x alone selects four
     * work-items of an 8x4 group (lid.y = 0..3), which would each fetch the
     * whole tile and write the same local slots concurrently.
     */
    if (lid.x == 0 && lid.y == 0) {
        for (int i = -1; i < 9; i++) {
            for (int j = -1; j < 5; j++) {
                rgb[i + 1][j + 1] = read_imagef(src, s, gid + (int2)(i, j));
            }
        }
    }

    /* Make the tile visible to the whole group before anyone reads it. */
    barrier(CLK_LOCAL_MEM_FENCE);
}

/*
 * Cooperative prefetch: the first 30 work-items of the 8x4 group load two
 * texels each (top half and bottom half of the tile), filling the same
 * rgb[x + 1][y + 1] layout as prefetch_texture_samples_8x4.
 *
 *   src, s  source image and the sampler passed to every read_imagef call
 *   rgb     local-memory tile to fill
 *
 * The tile origin is derived from get_group_id(), and the function ends with
 * a work-group barrier, so every work-item of the group has to call it.
 */
void prefetch_8x4_optimized(image2d_t src, sampler_t s,
                            __local float4 rgb[10][6])
{
    /* Image coordinate of the work-group's first work-item. */
    int2 wi0Coord = (int2)((int)get_group_id(0) * 8,
                           (int)get_group_id(1) * 4);

    /* Flatten the 8x4 local id into a single index in [0, 32). */
    int flatLocal = (int)get_local_id(1) * 8 + (int)get_local_id(0);

    /* Only the first 30 work-items load; each one loads two texels. */
    if (flatLocal < 30) {
        /* Re-interpret the flat index as a position in a 10-wide, 3-high grid. */
        int i = flatLocal % 10; /* tile column, 0..9 */
        int j = flatLocal / 10; /* tile row,    0..2 */

        /*
         * The first index is the column (size 10) and the second the row
         * (size 6).  Swapping them overruns the second dimension and does
         * not match the layout the kernel reads.
         */

        /* Top half of the tile: rows 0..2. */
        rgb[i][j] = read_imagef(src, s,
                                (int2)(wi0Coord.x + i - 1,
                                       wi0Coord.y + j - 1));

        /* Bottom half of the tile: rows 3..5. */
        rgb[i][j + 3] = read_imagef(src, s,
                                    (int2)(wi0Coord.x + i - 1,
                                           wi0Coord.y + j + 3 - 1));
    }

    /* Make the tile visible to the whole group before anyone reads it. */
    barrier(CLK_LOCAL_MEM_FENCE);
}

/*
 * blur - apply a 3x3 weight mask to src and write the result to dst.
 *
 *   src     image that is sampled (read-only)
 *   dst     image that receives one texel per work-item (write-only)
 *   s       sampler used for all reads of src
 *   weight  9 floats read as a row-major 3x3 mask:
 *           weight[(j + 1) * 3 + (i + 1)] multiplies the texel at offset
 *           (i, j) from the work-item's own pixel
 *
 * The weighted sum is divided by 9 before it is written.
 *
 * NOTE(review): the tile origin is computed from get_group_id() while the
 * output coordinate comes from get_global_id(); the two agree only when the
 * kernel is enqueued without a global work offset -- confirm with the host
 * code.  The write is not range-checked against the size of dst.
 */
__attribute__((reqd_work_group_size(8, 4, 1)))
__kernel void blur(__read_only image2d_t src, __write_only image2d_t dst,
                   sampler_t s, __global const float *weight)
{
    int2 gid = (int2)((int)get_group_id(0) * 8, (int)get_group_id(1) * 4);
    int2 lid = (int2)((int)get_local_id(0), (int)get_local_id(1));
    float4 pixel = (float4)(0.0f);

    __local float4 rgb[10][6];

    prefetch_texture_samples_8x4(src, s, rgb, gid, lid);

    for (int j = -1; j <= 1; j++) {
        for (int i = -1; i <= 1; i++) {
            /* i walks the tile columns and j the rows; the row index must use j. */
            pixel += rgb[lid.x + 1 + i][lid.y + 1 + j]
                   * weight[(j + 1) * 3 + (i + 1)];
        }
    }

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);
    write_imagef(dst, (int2)(x, y), pixel / 9.f);
}
相关文章推荐
- mysql-数据类型与java数据类型转化工具类
- Spring Security 3.2.x与Spring 4.0.x的Maven依赖管理
- 06.C#线程Thread
- IntelliJ IDEA导入多个eclipse项目到同一个workspace下
- Python按特定格式实现文件读写
- crfpp python
- php 写入txt文件
- struts2 -- interceptor(如何配置Interceptor)
- java 知识点百题 第一季
- c# 基本控件,窗口程序
- <<梦断代码>>阅读笔记三
- <<梦断代码>>阅读笔记二
- <<梦断代码>>阅读笔记一
- python编写windows平台下脚本二三事
- C和C++内存管理详解
- phpExcel 读取和导出
- [转]eclipse下编写android程序突然不会自动生成R.java文件和包的解决办法
- 欢迎使用CSDN-markdown编辑器
- 代码编写布局的顺序的一点疑惑
- Spring学习笔记—装配Bean