pavandsp

Having a problem using __local in a kernel

Discussion created by pavandsp on May 17, 2010
Latest reply on May 19, 2010 by pavandsp

Hi,

I have a problem using local memory in a kernel to optimize performance.

I am not getting any output from the kernel. Please correct me if I am doing anything weird with the __local qualifier. Output buffer size = 1280x720x3.

Global work size = 1280x720

Local work size = 16x16

GPU: ATI HD 5850, SDK 2.1, Ubuntu 9.10.

 

 

#pragma OPENCL EXTENSION cl_amd_printf : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

#define LOCAL_ROW_SIZE 16
#define LOCAL_COL_SIZE 16

/* Row length, in pixels, used to address `output`. */
#define IMAGE_WIDTH 1280
/* Bytes stored per pixel in `output`. */
#define CHANNELS 3

/*
 * Apply (value - center) * gain + bias to one channel value and clamp the
 * result to the range [0, 255].
 */
int adjust_channel(int value, int center, int gain, int bias)
{
    int result = (value - center) * gain + bias;

    if (result < 0)
        return 0;
    if (result > 255)
        return 255;
    return result;
}

/*
 * In-place per-pixel adjustment of a 3-bytes-per-pixel buffer.
 *
 * Each work-item handles the pixel at (get_global_id(0), get_global_id(1)):
 * it copies the pixel's three bytes into a work-group tile in __local memory,
 * waits at a barrier, then writes the adjusted, clamped bytes back to the
 * same location in `output`.
 *
 *   output            pixel buffer, read and overwritten in place
 *   rc,   gc,   bc    value subtracted from channel 0 / 1 / 2
 *   ravg, gavg, bavg  integer multiplier for channel 0 / 1 / 2
 *   ra,   ga,   ba    value added to channel 0 / 1 / 2
 *
 * NOTE(review): `kernel` is a reserved keyword in OpenCL C (a synonym for
 * `__kernel`), so a function with this name is likely rejected by the
 * compiler. The name is kept here only because the host looks it up by
 * string in clCreateKernel; rename it in both places (e.g. "adjust_rgb").
 *
 * NOTE(review): the tile is sized LOCAL_ROW_SIZE x LOCAL_COL_SIZE pixels, so
 * this assumes a 16x16 work-group and a global size that exactly covers the
 * image -- confirm against the host launch parameters.
 */
__kernel void kernel(__global unsigned char * output,
                     const int rc, const int gc, const int bc,
                     const int ravg, const int gavg, const int bavg,
                     const int ra, const int ga, const int ba)
{
    __local unsigned char local_temp[LOCAL_ROW_SIZE][LOCAL_COL_SIZE * CHANNELS];

    uint lx = get_local_id(0);
    uint ly = get_local_id(1);

    /* Byte offset of this work-item's pixel in `output`. */
    uint base = (get_global_id(1) * IMAGE_WIDTH + get_global_id(0)) * CHANNELS;
    /* Column of this work-item's first byte within its tile row. */
    uint col = lx * CHANNELS;

    local_temp[ly][col + 0] = output[base + 0];
    local_temp[ly][col + 1] = output[base + 1];
    local_temp[ly][col + 2] = output[base + 2];

    /*
     * Every work-item reaches this barrier. Each one reads back only the
     * three bytes it stored itself, so the __local staging does not reduce
     * the number of global-memory accesses as written.
     */
    barrier(CLK_LOCAL_MEM_FENCE);

    output[base + 0] = (unsigned char)adjust_channel(local_temp[ly][col + 0], rc, ravg, ra);
    output[base + 1] = (unsigned char)adjust_channel(local_temp[ly][col + 1], gc, gavg, ga);
    output[base + 2] = (unsigned char)adjust_channel(local_temp[ly][col + 2], bc, bavg, ba);
}

Outcomes