josling

The code runs on the CPU, but does not run on the GPU.

Discussion created by josling on Feb 15, 2011
Latest reply on Feb 16, 2011 by genaganna

 

The following code, which computes the median of nine values, runs correctly on the CPU:

--------------------------------------

/* Sample type handled by the median routine. */
typedef unsigned int pixelvalue;

/*
 * PIX_SWAP(a,b): exchange the values of two pixelvalue lvalues.
 * PIX_SORT(a,b): reorder two pixelvalue lvalues so that (a) <= (b) afterwards.
 *
 * Both macros are wrapped in do { ... } while (0) so that an invocation
 * followed by ';' forms exactly one statement and stays valid inside an
 * unbraced if/else. The arguments are evaluated more than once, so pass
 * only side-effect-free lvalues.
 */
#define PIX_SWAP(a,b) do { pixelvalue temp=(a);(a)=(b);(b)=temp; } while (0)
#define PIX_SORT(a,b) do { if ((a)>(b)) PIX_SWAP((a),(b)); } while (0)

/*
 * Return the median of the nine values p[0]..p[8].
 *
 * The median is found with a fixed sequence of 19 compare-exchange steps
 * instead of a full sort. The array is reordered in place as a side
 * effect; on return the median is stored in p[4].
 *
 * p: pointer to at least 9 readable and writable pixelvalue elements.
 */
pixelvalue opt_med9(pixelvalue * p)
{
    PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);
    PIX_SORT(p[0], p[1]); PIX_SORT(p[3], p[4]); PIX_SORT(p[6], p[7]);
    PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);
    PIX_SORT(p[0], p[3]); PIX_SORT(p[5], p[8]); PIX_SORT(p[4], p[7]);
    PIX_SORT(p[3], p[6]); PIX_SORT(p[1], p[4]); PIX_SORT(p[2], p[5]);
    PIX_SORT(p[4], p[7]); PIX_SORT(p[4], p[2]); PIX_SORT(p[6], p[4]);
    PIX_SORT(p[4], p[2]);

    return (p[4]);
}

 

 

 

/*
 * CPU check of opt_med9: prints the median of a fixed nine-element array.
 */
int main(void)
{
    unsigned int a[9] = {1, 2, 3, 4, 10, 6, 7, 8, 9};

    /* opt_med9 returns an unsigned int, so the matching conversion is %u. */
    printf("%u \n", opt_med9(a));

    return 0;
}

 

/* Element type of the per-channel window arrays (wr, wg, wb) declared in the median kernel. */
typedef unsigned int T;

 

-------------------------------------

Then I changed it to use the GPU for median filtering, like this (using the same macros):

------------------------------------------

/*
 * Window height (_M) and width (_N) in pixels. opt_med9 reads exactly
 * nine elements, so _M * _N must stay equal to 9.
 */
#define _M 3
#define _N 3

/*
 * Median filter kernel: one work-item per output pixel.
 *
 * For the pixel at (global id 0, global id 1) the kernel reads the
 * _M x _N neighbourhood centred on it from SourceRgbaTex, takes the
 * median of the x, y and z components independently with opt_med9, and
 * writes the packed result to uiDest.
 *
 * cb            - not used in this kernel.
 * SourceRgbaTex - input image, read with read_imageui.
 * RowSampler    - sampler used for every read; how coordinates outside
 *                 the image are handled depends on how the host created
 *                 it (NOTE(review): confirm the addressing mode).
 * uiDest        - output buffer, indexed as row * get_global_size(0) + column.
 * uiWidth       - work-items with column >= uiWidth do nothing.
 * uiHeight      - work-items with row >= uiHeight do nothing.
 */
__kernel
void median(__global CB* cb,
            __read_only image2d_t SourceRgbaTex, sampler_t RowSampler,
            __global unsigned int* uiDest,
            unsigned int uiWidth, unsigned int uiHeight)
{
    /*
     * Column and row handled by this work-item. These were declared as
     * gx/gy while every use below referred to gcol/grow; the names are
     * now consistent.
     */
    int gcol = get_global_id(0);
    int grow = get_global_id(1);

    if (gcol < uiWidth && grow < uiHeight)
    {
        /* Per-channel copies of the neighbourhood, filled row by row. */
        T wr[_M * _N];
        T wg[_M * _N];
        T wb[_M * _N];
        int n = 0;

        for (int krow = 0; krow < _M; krow++)
        {
            for (int kcol = 0; kcol < _N; kcol++)
            {
                /* Offsets run from -_N/2 .. +_N/2 and -_M/2 .. +_M/2 around the centre pixel. */
                uint4 pixel = read_imageui(SourceRgbaTex, RowSampler,
                                           (int2)(gcol + kcol - _N/2, grow + krow - _M/2));

                wr[n] = pixel.x;
                wg[n] = pixel.y;
                wb[n] = pixel.z;
                n++;
            }
        }

        /*
         * Alternative median computation (partial selection sort), kept
         * disabled for reference:
         *
         * for (int i = 0; i <= (_M * _N) / 2; i++)
         * {
         *     for (int j = i + 1; j < _M * _N; j++)
         *     {
         *         if (wr[i] > wr[j]) { T tmp = wr[i]; wr[i] = wr[j]; wr[j] = tmp; }
         *         if (wg[i] > wg[j]) { T tmp = wg[i]; wg[i] = wg[j]; wg[j] = tmp; }
         *         if (wb[i] > wb[j]) { T tmp = wb[i]; wb[i] = wb[j]; wb[j] = tmp; }
         *     }
         * }
         *
         * uint4 w = (uint4)(wr[(_M * _N) / 2],
         *                   wg[(_M * _N) / 2],
         *                   wb[(_M * _N) / 2],
         *                   0);
         */

        /* Median of each channel; opt_med9 reorders the arrays in place. */
        unsigned int r = opt_med9(wr);
        unsigned int g = opt_med9(wg);
        unsigned int b = opt_med9(wb);

        /* OpenCL C vector literal; the fourth component is left at 0. */
        uint4 w = (uint4)(r, g, b, 0);

        uiDest[grow * get_global_size(0) + get_global_id(0)] = rgbaInt4ToUint(w);
    }
}

-----------------------------------------------------------

The arrays wr, wg and wb are filled correctly, because when I switch to the code that is commented out above, the kernel runs well.

But when I change it to use the opt_med9 function, it compiles OK, yet the kernel does not seem to be executed.

 

Can anyone help me?

thanks.



Outcomes