5 Replies Latest reply on Feb 16, 2011 5:20 AM by genaganna

    The code runs on the CPU, but does not run on the GPU.

    josling

       

      The following code, which computes the median of nine values, runs correctly on the CPU:

      --------------------------------------

      /* Element type handled by the sorting macros below. */
      typedef unsigned int pixelvalue;

      /*
       * PIX_SWAP(a, b): exchange the values of the two pixelvalue lvalues a and b.
       * PIX_SORT(a, b): swap a and b only when a > b, so that a <= b afterwards.
       *
       * Both are wrapped in do { ... } while (0) so that "PIX_SORT(x, y);" is a
       * single statement and stays correct inside an unbraced if/else.
       * The arguments are evaluated more than once, so they must be free of
       * side effects (no i++ and the like).
       */
      #define PIX_SWAP(a,b) do { pixelvalue pix_swap_tmp=(a);(a)=(b);(b)=pix_swap_tmp; } while (0)

      #define PIX_SORT(a,b) do { if ((a)>(b)) PIX_SWAP((a),(b)); } while (0)

       

      /*
       * Return the median of the nine values p[0] .. p[8].
       *
       * A fixed sequence of 19 compare-exchange steps (PIX_SORT) is applied.
       * The steps swap elements of p in place, so the caller's array may be
       * reordered on return; the value that ends up in p[4] is the result.
       * The order of the steps matters and must not be changed.
       */
      pixelvalue opt_med9(pixelvalue * p)
      {
          /* Steps 1-9: order each of the triples (0,1,2), (3,4,5), (6,7,8). */
          PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);
          PIX_SORT(p[0], p[1]); PIX_SORT(p[3], p[4]); PIX_SORT(p[6], p[7]);
          PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);

          /* Steps 10-16: compare-exchange across the three triples. */
          PIX_SORT(p[0], p[3]); PIX_SORT(p[5], p[8]); PIX_SORT(p[4], p[7]);
          PIX_SORT(p[3], p[6]); PIX_SORT(p[1], p[4]); PIX_SORT(p[2], p[5]);
          PIX_SORT(p[4], p[7]);

          /* Steps 17-19: final exchanges among p[2], p[4] and p[6]. */
          PIX_SORT(p[4], p[2]); PIX_SORT(p[6], p[4]); PIX_SORT(p[4], p[2]);

          return p[4];
      }

       

       

       

      /*
       * Small CPU check of opt_med9: prints the median of a fixed nine-element
       * array (6 for the values below).  printf requires <stdio.h>.
       */
      int main(void)
      {
          unsigned int a[9] = {1,2,3,4,10,6,7,8,9};

          /* opt_med9 returns an unsigned int, so the matching conversion is %u, not %d. */
          printf("%u \n", opt_med9(a));

          return 0;
      }

       

      /* T: element type of the per-channel window arrays (wr, wg, wb) in the median kernel below. */
      typedef unsigned int T;

       

      -------------------------------------

      Then I changed it to do median filtering on the GPU, like this (using the same macros):

      ------------------------------------------

      #define _M 3
      #define _N 3

      /*
       * median: _M x _N (3x3) median filter over an RGBA image.
       *
       * Each work-item (gx, gy) that lies inside the uiWidth x uiHeight area
       * reads the _M x _N neighbourhood centred on its own pixel from
       * SourceRgbaTex through RowSampler, takes the median of the x, y and z
       * components separately with opt_med9, packs the three medians with
       * rgbaInt4ToUint and stores the packed value in uiDest.  Work-items
       * outside that area do nothing.
       *
       * cb is not used by this kernel.
       * opt_med9 is hard-wired to nine elements, so _M * _N must stay 9.
       * opt_med9, PIX_SORT/PIX_SWAP, T, CB and rgbaInt4ToUint must be visible
       * in the same program source before this kernel.
       */
      __kernel
      void median(__global CB* cb,
                  __read_only image2d_t SourceRgbaTex, sampler_t RowSampler,
                  __global unsigned int* uiDest,
                  unsigned int uiWidth, unsigned int uiHeight)
      {
          /* Work-item coordinates: dimension 0 is the column, dimension 1 the row. */
          int gx = get_global_id(0);
          int gy = get_global_id(1);

          if (gx < uiWidth && gy < uiHeight)
          {
              /* One window of _M * _N samples per colour channel. */
              T wr[_M * _N];
              T wg[_M * _N];
              T wb[_M * _N];

              int n = 0;
              for (int krow = 0; krow < _M; krow++)
              {
                  for (int kcol = 0; kcol < _N; kcol++)
                  {
                      /* The original read undeclared gcol/grow here; the declared
                         work-item coordinates are gx/gy. */
                      uint4 pixel = read_imageui(SourceRgbaTex, RowSampler,
                                                 (int2)(gx + kcol - _N/2, gy + krow - _M/2));

                      wr[n] = pixel.x;
                      wg[n] = pixel.y;
                      wb[n] = pixel.z;
                      n++;
                  }
              }

              /*
               * Alternative kept for reference: partial selection sort of each
               * window followed by picking the middle element.  (The [i]
               * subscripts are restored here; they appear to have been dropped
               * from the posted text.)
               *
               * for (int i = 0; i <= (_M * _N) / 2; i++)
               * {
               *     for (int j = i + 1; j < _M * _N; j++)
               *     {
               *         if (wr[i] > wr[j]) { T tmp = wr[i]; wr[i] = wr[j]; wr[j] = tmp; }
               *         if (wg[i] > wg[j]) { T tmp = wg[i]; wg[i] = wg[j]; wg[j] = tmp; }
               *         if (wb[i] > wb[j]) { T tmp = wb[i]; wb[i] = wb[j]; wb[j] = tmp; }
               *     }
               * }
               *
               * uint4 w =
               * { // a PixelARGB on a 32 bit word
               *     wr[(_M * _N) / 2],
               *     wg[(_M * _N) / 2],
               *     wb[(_M * _N) / 2],
               *     0 //(T)((a + ((a & lmask) << 1)) >> shift)
               * };
               */

              /* Per-channel median; opt_med9 reorders the window arrays in place. */
              unsigned int r = opt_med9(wr);
              unsigned int g = opt_med9(wg);
              unsigned int b = opt_med9(wb);

              /* OpenCL C vector literal form: (type)(components). */
              uint4 w = (uint4)(r, g, b, 0u);

              /* NOTE(review): the row stride is get_global_size(0), not uiWidth.
                 That matches a tightly packed uiWidth-wide buffer only when the
                 global size in dimension 0 equals uiWidth - confirm against the
                 host code. */
              uiDest[gy * get_global_size(0) + gx] = rgbaInt4ToUint(w);
          }
      }

      -----------------------------------------------------------

      The arrays wr, wg and wb are filled correctly, because when I switch to the code that is commented out above, the kernel runs well.

      But when I use the opt_med9 function instead, it compiles OK, but the kernel does not seem to be executed.

       

      Can anyone help me?

      thanks.