12 Replies Latest reply on Dec 10, 2014 2:51 AM by dipak

    Exception calling clBuildProgram on my kernel

    boxerab

      This must be karma

       

      My system:

      Windows 7 64 bits with HD7700 card

      Latest 14.9.2 Beta Driver

       

       

      When I try to compile the following kernel with clBuildProgram, I get the following exception:

       

      Unhandled exception at 0x000007FED7C9BEA2 (amdocl64.dll) in Roger.exe: 0xC0000005: Access violation reading location 0x0000000000000018.

       

      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

      #define CODE_BLOCK_WIDTH 32

      #define CODE_BLOCK_HEIGHT 32

       

      #define BOUNDARY 1

      #define BOUNDARY_X2 2

       

      #define BUFFER_STRIDE 34

      #define BUFFER_SIZE 640

       

      #define CURRENT_LOCATION_OFFSET 0

      #define NBH_OFFSET 34

      #define SIGMA_OLD_OFFSET 68

      #define SIGMA_NEW_OFFSET 102

      #define CXD_BUFFER_OFFSET 137

       

      #define INPUT_CODE_BLOCK_SIZE 512

       

      #define STRIPE_COLUMN_MASK 0xF

      #define NBH_FULL_MASK 7

      #define NBH_END_MASK  8

       

      #define SET_BIT(val,position) (val) = (val) | (1 << (position));

      #define CLEAR_BIT(val,position) (val) = (val) & ~(1 << (position));

       

      ////////////////////

      // Context Variables

       

      #define SPP 0

      #define MRP 1

      #define CUP 2

      #define CUP_RLC 3

       

      void kernel run(global uint* restrict idata, global uint* restrict msb, uint numChannels, uint width, uint height, uint decompositionLevels) {

       

      if (get_global_id(0) >= width || get_global_id(1) >= (height >> 5) )

      return;

       

      // state + context-decision buffer

      local uint buff[BUFFER_SIZE];

       

      //////////////////////////////////////////////////////////////////////////

      // calculate band:

      // 0 indicates LL, 1 indicates LH or HL, and 2 indicates HH

      int x = get_global_id(0);

      int y = get_global_id(1);

      int w = width;

          int h = height;

      int wNext = w >>1;

      int hNext = h >>1;

       

      int band = 0;  // initialize to LL band of highest decomposition level (== decompositionLevels)

       

      // iterator through decomposition level 1 through decompositionLevels-1

      for (int i = 1; i < decompositionLevels; ++i) {

      band += ( (x >= wNext && x < w) ||( y >= hNext && y < h) ) * (  x/wNext + y/hNext );

      x >>=1;

      y >>=1;

      w = wNext;

      h = hNext;

      wNext >>= 1;

      hNext >>=1;

      }

      //////////////////////////////////////////////////////////////////////////////

       

      // iterator through all channels

      for (int channelNumber = 0; channelNumber < numChannels; ++channelNumber) {

      int blockGridX = get_global_id(0) / CODE_BLOCK_WIDTH;

      int blockGridY = get_global_id(1)/CODE_BLOCK_HEIGHT;

      int blockGridOffset =  blockGridX + blockGridY * get_num_groups(0);

       

      uint msbVal =  (msb[blockGridOffset] >> (channelNumber * 8)) & 0xFF;

      if (msbVal == 0)

      return;

       

      // pointer to first row of current input code block

      global uint* src =  idata + (channelNumber * (get_num_groups(0)*get_num_groups(1)) + blockGridOffset) * INPUT_CODE_BLOCK_SIZE + get_local_id(0);

       

      // read in sign

      uint sgn = *src;

      src += CODE_BLOCK_WIDTH;

       

       

      //current code block

      uint current = *src;

      src += CODE_BLOCK_WIDTH;

      uint currentOffset = BOUNDARY + get_local_id(0);

       

      buff[currentOffset] = current;

      // nbh is not used in MSB CUP, so no need to initialize

      buff[currentOffset + SIGMA_OLD_OFFSET] = current;

      buff[currentOffset + SIGMA_NEW_OFFSET] = 0;

       

      // zero out location boundary columns

      if (get_local_id(0) == 0 || get_local_id(0) == CODE_BLOCK_WIDTH-1) {

       

      int delta = -1 + ((get_local_id(0)/(CODE_BLOCK_WIDTH-1)) << 1); // -1 or +1

      local uint* buffPtr = buff + currentOffset + delta;

       

      buffPtr[0] = 0;

      buffPtr += BUFFER_STRIDE;

      buffPtr[0] = 0;

      buffPtr += BUFFER_STRIDE;

      buffPtr[0] = 0;

      buffPtr += BUFFER_STRIDE ;

      buffPtr[0] = 0;

      }

       

      barrier(CLK_LOCAL_MEM_FENCE);

       

       

      // 2. calculate neighbourhood bits

      // (since this is MSB CUP, significance value is equivalent to bit value)

      uint top = current >> 1;

      uint left = buff[get_local_id(0)];

      uint leftTop = left >> 1;

      uint leftBottom = left << 1;

       

       

      int nbhMinusBottom = (top | leftTop | left);

      int nbh = nbhMinusBottom | leftBottom;

       

      // 3. CUP on 32 locations in column

      for (int i = 0; i < 8; ++i) {

       

      int nbhStripe = (nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)));

      int stripeBits = (current & (NBH_FULL_MASK << (i*4))) | (current & (NBH_END_MASK << (i*4)));

      if (!nbhStripe) {

      if (!stripeBits) {

      //RLC entire strip

       

      } else {

      // count trailing zeros

      uchar ctz = 7-clz(stripeBits & -stripeBits);

      }

      }

          for (int j = 0; j < 3; ++j) {

      }

      }

       

      // update sigma old

      buff[currentOffset + SIGMA_OLD_OFFSET] = current;

      barrier(CLK_LOCAL_MEM_FENCE);

       

      // 5 MQ Coding for CUP

      if (get_local_id(0) == 0 && get_local_id(1) == 0) {

      local uint* cxdPtr      = buff + CXD_BUFFER_OFFSET;

      for (int i = 0; i < CODE_BLOCK_HEIGHT>>2; ++i) {

      for (int j = 0; j < CODE_BLOCK_WIDTH; ++j) {

      cxdPtr[0] = 0;

      cxdPtr++;

      }

      cxdPtr+= BOUNDARY_X2;

      }

       

      }

       

      local char blockVote;

      local char zeroBitplane;

       

      for (int i = 0; i < msbVal-2; ++i) {

       

      blockVote = 0;

      zeroBitplane = 0;

       

      // 1. read in next bit, update sigma_old, clear sigma_new and calculate neighbourhood

      uint current = *src;

      src += CODE_BLOCK_WIDTH;

      buff[currentOffset] = current;

      uint sigmaOld = buff[currentOffset + SIGMA_OLD_OFFSET] | buff[currentOffset + SIGMA_NEW_OFFSET];

      buff[currentOffset + SIGMA_OLD_OFFSET] = sigmaOld;

      uint sigmaNew = 0;

       

      barrier(CLK_LOCAL_MEM_FENCE);

       

      // 2. preprocess bit plane

       

      // calculate neighbourhood bits

      uint left = buff[get_local_id(0)];

      uint leftTop = left >> 1;

      uint leftBottom = left << 1;

       

      uint right = buff[get_local_id(0) + BOUNDARY_X2];

      uint rightTop = right >> 1;

      uint rightBottom = right << 1;

       

      uint top = current >> 1;

      uint bottom = current << 1;

       

      int nbhMinusBottom =  rightTop | top | leftTop | left |  right;

      int nbh  = nbhMinusBottom | leftBottom | bottom | rightBottom;

       

      // 3. update significance on column

      int index = 0;

      for (int i = 0; i < 8; ++i) {

      int nbhStripe = ((nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)))) >> (i*4);

      int stripeBits = current >> (i*4);

      for (int j = 0; j < 3; ++j) {

        

      if ( (stripeBits&1) && (nbhStripe&1) && !(sigmaOld&1) ) {

          SET_BIT(sigmaNew, index);

      blockVote = 1;

      } else {

      CLEAR_BIT(sigmaNew, index);

      }

      nbhStripe >>= 1;

      stripeBits >>= 1;

      sigmaOld >>= 1;

      index++;

      }

      }

      buff[currentOffset + SIGMA_NEW_OFFSET] = sigmaNew;

      while (blockVote) {

       

      blockVote = 0;

      barrier(CLK_LOCAL_MEM_FENCE);

       

      }

       

      //2. bpc

      for (int i = 0; i < 8; ++i) {

      for (int j = 0; j < 3; ++j) {

      }

      }

         src += CODE_BLOCK_WIDTH;

      }

      }

      }

       

      ////////////////////////////////////////////////////////////////////////////////////////////////