Answered (Assumed Answered)

clBuildProgram had an unhandled exception

Question asked by danval on Jun 12, 2014
Latest reply on Aug 26, 2014 by dipak

The kernel below compiles successfully on some devices but triggers an unhandled exception in clBuildProgram on others. I ran it through CodeXL in Analyze Mode and got the error below. Minor changes to the code tend to hide the problem. If this is a compiler issue that will take time to fix, could you please suggest a workaround?

 

Error message from CodeXL in Analyze Mode:

========== Build started: Building test.cl on 21 devices. ==========

Compiling device:         Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz... Succeeded!

Compiling device: Barts... Succeeded!

Compiling device: BeaverCreek... Succeeded!

Compiling device: Bonaire... ...Failed!

OpenCL Compile Error: clBuildProgram had an unhanded exception.

--------

Compiling device: Caicos...

 

Kernel code:

 

/* Half-width of the filter support: both tap loops in mykernel run i over
 * [-BLCUTOFFR, +BLCUTOFFR], i.e. 2*BLCUTOFFR + 1 taps per output sample. */
#define  BLCUTOFFR                              3
/* Number of samples each work-item produces in the first pass; also the
 * row stride (in floats) of the horLoc buffer. */
#define  TILESIZ_X                             32
/* Number of samples each work-item produces in the second pass.
 * Note TILESIZ_Y + 2*BLCUTOFFR == WINDOWSIZ_Y. */
#define  TILESIZ_Y                             26
/* Number of rows in both local buffers (locMem and horLoc). */
#define  WINDOWSIZ_Y                           32
/* Row stride (in floats) of locMem, which is declared as WINDOWSIZ_X/4
 * float4 elements per row. */
#define  WINDOWSIZ_X                           40




/*
 * Per-tap weight used by both filter passes of mykernel.
 *
 *   i      signed tap offset from the centre sample
 *   pixD   value of the sample at that offset
 *   pixIn  value of the centre sample
 *
 * Returns 2 * ( -(i*i) * 0.01 - (pixD - pixIn)^2 * 0.05 ).
 * Both terms are negated squares, so the result is zero only when i == 0
 * and pixD == pixIn, and negative otherwise (for finite inputs).
 */
inline float myfunct(int i,float pixD, float pixIn) {
  const float tapOffset   = convert_float_rte(i);
  const float offsetSq    = tapOffset * tapOffset;
  const float valueDelta  = pixD - pixIn;
  const float deltaSq     = valueDelta * valueDelta;
  const float weight      = 2.f * ( -offsetSq * 0.01f - deltaSq * 0.05f );
  return weight;
}




/*
 * Normalised weighted sum of the 2*BLCUTOFFR + 1 local-memory samples that
 * are centred on `center` and spaced `stride` floats apart.
 *
 * Each tap is weighted by myfunct(tap, sample, centre sample); the weighted
 * sum is divided by the sum of the weights with native_divide.
 */
inline float filterTaps(__local const float* center, int stride) {
  const float centerPix = center[0];
  float weightedSum = 0.f;
  float weightSum   = 0.f;

  for (int tap = -BLCUTOFFR; tap <= BLCUTOFFR; ++tap) {
    const float samplePix = center[tap * stride];
    const float tapWeight = myfunct(tap, samplePix, centerPix);
    weightSum   += tapWeight;
    weightedSum += samplePix * tapWeight;
  }

  return native_divide(weightedSum, weightSum);
}

/*
 * Two-pass filter over work-group local memory.
 *
 * Pass 1: the work-item with local id `lid` (dimension 1) filters along row
 *         `lid` of locMem, skipping the first BLCUTOFFR floats of the row,
 *         and writes TILESIZ_X results to row `lid` of horLoc.
 * Pass 2: after a local barrier, the same work-item filters down column
 *         `lid` of horLoc, starting BLCUTOFFR rows in, and writes TILESIZ_Y
 *         results into column `lid` of locMem.
 *
 * Finally every work-item stores the constant (0,1,2,3) to imageOut[0].
 *
 * Notes on what this kernel does NOT do, as written:
 *   - locMem is read in pass 1 without being written first in this kernel.
 *   - imageIn is never referenced.
 *   - The filtered values are not copied to imageOut.
 *
 * NOTE(review): the indexing appears to require get_local_id(1) < WINDOWSIZ_Y
 * to stay inside locMem/horLoc; nothing here enforces it - confirm against
 * the host-side work-group size.
 */
__kernel
void mykernel(
            __global uchar4*  imageOut,
   __global uchar4*  imageIn
  )
{
  __local float4 locMem[(WINDOWSIZ_X/4)*WINDOWSIZ_Y];
  __local float  horLoc[TILESIZ_X*WINDOWSIZ_Y];

  const int lid = get_local_id(1);

  /* Scalar (float) view of the float4 window buffer. */
  __local float* const window = (__local float*) locMem;

  /* Pass 1: along a row of the window, one row per work-item. */
  {
    __local const float* rowIn  = window + lid*WINDOWSIZ_X + BLCUTOFFR;
    __local float*       rowOut = horLoc + lid*TILESIZ_X;

    for (int col = 0; col < TILESIZ_X; ++col) {
      rowOut[col] = filterTaps(rowIn + col, 1);
    }
  }

  /* All rows of horLoc must be complete before any column is read. */
  barrier(CLK_LOCAL_MEM_FENCE);

  /* Pass 2: down a column of horLoc, one column per work-item. */
  {
    __local const float* colIn  = horLoc + BLCUTOFFR * TILESIZ_X + lid;
    __local float*       colOut = window + lid;

    for (int row = 0; row < TILESIZ_Y; ++row) {
      colOut[row*WINDOWSIZ_X] = filterTaps(colIn + row*TILESIZ_X, TILESIZ_X);
    }
  }

  barrier(CLK_LOCAL_MEM_FENCE);
  imageOut[0] = (uchar4)(0,1,2,3);
}


Outcomes