Answered (Assumed Answered)

Unhandled exception compiling OpenCL kernel unless optimization is disabled

Question asked by jsonntag on May 22, 2014
Latest reply on Jun 19, 2014 by pinform

The following code fails to compile on an HD 6970 (Cayman) with an unhandled exception unless I disable all optimization. It actually crashes CodeXL when building. Yet it compiles OK and runs successfully on an NVIDIA GPU. I'm hoping someone can point out the error, or what needs to be changed, so that it can be compiled with optimization enabled. Yes, I'd also like the optimization bug fixed so that the compiler either reports the error or optimizes correctly, but I realize it takes time to get fixes added to releases, so I'd appreciate a workaround if there is one in the meantime.

 

/* Number of low-order bits kernelSteps64 consumes per loop iteration: the
   kernel shifts its working value right by this amount and indexes the
   lookup table with a mask of (1 << LOOKAHEAD) - 1. */
__constant unsigned int LOOKAHEAD=20;

/* Presumably a word width in bits.
   NOTE(review): the arithmetic in kernelSteps64 operates on 64-bit
   unsigned long limbs; confirm that 32 is the intended width wherever
   this constant is used. */
__constant unsigned int BITS=32;

/* Upper bound on the accumulated step count: kernelSteps64 stops iterating
   once its counter reaches this value. The same value is stored as the
   step count when the working value overflows its four limbs. */
__constant unsigned long maxStep = 0x1000000ul;

 

 

/*
 * Full 64 x 64 -> 128-bit unsigned multiply.
 *
 * Returns a ulong2 whose .x component holds the low 64 bits of a * b and
 * whose .y component holds the high 64 bits (obtained with mul_hi).
 */
inline ulong2 mul128(const unsigned long a, const unsigned long b) {
  ulong2 product;

  product.x = a * b;         /* low half: ordinary wrapping multiply */
  product.y = mul_hi(a, b);  /* high half of the 128-bit product */

  return product;
}

 

 

/*
 * kernelSteps64 - per work-item, repeatedly apply a table-driven
 * "shift right, multiply, add" update to a 256-bit value and record how
 * many steps were accumulated.
 *
 * Parameters:
 *   sieve  - per-work-item 32-bit offsets; entry [get_global_id(0)] is added
 *            to `offset` to form this work-item's total offset.
 *   offset - base offset shared by all work-items. When it is 0 the previous
 *            contents of `steps` are ignored (treated as all zero).
 *   start  - 128-bit starting value (.x = low 64 bits, .y = high 64 bits).
 *   steps  - in/out, one ulong4 per work-item:
 *              .x = largest step count seen so far,
 *              .y = total offset that produced that largest count,
 *              .z/.w = low/high halves of a 128-bit running sum of counts.
 *   mosc   - lookup table indexed by the low LOOKAHEAD bits of the value, so
 *            it must hold at least (1 << LOOKAHEAD) entries. Per entry:
 *              .x = multiplier, .y = addend, .z = step increment,
 *              .w = final step adjustment applied after the loop.
 *
 * The working value is kept in four 64-bit limbs (val.x least significant).
 * NOTE(review): this looks like a Collatz-style step counter - confirm.
 *
 * Fix relative to the earlier revision: when shifting the multi-limb value
 * right by `lookahead`, the bits brought in from the next-higher limb must be
 * shifted left by (64 - lookahead) because each limb is a 64-bit unsigned
 * long. The earlier code used (BITS - lookahead) with BITS == 32, which is
 * only correct while every upper limb is zero.
 */
__kernel void kernelSteps64(__global const uint *sieve, const unsigned long offset, const ulong2 start, __global ulong4 *steps, __global const uint4 *mosc) {
  const uint lookahead = LOOKAHEAD;
  const uint lutMask = (1 << lookahead) - 1;   /* selects the low `lookahead` bits */
  const uint carryShift = 64u - lookahead;     /* limb width minus shift amount */
  const uint stopAbove = (1 << lookahead) + 1; /* loop continues while value exceeds this */

  const uint t_offset = get_global_id(0);
  const unsigned long totalOffset = offset + sieve[t_offset];

  /* val = start + totalOffset as a 256-bit number; upper two limbs start at 0. */
  ulong4 val;
  val.x = start.x + totalOffset;
  val.y = start.y + (val.x < totalOffset); /* carry out of the low limb */
  val.z = 0;
  val.w = 0;

  unsigned long stepCount = 0;
  unsigned long carry;
  unsigned long overflow; /* carry out of the top limb in the latest iteration */
  ulong2 prod;
  uint4 lut;

  /* Statement order inside the loop matters: each limb is read (shifted)
     before it is overwritten, and the next-higher limb is still the old
     value when it supplies the shifted-in bits. */
  do {
    lut = mosc[val.x & lutMask];
    const unsigned long factor = (unsigned long)lut.x;

    /* Limb 0: (shifted limb) * factor + addend. */
    prod = mul128((val.x >> lookahead) + (val.y << carryShift), factor);
    val.x = prod.x + lut.y;
    carry = prod.y + (val.x < prod.x);

    /* Limb 1. */
    prod = mul128((val.y >> lookahead) + (val.z << carryShift), factor);
    val.y = prod.x + carry;
    carry = prod.y + (val.y < prod.x);

    /* Limb 2. */
    prod = mul128((val.z >> lookahead) + (val.w << carryShift), factor);
    val.z = prod.x + carry;
    carry = prod.y + (val.z < prod.x);

    /* Limb 3: nothing above it to shift in. */
    prod = mul128((val.w >> lookahead), factor);
    val.w = prod.x + carry;
    overflow = prod.y + (val.w < prod.x);

    stepCount += lut.z;
  } while (((val.x > (unsigned long)stopAbove) | val.y | val.z | val.w | overflow) && (stepCount < maxStep));

  /* Final adjustment looked up from the remaining small value. */
  stepCount += (unsigned long)mosc[(val.x - 2u) & lutMask].w;

  /* The value no longer fit in 256 bits: report the step limit instead. */
  if (overflow) {
    stepCount = maxStep;
  }

  /* Previous per-work-item statistics; a zero base offset starts fresh. */
  ulong4 prev;
  if (offset == 0) {
    prev = (ulong4)(0, 0, 0, 0);
  } else {
    prev = steps[t_offset];
  }

  ulong4 stepsOut;

  /* 128-bit running sum of step counts in (.z, .w). */
  stepsOut.z = prev.z + stepCount;
  stepsOut.w = prev.w + (stepsOut.z < stepCount);

  /* Keep the largest step count and the offset that produced it. */
  if (stepCount > prev.x) {
    stepsOut.x = stepCount;
    stepsOut.y = totalOffset;
  } else {
    stepsOut.x = prev.x;
    stepsOut.y = prev.y;
  }

  steps[t_offset] = stepsOut;
}

Outcomes