Dr.Haribo

Bug report: 5 compiler bugs that ruined my day ;-)

Discussion created by Dr.Haribo on Jun 3, 2011
Latest reply on Jun 3, 2011 by Dr.Haribo

Test machine:

OS: 64-bit Windows 7

CPU: Intel Core 2 Duo E8400

GPU 1: AMD Radeon 6990 (dual Cayman) with AMD_Catalyst_11.5a_Hotfix_8.85.6RC2_Win7_May13

GPU 2: NVIDIA GeForce GTX 580 with driver version 275.33


// BUG 1: AMD FAIL, NVIDIA OK
// Strangely, adding up some zeroes in a loop causes the entire function
// to become a NOP and nothing is written to *output.
// Tested on Cayman GPU (Radeon HD 6990): FAIL (output buffer not written)
// Tested on Geforce GTX 580: OK (0x600D written to output buffer)
//
// Expected result: *output == 0x600D. Every element of both arrays is zero,
// so x stays 0 and the final branch is never taken.
//
// The loop is deliberately hand-unrolled by 8 (two passes cover all 16
// elements); keep this exact shape, it is part of the reproduction.
// Each statement must subscript BOTH arrays with [i] -- without the
// subscripts the expression would add two pointers and not compile at all.
__constant uint zeroes[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
__kernel void bug1(__global uint *output)
{
  uint empty[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
  *output = 0x600D; // this never happens on AMD
  uint x = 0;
  uint i = 0;
  while (i < 16) {
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
    x += empty[i] + zeroes[i]; i+=1;
  }
  if (x == 42) // keep optimizer from removing everything
    *output = 0xBAD1;
}

// BUG 2A: AMD FAIL, NVIDIA FAIL
// Comparison with an uninitialized variable is always true.
// Please make "value used before set" an error, not a warning.
// You don't notice warnings when calling clBuildProgram()
// and what happens here makes no sense.
//
// Expected: *output == 0x600D unless `undefined` happens to hold 42.
// Observed: *output == 0xBAD2 regardless of the literal compared against.
// NOTE(review): reading an uninitialized automatic variable is undefined
// behavior in C, so this outcome is probably permitted by the language
// rather than a miscompile -- the actionable request is the diagnostic
// severity. Presumably the warning is only visible in the program build
// log; confirm against the host code.
__kernel void bug2a(__global uint *output)
{
  uint undefined; // not initialized (intentional: this is the repro)
  *output = 0x600D; // default "good" marker
  if (undefined == 42) // always true using any literal value
    *output = 0xBAD2; // "bad" marker: overwrites the good one
}

// BUG 2B: AMD FAIL, NVIDIA FAIL
// Assignments based on uninitialized variables turn
// the entire function into a NOP - it does nothing at all.
// Please make "value used before set" an error, not a warning.
// You don't notice warnings when calling clBuildProgram()
// and what happens here makes no sense.
//
// Expected: *output is written at least once (first 0xBAD3, then `test`).
// Observed: neither store to *output takes effect.
// NOTE(review): the loop counter is read before it is ever assigned, which
// is undefined behavior in C; the compiler is probably entitled to discard
// the whole body. The missing hard diagnostic is the real complaint.
__kernel void bug2b(__global uint *output)
{
  uint test = 0;
  *output = 0xBAD3; // strangely, even this assignment disappears
  for (uint x; x < 1000; x++) // x not initialized - oops! (intentional)
    test += x; // accumulates values derived from the indeterminate x
  *output = test; // this assignment disappears
}

// BUG 3: AMD FAIL, NVIDIA OK
// Trying to unroll a loop where the loop variable is modified
// inside the loop crashes the compiler.
//
// The kernel takes no arguments and produces no output; it only has to be
// compiled to trigger the crash. The counter is incremented twice per
// iteration (once in the body, once in the for-header), so the loop runs
// 5 times rather than 10.
__kernel void crash1()
{
#pragma unroll // AMD: comment out line to avoid compiler crash
  for (uint i = 0; i < 10; i++)
    i++; // extra modification of the induction variable: the trigger
}

// BUG 4: AMD FAIL, NVIDIA OK
// AMD: Reading far outside array bounds causes the compiler to crash
//      if also a comparison is done on the value that was read.
//
// `tiny` has exactly one element, yet the loop reads tiny[0] .. tiny[24].
// The out-of-bounds reads are deliberate -- they are the reproduction.
// The reads themselves are invalid code, but the compiler should reject
// or miscompute, not crash. The subscript [i] is required: without it the
// statement would add a pointer to a uint instead of reading the array.
__kernel void crash2(__global uint *output)
{
  uint tiny[1] = { 0 };
  uint x = 0;
  *output = 0x600D;
  for (uint i = 0; i < 25; i++) // AMD: 24 = OK, 25 = CRASH
    x += tiny[i]; // i >= 1 reads past the end of tiny (intentional)
  if (x == 42) // AMD: comment out line to avoid compiler crash
    *output = x; // this line by itself won't cause a crash
}

// BUG 5: AMD FAIL, NVIDIA FAIL
// Putting too much code inside a loop crashes the compiler.
// Also it can get the compiler into an infinite loop or cause
// a situation where the compiler returns, but every call to OpenCL
// functions after that results in CL_INVALID_COMMAND_QUEUE or
// CL_OUT_OF_RESOURCES. I was not able to reproduce the 2nd and 3rd
// cases when writing example code, but this code causes
// the 1st case (crash). Behavior seems the same on AMD and NVIDIA.
//
// As written (one copy of the "repeat me" line) this kernel is only a
// template: to reproduce the crash, duplicate that line about 2000 times
// inside the loop body before building.
__kernel void compilercrash(__global uint *output)
{
  // i1 = -1 converts to the maximum uint value (all bits set);
  // n starts from this work-item's global id in dimension 0.
  uint i1 = -1, i2 = 0, n = get_global_id(0), sum = 0;

  for (uint loop = 0; loop < 1; loop++) { // single iteration; size of the body is what matters
    // repeat the "i1 = .." line 2000 times
    // not done here to keep example short
    i1 = i1 ^ i2; i2 += i1; n++; // repeat me
    sum += i1 + n; // consume the values so the work cannot be discarded
  }
 
  *output = sum; // every work-item stores to the same location
}

Outcomes