4 Replies Latest reply on Jun 3, 2011 10:54 PM by Dr.Haribo

    Bug report: 5 compiler bugs that ruined my day ;-)

    Dr.Haribo

      Test machine:

      OS: 64-bit Windows 7

      CPU: Intel Core 2 Duo E8400

      GPU 1: AMD Radeon 6990 (dual Cayman) with AMD_Catalyst_11.5a_Hotfix_8.85.6RC2_Win7_May13

      GPU 2: NVIDIA GeForce GTX 580 with driver 275.33


      // BUG 1: AMD FAIL, NVIDIA OK
      // Strangely, adding up some zeroes in a loop causes the entire function
      // to become a NOP and nothing is written to *output.
      // Tested on Cayman GPU (Radeon HD 6990): FAIL (output buffer not written)
      // Tested on Geforce GTX 580: OK (0x600D written to output buffer)
      //
      // Reproduction kernel. Expected result: *output == 0x600D. Every element
      // of both arrays is 0, so x stays 0 and the 0xBAD1 store is never taken.
      //
      // output: location that receives the result word.
      __constant uint zeroes[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
      __kernel void bug1(__global uint *output)
      {
        uint empty[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
        *output = 0x600D; // this never happens on AMD
        uint x = 0;
        uint i = 0;
        // Manually unrolled 8x: each statement reads element i of both arrays
        // and then advances i, so two passes of the while loop visit indices
        // 0..15. The element subscripts are required -- adding the bare array
        // names would be pointer + pointer, which does not compile.
        while (i < 16) {
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
          x += empty[i] + zeroes[i]; i+=1;
        }
        if (x == 42) // keep optimizer from removing everything
          *output = 0xBAD1;
      }

      // BUG 2A: AMD FAIL, NVIDIA FAIL
      // Comparison with an uninitialized variable is always true.
      // Please make "value used before set" an error, not a warning.
      // You don't notice warnings when calling clBuildProgram()
      // and what happens here makes no sense.
      //
      // Reproduction kernel: stores 0x600D to *output, then overwrites it with
      // 0xBAD2 if the uninitialized local compares equal to 42.
      // NOTE(review): the read of `undefined` is deliberate. Its value is
      // indeterminate, so the language does not promise either outcome; the
      // request here is for the compiler to reject the code instead of warning.
      // NOTE(review): build warnings should be retrievable from the build log
      // (clGetProgramBuildInfo with CL_PROGRAM_BUILD_LOG) -- confirm for the
      // drivers listed in this report.
      //
      // output: location that receives the result word.
      __kernel void bug2a(__global uint *output)
      {
        uint undefined; // not initialized
        *output = 0x600D;
        if (undefined == 42) // always true using any literal value
          *output = 0xBAD2;
      }

      // BUG 2B: AMD FAIL, NVIDIA FAIL
      // Assignments based on uninitialized variables turn
      // the entire function into a NOP - it does nothing at all.
      // Please make "value used before set" an error, not a warning.
      // You don't notice warnings when calling clBuildProgram()
      // and what happens here makes no sense.
      //
      // Reproduction kernel: stores 0xBAD3 to *output, accumulates the loop
      // counter into `test` while it is below 1000, then stores `test`.
      // NOTE(review): the missing initializer on `x` is deliberate. Its
      // starting value is indeterminate, so neither the trip count nor the
      // final value of `test` is defined by the language.
      // NOTE(review): independent of the uninitialized read, the 0xBAD3 store
      // is overwritten by the final store with no intervening read of *output,
      // so an optimizer may legitimately drop that first store.
      //
      // output: location that receives the result word.
      __kernel void bug2b(__global uint *output)
      {
        uint test = 0;
        *output = 0xBAD3; // strangely, even this assignment disappears
        for (uint x; x < 1000; x++) // x not initialized - oops!
          test += x;
        *output = test; // this assignment disappears
      }

      // BUG 3: AMD FAIL, NVIDIA OK
      // Trying to unroll a loop where the loop variable is modified
      // inside the loop crashes the compiler.
      //
      // Reproduction kernel: takes no arguments and writes no output; it only
      // exists to be compiled. The loop counter is incremented both in the
      // for-header and in the body, so it advances by 2 per iteration and the
      // body executes 5 times.
      __kernel void crash1()
      {
      #pragma unroll // AMD: comment out line to avoid compiler crash
        for (uint i = 0; i < 10; i++)
          i++; // second increment of the induction variable (the crash trigger)
      }

      // BUG 4: AMD FAIL, NVIDIA OK
      // AMD: Reading far outside array bounds causes the compiler to crash
      //      if also a comparison is done on the value that was read.
      //
      // Reproduction kernel: stores 0x600D to *output, sums tiny[0..24] into x,
      // and overwrites *output with x only if the sum equals 42.
      // NOTE: `tiny` has a single element, so every read with i >= 1 is a
      // deliberate out-of-bounds access. The runtime result is therefore not
      // defined; the report is about the compiler crashing at build time.
      //
      // output: location that receives the result word.
      __kernel void crash2(__global uint *output)
      {
        uint tiny[1] = { 0 };
        uint x = 0;
        *output = 0x600D;
        // The subscript is required: the loop must read element i, not add
        // the array's address to x.
        for (uint i = 0; i < 25; i++) // AMD: 24 = OK, 25 = CRASH
          x += tiny[i];
        if (x == 42) // AMD: comment out line to avoid compiler crash
          *output = x; // this line by itself won't cause a crash
      }

      // BUG 5: AMD FAIL, NVIDIA FAIL
      // Putting too much code inside a loop crashes the compiler.
      // Also it can get the compiler into an infinite loop or cause
      // a situation where the compiler returns, but every call to OpenCL
      // functions after that results in CL_INVALID_COMMAND_QUEUE or
      // CL_OUT_OF_RESOURCES. I was not able to reproduce the 2nd and 3rd
      // cases when writing example code, but this code causes
      // the 1st case (crash). Behavior seems the same on AMD and NVIDIA.
      //
      // Reproduction kernel (abbreviated). As written, the loop body contains
      // the "repeat me" statement once; to reproduce the crash that line has
      // to be duplicated about 2000 times inside the loop, as described below.
      //
      // output: location that receives the accumulated sum.
      __kernel void compilercrash(__global uint *output)
      {
        // i1 starts as -1 converted to uint (all bits set); n starts as this
        // work-item's global id in dimension 0.
        uint i1 = -1, i2 = 0, n = get_global_id(0), sum = 0;

        // The loop runs exactly once (loop < 1); it only serves as the
        // enclosing loop around the large body.
        for (uint loop = 0; loop < 1; loop++) {
          // repeat the "i1 = .." line 2000 times
          // not done here to keep example short
          i1 = i1 ^ i2; i2 += i1; n++; // repeat me
          sum += i1 + n;
        }

        *output = sum;
      }