0 Replies Latest reply on Aug 18, 2017 3:46 AM by joej

    Vulkan compiler bug since 17.7.2

    joej

      A bug shows up with the recent driver on a Fury X, Win10 64-bit (maybe also with some versions before that, but it surely was not there with 17.5.1).

      Here is example prefix-sum code for a thread group size of 256; a size of 64 is also broken.

       

      It reminds me of an older OpenCL bug that has since been fixed (OpenCL Driver Bug FuryX 32bit),

      but unlike the chaotic behaviour of that one, this time the results are consistent and more predictable.

       

      Test files have already been sent to the compiler team by dwitczak.

       

       

       

      #version 450

      // Number of invocations per workgroup along X. It also sizes the _lds
      // scratch array declared below.
      #define WG_WIDTH 256

      layout (local_size_x = WG_WIDTH) in;

      // Output buffer: main() stores one float per invocation into _G_test.
      layout (std430, binding = 0) buffer bTEST    { float    _G_test[]; };

      // Workgroup-shared scratch array that main() scans in place.
      // The posted snippet used _lds without declaring it, so the shader could
      // not compile as shown.
      // NOTE(review): the element type is assumed to be uint (main() stores the
      // integer literal 1 and converts with float(...) on output) -- confirm
      // against the original test files.
      shared uint _lds[WG_WIDTH];

       

      // Compiler-bug reproduction: an in-place inclusive prefix sum over _lds.
      //
      // Every invocation seeds its own _lds slot with 1, the workgroup then runs a
      // fixed sequence of "add" steps, and finally each invocation writes its slot
      // to _G_test as a float. According to the author's notes below, the expected
      // output for the first variant is the sequence 1...256.
      //
      // Step k (k = 0, 1, 2, ...) is executed by the first WG_WIDTH/2 invocations.
      // With base = (lID >> k) << (k + 1), each of them does
      //     _lds[base | (lID & (2^k - 1)) | 2^k] += _lds[base | (2^k - 1)];
      // i.e. the last element of the lower half of a 2^(k+1)-element block is added
      // to one element of the upper half of that block.
      //
      // NOTE(review): presumably _lds is workgroup-shared storage, since every
      // update is followed by memoryBarrierShared() -- confirm at its declaration.
      void main ()

      {

          uint lID = gl_LocalInvocationID.x;

          // Output slot in _G_test; simply a copy of the local invocation index.
          uint index = lID;

         

          // Seed: every element starts at 1.
          _lds[lID] = 1;

          // Make the seed writes visible to the whole workgroup before the first step.
          memoryBarrierShared(); barrier();

       

      // Variant selector: "#if 1" compiles the eight-step version below, which
      // covers blocks of up to 2^8 = 256 elements (matching WG_WIDTH == 256).
      #if 1 // wrong result: (1...128), (1...128) instead of (1...256)

          // k = 0: blocks of 2 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 0) << 1) | (lID &   0) |   1) ]    += _lds[(((lID >> 0) << 1) |   0) ];    memoryBarrierShared(); barrier();

          // k = 1: blocks of 4 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 1) << 2) | (lID &   1) |   2) ]    += _lds[(((lID >> 1) << 2) |   1) ];    memoryBarrierShared(); barrier();

          // k = 2: blocks of 8 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 2) << 3) | (lID &   3) |   4) ]    += _lds[(((lID >> 2) << 3) |   3) ];    memoryBarrierShared(); barrier();

          // k = 3: blocks of 16 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 3) << 4) | (lID &   7) |   8) ]    += _lds[(((lID >> 3) << 4) |   7) ];    memoryBarrierShared(); barrier();

          // k = 4: blocks of 32 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 4) << 5) | (lID &  15) |  16) ]    += _lds[(((lID >> 4) << 5) |  15) ];    memoryBarrierShared(); barrier();

          // k = 5: blocks of 64 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 5) << 6) | (lID &  31) |  32) ]    += _lds[(((lID >> 5) << 6) |  31) ];    memoryBarrierShared(); barrier();

          // k = 6: blocks of 128 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 6) << 7) | (lID &  63) |  64) ]    += _lds[(((lID >> 6) << 7) |  63) ];    memoryBarrierShared(); barrier();

          // k = 7: one block of 256 elements.
          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 7) << 8) | (lID & 127) | 128) ]    += _lds[(((lID >> 7) << 8) | 127) ];    memoryBarrierShared(); barrier();

      // Alternative variant: the same first six steps only (k = 0..5), so each
      // block of 64 elements is scanned independently; the k = 6 and k = 7 steps
      // are omitted.
      #else // wrong result: (1...64), (1...64), (1...32), (1...32), (1...32), (1...32)

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 0) << 1) | (lID &   0) |   1) ]    += _lds[(((lID >> 0) << 1) |   0) ];    memoryBarrierShared(); barrier();

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 1) << 2) | (lID &   1) |   2) ]    += _lds[(((lID >> 1) << 2) |   1) ];    memoryBarrierShared(); barrier();

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 2) << 3) | (lID &   3) |   4) ]    += _lds[(((lID >> 2) << 3) |   3) ];    memoryBarrierShared(); barrier();

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 3) << 4) | (lID &   7) |   8) ]    += _lds[(((lID >> 3) << 4) |   7) ];    memoryBarrierShared(); barrier();

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 4) << 5) | (lID &  15) |  16) ]    += _lds[(((lID >> 4) << 5) |  15) ];    memoryBarrierShared(); barrier();

          if (lID<(WG_WIDTH>>1)) _lds[(((lID >> 5) << 6) | (lID &  31) |  32) ]    += _lds[(((lID >> 5) << 6) |  31) ];    memoryBarrierShared(); barrier();

      #endif

       

          // Publish this invocation's scanned value, converted to float.
          _G_test[index] = float(_lds[lID]);

      }