cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

antzrhere
Adept III

kernel crashes compiler and causes compiler to spew out text on my debug console

Hi.

This kernel below crashes the compiler.

Not only only that but during the process it causes some sort of internal compiler debug text to be printed on my own program console window (!!) hundreds of lines of myserious internal debug code...not sure how...

The obvious cause of the error is the lines "BSV0 &= InsideMask"   and    "BSV1 &= InsideMask;"  (which I know are incorrect)...the compiler doesn't crash with these removed.

The program crashes after printing this text, so I can't copy all of it, but it's along the lines of:

"...

0x6758540: v2f32,ch,glue = CopyFromReg 0x6757300, 0x675adc0, 0x6757308:1 [ ORD=1877] [ID=290]

0x675aba0: i32 = TargetConstant<1> [ORD=1761] [ID=38]

....

...."

Windows 7 64bit, ATI 5870, SDK 2.6, Catalyst 12.2 preview

Here's the code

#define InfinityCorrectfloat3(val) as_float3(as_uint3(val) - (((as_uint3(val) & 0x7F800000) + 1)>>31))
#define BACKGROUND_FILL_COLOUR      (float4)(1.0f,1.0f,1.0f,0.0f)
#define FLOAT_MAX_POS               3.402823466e+38f
#define FLOAT_MAX_NEG               -3.402823466e+38f

inline float3 safe_recip_float3(const float3 Val)
{
   float3 valinv = isequal(fabs(Val),0.0f) ? FLOAT_MAX_POS : 1.0f / Val;
   return InfinityCorrectfloat3(valinv);
};   

__kernel void  Test(__constant float* TraceParamsD, __global const int* TraceParamsI,__global const uint *ATree,__write_only image2d_t ScreenBuffer)
{
  float4 VertX,VertY,VertZ,OVertX,OVertY,OVertZ;
  float3 RayPosition,RayDirection,RayDirectionInv,VWeight,ClosestVWeight;
  int2 PixelPos;
  float xangle,yangle,val,val2,xzlen,sinX,cosX,sinY,cosY,IntersectZ,ClosestZ,PixelWidth;
  float BSV0,BSV1,MAX_TRACE_DISTANCE;
  int TriID,ClosestID,NodeSelect,NodesToCheck,NodeDepth;
  uint NodeOffsetStack[32],NodeSwitchSelect,NodeCompletionStack,NodeOffset,VDATAOFFSET,IDATAOFFSET,InBoundsMask,ActiveRayMask;
  float3 T1B,T2B,T1,T2,BoxMin,BoxMax;
  float TNEAR,TFAR;
  int3 OutsideTest;
  uint InsideMask;
  int3 RayDirectionParallelTest;

  PixelPos.x = TraceParamsI[32+get_global_id(0)];
  PixelPos = (int2)(PixelPos.x&65535,PixelPos.x>>16);
  PixelWidth = TraceParamsD[0];
 

  val = TraceParamsD[1] * PixelPos.y;
  RayDirection = (float3)(TraceParamsD[5],TraceParamsD[6],TraceParamsD[7]) + (float3)(TraceParamsD[8],TraceParamsD[9],TraceParamsD[10])  * val;
  RayDirection+= (((float3)(TraceParamsD[11],TraceParamsD[12],TraceParamsD[13]) + (float3)(TraceParamsD[14],TraceParamsD[15],TraceParamsD[16]) * val) - RayDirection) *(PixelWidth * PixelPos.x);
  RayDirection = normalize(RayDirection);
  RayPosition = (float3)(TraceParamsD[17],TraceParamsD[18],TraceParamsD[19])+ RayDirection;
  RayDirectionInv = -safe_recip_float3(RayDirection);  
  RayDirectionParallelTest = isequal(fabs(RayDirection), 0.0f);
 
  xzlen = length((float2)(RayDirection.s0,RayDirection.s2)); 
  xangle = acos(xzlen) * as_float((as_uint(RayDirection.s1)&0x80000000) | 0x3F800000);
  val = RayDirection.s2 / ((xzlen<=0.0f) ? 0.0000000001f : xzlen);
  yangle = -acos(fmin(1.0f, fmax(-1.0f,val)))  * as_float((as_uint(RayDirection.s0)&0x80000000) | 0x3F800000);
  sinX = sincos(xangle,&cosX);
  sinY = sincos(yangle,&cosY);


  MAX_TRACE_DISTANCE =10000000.0f;
  NodeDepth = 0;
  NodeOffset = 0;
  NodeOffsetStack[0] = 0;
  NodeCompletionStack = 0;
  NodeSwitchSelect = 0;
  ClosestID=-1;
  InBoundsMask=0xFFFFFFFF;
  ActiveRayMask = 0xFFFFFFFF;


  while(ActiveRayMask)
  {
         
          
          while((ATree[NodeOffset+3] & InBoundsMask) != 0)
          {
                   BoxMin = *(__global const float3*)(&ATree[NodeOffset]);
                    BoxMax = *(__global const float3*)(&ATree[NodeOffset+4]);
                    OutsideTest = signbit(RayPosition-BoxMin) |  signbit(BoxMax-RayPosition);
                    InsideMask   = as_uint(OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2);
                    OutsideTest &= RayDirectionParallelTest;
                    T1 = (RayPosition -  BoxMin )  * RayDirectionInv;
                    T1 = InfinityCorrectfloat3(T1);
                    T2 = (RayPosition -   BoxMax ) * RayDirectionInv;
                    T2 = InfinityCorrectfloat3(T2);
                    T1B = fmin(T1,T2);
                    T2B = fmax(T1,T2);
                    TNEAR = fmax(T1B.x, fmax(T1B.y, fmax(T1B.z, FLOAT_MAX_NEG) ));
                    TFAR  = fmin(T2B.x, fmin(T2B.y, fmin(T2B.z, FLOAT_MAX_POS) ));
                    BSV0 =  (OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2 | isgreater(TNEAR,TFAR) | isless(TNEAR,0.0f))  == 0 ? TNEAR : FLOAT_MAX_POS;    
                    BSV0 &= InsideMask; //bad code
                   
                    BoxMin = *(__global const float3*)(&ATree[NodeOffset+8]);
                    BoxMax = *(__global const float3*)(&ATree[NodeOffset+12]);
                    OutsideTest = signbit(RayPosition-BoxMin) |  signbit(BoxMax-RayPosition);
                    InsideMask   = as_uint(OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2);
                    OutsideTest &= RayDirectionParallelTest;
                    T1 = (RayPosition -  BoxMin )  * RayDirectionInv;
                    T1 = InfinityCorrectfloat3(T1);
                    T2 = (RayPosition -   BoxMax ) * RayDirectionInv;
                    T2 = InfinityCorrectfloat3(T2);
                    T1B = fmin(T1,T2);
                    T2B = fmax(T1,T2);
                    TNEAR = fmax(T1B.x, fmax(T1B.y, fmax(T1B.z, FLOAT_MAX_NEG) ));
                    TFAR  = fmin(T2B.x, fmin(T2B.y, fmin(T2B.z, FLOAT_MAX_POS) ));
                    BSV1 =  (OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2 | isgreater(TNEAR,TFAR) | isless(TNEAR,0.0f))  == 0 ? TNEAR : FLOAT_MAX_POS;    
                    BSV1 &= InsideMask; //bad code
                   
                    NodesToCheck  = isless(BSV0 , FLOAT_MAX_POS) + isless(BSV1 , FLOAT_MAX_POS);
                    NodeSelect    = isless(BSV1,BSV0);
                   
                   NodeCompletionStack  |= as_uint(max(NodesToCheck-1, 0)) << NodeDepth;

                    if(NodesToCheck == 0)
                    {         
                          NodeDepth = 31 - clz(NodeCompletionStack);
                          InBoundsMask = 0xFFFFFFFF + (as_uint(NodeDepth)>>31);
                          NodeCompletionStack &= ((1u<<NodeDepth)-1);
                          NodeSelect = ((NodeSwitchSelect >> NodeDepth) &1) ^ 1;                            
                    }
        
                   NodeSwitchSelect = (NodeSwitchSelect & ((1u<<NodeDepth)-1)) | (NodeSelect << NodeDepth);        
                    NodeOffset = ATree[NodeOffsetStack[as_uint(NodeDepth)&InBoundsMask] + 3 + (NodeSelect<<2) ];
                    NodeDepth++;
                    NodeOffsetStack[NodeDepth] = NodeOffset;       
          }
         
         
          BoxMin = *(__global const float3*)(&ATree[NodeOffset]);
          BoxMax = *(__global const float3*)(&ATree[NodeOffset+4]);
          T1 = (RayPosition -  BoxMin )  * RayDirectionInv;
          T1 = InfinityCorrectfloat3(T1);
          T2 = (RayPosition -   BoxMax ) * RayDirectionInv;
          T2 = InfinityCorrectfloat3(T2);        
          T1B = fmin(T1,T2);
          T2B = fmax(T1,T2);
          TNEAR = fmax(T1B.x, fmax(T1B.y, T1B.z));
          TFAR  = fmin(T2B.x, fmin(T2B.y, T2B.z )) * 1.000001f;


          if((isless(TNEAR,MAX_TRACE_DISTANCE) & InBoundsMask) == 0)break; 
          ClosestZ = fmin(MAX_TRACE_DISTANCE,TFAR);
          IDATAOFFSET  = ATree[NodeOffset+15];

          for(TriID = ATree[NodeOffset+11]-1; TriID >= 0; TriID--)
          {
                    VDATAOFFSET = ATree[IDATAOFFSET + TriID];                
                    OVertX = *((__global float4*)&ATree[VDATAOFFSET    ]) - RayPosition.s0;
                 OVertY = *((__global float4*)&ATree[VDATAOFFSET +4 ]) - RayPosition.s1;
                 OVertZ = *((__global float4*)&ATree[VDATAOFFSET +8 ]) - RayPosition.s2;

                 VertX = mad(OVertZ,sinY,OVertX*cosY);
                 VertZ = mad(OVertZ,cosY,-(OVertX*sinY));
                 VertY = mad(OVertY,cosX,-(VertZ*sinX));
                 VertZ = mad(OVertY,sinX,VertZ*cosX);
   
                 val2 = (VertX.s1 * VertY.s2) - (VertX.s2 * VertY.s1);
                    val = (VertX.s0 * VertY.s1) - (VertX.s0 * VertY.s2) - (VertX.s1 * VertY.s0)  + (VertX.s2 * VertY.s0) + val2;
                 VWeight  =  cross((float3)(VertX.s0,VertX.s1,VertX.s2),(float3)(VertY.s0,VertY.s1,VertY.s2)) * native_recip(val);

                 IntersectZ = dot((float3)(VertZ.s0,VertZ.s1,VertZ.s2),VWeight);
                    if( (isequal(val,0.0f) | any(as_int3(VWeight * (1.0f - VWeight))) | islessequal(ClosestZ,IntersectZ) | isless(IntersectZ,1.0f)) == 0)
              {
                           ClosestZ=IntersectZ;
                        ClosestID = TriID;
                   ClosestVWeight = VWeight;
                    } 

                 val = (VertX.s3 * VertY.s1) - (VertX.s3 * VertY.s2) - (VertX.s1 * VertY.s3) + (VertX.s2 * VertY.s3) + val2;
                    VWeight  =  cross((float3)(VertX.s3,VertX.s1,VertX.s2),(float3)(VertY.s3,VertY.s1,VertY.s2)) * native_recip(val);

                    IntersectZ = dot((float3)(VertZ.s3,VertZ.s1,VertZ.s2),VWeight);
                    if( (isequal(val,0.0f) | any(as_int3(VWeight * (1.0f - VWeight))) | islessequal(ClosestZ,IntersectZ) | isless(IntersectZ,1.0f)) == 0)
              {
                           ClosestZ=IntersectZ;
                        ClosestID = TriID | 0x40000000;
                   ClosestVWeight = VWeight;
                    }
                   
          }

  
           NodeDepth = 31 - as_int(clz(NodeCompletionStack));
           if(ClosestID!=-1 || NodeDepth<0)ActiveRayMask=0;     
           NodeCompletionStack &= ((1<<NodeDepth)-1);
           NodeSelect = ((NodeSwitchSelect >> NodeDepth) &1)  ^ 1;     
           NodeOffset = ATree[NodeOffsetStack[as_uint(NodeDepth)&InBoundsMask] + 3 + (NodeSelect<<2) ];
           NodeDepth++;
           NodeOffsetStack[NodeDepth] = NodeOffset;
         
  }
 
  float4 PixelColour = BACKGROUND_FILL_COLOUR;
  if(ClosestID>=0)PixelColour = (float4)(ClosestVWeight,0.0f)*(float4)(1.0f,1.0f,1.0f,0.0f);
  write_imagef(ScreenBuffer,PixelPos,PixelColour);


}

0 Likes
1 Reply

Thanks for reporting this issue. I've been able to reproduce it and have reported it to the correct person to investigate. The problem seems to stem from operation.

BSV0 &= InsideMask; <--

This is making it past our semantic checker and should not as BSV0 is a floating point operation and thus a binary operand is not legal. Please switch it to  BSV0 = as_float(as_int(BSV0) & InsideMask); as a work-around until we release a fix for the crash.

0 Likes