#define InfinityCorrectfloat3(val) as_float3(as_uint3(val) - (((as_uint3(val) & 0x7F800000) + 1)>>31)) #define BACKGROUND_FILL_COLOUR (float4)(1.0f,1.0f,1.0f,0.0f) #define FLOAT_MAX_POS 3.402823466e+38f #define FLOAT_MAX_NEG -3.402823466e+38f inline float3 safe_recip_float3(const float3 Val) { float3 valinv = isequal(fabs(Val),0.0f) ? FLOAT_MAX_POS : 1.0f / Val; return InfinityCorrectfloat3(valinv); }; __kernel void Test(__constant float* TraceParamsD, __global const int* TraceParamsI,__global const uint *ATree,__write_only image2d_t ScreenBuffer) { float4 VertX,VertY,VertZ,OVertX,OVertY,OVertZ; float3 RayPosition,RayDirection,RayDirectionInv,VWeight,ClosestVWeight; int2 PixelPos; float xangle,yangle,val,val2,xzlen,sinX,cosX,sinY,cosY,IntersectZ,ClosestZ,PixelWidth; float BSV0,BSV1,MAX_TRACE_DISTANCE; int TriID,ClosestID,NodeSelect,NodesToCheck,NodeDepth; uint NodeOffsetStack[32],NodeSwitchSelect,NodeCompletionStack,NodeOffset,VDATAOFFSET,IDATAOFFSET,InBoundsMask,ActiveRayMask; float3 T1B,T2B,T1,T2,BoxMin,BoxMax; float TNEAR,TFAR; int3 OutsideTest; uint InsideMask; int3 RayDirectionParallelTest; PixelPos.x = TraceParamsI[32+get_global_id(0)]; PixelPos = (int2)(PixelPos.x&65535,PixelPos.x>>16); PixelWidth = TraceParamsD[0]; val = TraceParamsD[1] * PixelPos.y; RayDirection = (float3)(TraceParamsD[5],TraceParamsD[6],TraceParamsD[7]) + (float3)(TraceParamsD[8],TraceParamsD[9],TraceParamsD[10]) * val; RayDirection+= (((float3)(TraceParamsD[11],TraceParamsD[12],TraceParamsD[13]) + (float3)(TraceParamsD[14],TraceParamsD[15],TraceParamsD[16]) * val) - RayDirection) *(PixelWidth * PixelPos.x); RayDirection = normalize(RayDirection); RayPosition = (float3)(TraceParamsD[17],TraceParamsD[18],TraceParamsD[19])+ RayDirection; RayDirectionInv = -safe_recip_float3(RayDirection); RayDirectionParallelTest = isequal(fabs(RayDirection), 0.0f); xzlen = length((float2)(RayDirection.s0,RayDirection.s2)); xangle = acos(xzlen) * as_float((as_uint(RayDirection.s1)&0x80000000) | 0x3F800000); val = RayDirection.s2 / ((xzlen<=0.0f) ? 0.0000000001f : xzlen); yangle = -acos(fmin(1.0f, fmax(-1.0f,val))) * as_float((as_uint(RayDirection.s0)&0x80000000) | 0x3F800000); sinX = sincos(xangle,&cosX); sinY = sincos(yangle,&cosY); MAX_TRACE_DISTANCE =10000000.0f; NodeDepth = 0; NodeOffset = 0; NodeOffsetStack[0] = 0; NodeCompletionStack = 0; NodeSwitchSelect = 0; ClosestID=-1; InBoundsMask=0xFFFFFFFF; ActiveRayMask = 0xFFFFFFFF;
while(ActiveRayMask) { while((ATree[NodeOffset+3] & InBoundsMask) != 0) { BoxMin = *(__global const float3*)(&ATree[NodeOffset]); BoxMax = *(__global const float3*)(&ATree[NodeOffset+4]); OutsideTest = signbit(RayPosition-BoxMin) | signbit(BoxMax-RayPosition); InsideMask = as_uint(OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2); OutsideTest &= RayDirectionParallelTest; T1 = (RayPosition - BoxMin ) * RayDirectionInv; T1 = InfinityCorrectfloat3(T1); T2 = (RayPosition - BoxMax ) * RayDirectionInv; T2 = InfinityCorrectfloat3(T2); T1B = fmin(T1,T2); T2B = fmax(T1,T2); TNEAR = fmax(T1B.x, fmax(T1B.y, fmax(T1B.z, FLOAT_MAX_NEG) )); TFAR = fmin(T2B.x, fmin(T2B.y, fmin(T2B.z, FLOAT_MAX_POS) )); BSV0 = (OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2 | isgreater(TNEAR,TFAR) | isless(TNEAR,0.0f)) == 0 ? TNEAR : FLOAT_MAX_POS; BSV0 &= InsideMask; //bad code BoxMin = *(__global const float3*)(&ATree[NodeOffset+8]); BoxMax = *(__global const float3*)(&ATree[NodeOffset+12]); OutsideTest = signbit(RayPosition-BoxMin) | signbit(BoxMax-RayPosition); InsideMask = as_uint(OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2); OutsideTest &= RayDirectionParallelTest; T1 = (RayPosition - BoxMin ) * RayDirectionInv; T1 = InfinityCorrectfloat3(T1); T2 = (RayPosition - BoxMax ) * RayDirectionInv; T2 = InfinityCorrectfloat3(T2); T1B = fmin(T1,T2); T2B = fmax(T1,T2); TNEAR = fmax(T1B.x, fmax(T1B.y, fmax(T1B.z, FLOAT_MAX_NEG) )); TFAR = fmin(T2B.x, fmin(T2B.y, fmin(T2B.z, FLOAT_MAX_POS) )); BSV1 = (OutsideTest.s0 | OutsideTest.s1 | OutsideTest.s2 | isgreater(TNEAR,TFAR) | isless(TNEAR,0.0f)) == 0 ? TNEAR : FLOAT_MAX_POS; BSV1 &= InsideMask; //bad code NodesToCheck = isless(BSV0 , FLOAT_MAX_POS) + isless(BSV1 , FLOAT_MAX_POS); NodeSelect = isless(BSV1,BSV0); NodeCompletionStack |= as_uint(max(NodesToCheck-1, 0)) << NodeDepth;
if(NodesToCheck == 0) { NodeDepth = 31 - clz(NodeCompletionStack); InBoundsMask = 0xFFFFFFFF + (as_uint(NodeDepth)>>31); NodeCompletionStack &= ((1u<<NodeDepth)-1); NodeSelect = ((NodeSwitchSelect >> NodeDepth) &1) ^ 1; } NodeSwitchSelect = (NodeSwitchSelect & ((1u<<NodeDepth)-1)) | (NodeSelect << NodeDepth); NodeOffset = ATree[NodeOffsetStack[as_uint(NodeDepth)&InBoundsMask] + 3 + (NodeSelect<<2) ]; NodeDepth++; NodeOffsetStack[NodeDepth] = NodeOffset; } BoxMin = *(__global const float3*)(&ATree[NodeOffset]); BoxMax = *(__global const float3*)(&ATree[NodeOffset+4]); T1 = (RayPosition - BoxMin ) * RayDirectionInv; T1 = InfinityCorrectfloat3(T1); T2 = (RayPosition - BoxMax ) * RayDirectionInv; T2 = InfinityCorrectfloat3(T2); T1B = fmin(T1,T2); T2B = fmax(T1,T2); TNEAR = fmax(T1B.x, fmax(T1B.y, T1B.z)); TFAR = fmin(T2B.x, fmin(T2B.y, T2B.z )) * 1.000001f; if((isless(TNEAR,MAX_TRACE_DISTANCE) & InBoundsMask) == 0)break; ClosestZ = fmin(MAX_TRACE_DISTANCE,TFAR); IDATAOFFSET = ATree[NodeOffset+15];
for(TriID = ATree[NodeOffset+11]-1; TriID >= 0; TriID--) { VDATAOFFSET = ATree[IDATAOFFSET + TriID]; OVertX = *((__global float4*)&ATree[VDATAOFFSET ]) - RayPosition.s0; OVertY = *((__global float4*)&ATree[VDATAOFFSET +4 ]) - RayPosition.s1; OVertZ = *((__global float4*)&ATree[VDATAOFFSET +8 ]) - RayPosition.s2; VertX = mad(OVertZ,sinY,OVertX*cosY); VertZ = mad(OVertZ,cosY,-(OVertX*sinY)); VertY = mad(OVertY,cosX,-(VertZ*sinX)); VertZ = mad(OVertY,sinX,VertZ*cosX); val2 = (VertX.s1 * VertY.s2) - (VertX.s2 * VertY.s1); val = (VertX.s0 * VertY.s1) - (VertX.s0 * VertY.s2) - (VertX.s1 * VertY.s0) + (VertX.s2 * VertY.s0) + val2; VWeight = cross((float3)(VertX.s0,VertX.s1,VertX.s2),(float3)(VertY.s0,VertY.s1,VertY.s2)) * native_recip(val); IntersectZ = dot((float3)(VertZ.s0,VertZ.s1,VertZ.s2),VWeight); if( (isequal(val,0.0f) | any(as_int3(VWeight * (1.0f - VWeight))) | islessequal(ClosestZ,IntersectZ) | isless(IntersectZ,1.0f)) == 0) { ClosestZ=IntersectZ; ClosestID = TriID; ClosestVWeight = VWeight; } val = (VertX.s3 * VertY.s1) - (VertX.s3 * VertY.s2) - (VertX.s1 * VertY.s3) + (VertX.s2 * VertY.s3) + val2; VWeight = cross((float3)(VertX.s3,VertX.s1,VertX.s2),(float3)(VertY.s3,VertY.s1,VertY.s2)) * native_recip(val); IntersectZ = dot((float3)(VertZ.s3,VertZ.s1,VertZ.s2),VWeight); if( (isequal(val,0.0f) | any(as_int3(VWeight * (1.0f - VWeight))) | islessequal(ClosestZ,IntersectZ) | isless(IntersectZ,1.0f)) == 0) { ClosestZ=IntersectZ; ClosestID = TriID | 0x40000000; ClosestVWeight = VWeight; } } NodeDepth = 31 - as_int(clz(NodeCompletionStack)); if(ClosestID!=-1 || NodeDepth<0)ActiveRayMask=0; NodeCompletionStack &= ((1<<NodeDepth)-1); NodeSelect = ((NodeSwitchSelect >> NodeDepth) &1) ^ 1; NodeOffset = ATree[NodeOffsetStack[as_uint(NodeDepth)&InBoundsMask] + 3 + (NodeSelect<<2) ]; NodeDepth++; NodeOffsetStack[NodeDepth] = NodeOffset; } float4 PixelColour = BACKGROUND_FILL_COLOUR; if(ClosestID>=0)PixelColour = (float4)(ClosestVWeight,0.0f)*(float4)(1.0f,1.0f,1.0f,0.0f); write_imagef(ScreenBuffer,PixelPos,PixelColour); }
|