cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

spectral
Adept II

Recursion problem... how to traverse a tree ?

Hi,

I contact you because I have a Tree hierarchy of objects and need to apply a "complex" processing to some levels of this tree. I traverse it and depending of the node... I apply some processing.

All my code is ready, except that I have some recursion that I can't solve...

The problem is with the method "Accelerator_BVH_TreeTraversal".

Can you help me ?

Here is my OpenCL code:

 

typedef struct { float4 Origin; float4 Direction; float Maximum; int IsShadowRay; int pad1; int pad2; } Ray; typedef struct { // Collection position and length int Instances_Index; int Instances_Length; int pad1; int pad2; } InstanceList; typedef struct { // Collection position and length int Transforms_Index; int Transforms_Length; int pad1; int pad2; } MovingMatrix4; typedef struct { int Type; int InstanceList_Index; int TriangleMesh_Index; int pad1; } PrimitivesList; typedef struct { // Accelerator int AcceleratorBVH_Index; int pad1; int pad2; int pad3; } Geometry; typedef struct { MovingMatrix4 o2w; MovingMatrix4 w2o; Geometry Geometry; } Instance; typedef struct { int Points_Index; int Points_Length; int Indices_Index; int Indices_Length; } TriangleMesh; typedef struct { float V01; float V02; float V03; float V04; float V05; float V06; float V07; float V08; float V09; float V10; float V11; float V12; float V13; float V14; float V15; float V16; } Matrix4; typedef struct { int PrimitivesList_Index; int PrimitivesList_PrimitivesCount; int Nodes_Index; int Nodes_Length; int PrimitivesIds_Index; int PrimitivesIds_Length; int pad1; int pad2; } Accelerator_BVH; typedef struct { /// <summary> /// The bounding box's coordinates /// </summary> float MinX, MinY, MinZ, MaxX, MaxY, MaxZ; } AABB; typedef struct { float Maximum; int PrimitiveId; float U; float V; } IntersectionResult; typedef struct { /// <summary> /// The bounding box's coordinates /// </summary> AABB BBox; /// <summary> /// The index of the sibbling node (next node at the same level). /// </summary> int SkipNodeIndex; /// <summary> /// The primitive Id. /// </summary> /// <remarks>If it is a set (Count > -1) it is the first index in the primitives' ids list (_primitivesIds)</remarks> int PrimitiveId; /// <summary> /// The number of primitives, used when it is a set. /// </summary> ushort Count; ushort pad1; int pad2; int pad3; int pad4; } BoundingVolume; typedef struct MITData { float4 InverseDirection; int IsXNegative; int IsYNegative; int IsZNegative; } MITData; typedef struct { __global Ray * Rays; __global IntersectionResult * Results; __global InstanceList * InstancesList; __global PrimitivesList * PrimitivesList; __global Instance * Instances; __global Matrix4 * Matrix4; __global TriangleMesh * TriangleMeshes; __global float * Points; __global int * Indices; __global Accelerator_BVH * AcceleratorBVHs; __global BoundingVolume * AcceleratorBVH_Nodes; __global int * AcceleratorBVH_PrimitivesIds; } GlobalBuffer; GlobalBuffer GB; //-------------------------------------------------------------------------------- // Intersects_BoxRay //-------------------------------------------------------------------------------- int Intersects_BoxRay(struct MITData * mitData, float4 origin, float4 direction, global AABB* aabb, float * minHit, float * maxHit) { float tmin, tmax, tymin, tymax, tzmin, tzmax; if (mitData->IsXNegative) { tmin = (aabb->MaxX - origin.x) * mitData->InverseDirection.x; tmax = (aabb->MinX - origin.x) * mitData->InverseDirection.x; } else { tmin = (aabb->MinX - origin.x) * mitData->InverseDirection.x; tmax = (aabb->MaxX - origin.x) * mitData->InverseDirection.x; } if (mitData->IsYNegative) { tymin = (aabb->MaxY - origin.y) * mitData->InverseDirection.y; tymax = (aabb->MinY - origin.y) * mitData->InverseDirection.y; } else { tymin = (aabb->MinY - origin.y) * mitData->InverseDirection.y; tymax = (aabb->MaxY - origin.y) * mitData->InverseDirection.y; } if (tmin > tymax || tymin > tmax) { minHit[0] = MAXFLOAT; maxHit[0] = MAXFLOAT; return 0; } if (tymin > tmin) tmin = tymin; if (tymax < tmax) tmax = tymax; if (mitData->IsZNegative) { tzmin = (aabb->MaxZ - origin.z) * mitData->InverseDirection.z; tzmax = (aabb->MinZ - origin.z) * mitData->InverseDirection.z; } else { tzmin = (aabb->MinZ - origin.z) * mitData->InverseDirection.z; tzmax = (aabb->MaxZ - origin.z) * mitData->InverseDirection.z; } if (tmin > tzmax || tzmin > tmax) { minHit[0] = MAXFLOAT; maxHit[0] = MAXFLOAT; return 0; } if (tzmin > tmin) tmin = tzmin; if (tzmax < tmax) tmax = tzmax; minHit[0] = tmin; maxHit[0] = tmax; return 1; } //-------------------------------------------------------------------------------- // Intersects_Primitive_TriangleMesh //-------------------------------------------------------------------------------- int Intersects_Primitive_TriangleMesh(global int * Indices, global float * Points, float4 origin, float4 direction, int primitiveId, global IntersectionResult * result, int GID) { int tri = 3 * primitiveId; int i1 = 3 * Indices[tri + 0]; int i2 = 3 * Indices[tri + 1]; int i3 = 3 * Indices[tri + 2]; float4 edge0 = (float4)( Points[i2 + 0] - Points[i1 + 0], Points[i2 + 1] - Points[i1 + 1], Points[i2 + 2] - Points[i1 + 2], 0); float4 edge1 = (float4)( Points[i1 + 0] - Points[i3 + 0], Points[i1 + 1] - Points[i3 + 1], Points[i1 + 2] - Points[i3 + 2], 0); float4 edge2 = (float4)( Points[i1 + 0] - origin.x, Points[i1 + 1] - origin.y, Points[i1 + 2] - origin.z, 0); float4 n = cross(edge0, edge1); float v = dot(direction, n); float iv = 1.0f / v; float va = dot(n, edge2); float t = iv * va; if (t <= 0 || t >= result[GID].Maximum) return 0; float4 i = cross(edge2, direction); float v1 = dot(i, edge1); float beta = iv * v1; if (beta < 0) return 0; float v2 = dot(i, edge0); if ((v1 + v2) * v > v * v) return 0; float gamma = iv * v2; if (gamma < 0) return 0; result[GID].Maximum = t; result[GID].U = beta; result[GID].V = gamma; result[GID].PrimitiveId = primitiveId; return 1; } //-------------------------------------------------------------------------------- // Intersects_Primitive_Geometry //-------------------------------------------------------------------------------- int Intersects_Primitive_Geometry(__global Geometry * geometry, float4 origin, float4 direction, __global IntersectionResult * result, int GID) { return Accelerator_BVH_TreeTraversal(GB.AcceleratorBVHs[geometry[0].AcceleratorBVH_Index]); //InitializeAccelerationStructure(null); //return Accelerator.Intersects(r, state); } //-------------------------------------------------------------------------------- // Intersects_Primitive_Instance //-------------------------------------------------------------------------------- int Intersects_Primitive_Instance(__global Instance * instance, float4 origin, float4 direction, __global IntersectionResult * result, int GID) { return Intersects_Primitive_Geometry(&instance[0].Geometry, origin, direction, result, GID); /* // Project the ray to the object space Matrix4 transform = w2o.Sample(intersection.Time); Ray localRay = ray.Transform(ref transform); intersection.Current = this; // Test the intersection with the geometry bool hasIntersection = Geometry.Intersect(localRay, intersection); // FIXME: transfer max distance to current ray ray.Maximum = localRay.Maximum; return hasIntersection; */ } //-------------------------------------------------------------------------------- // Intersects_Primitive_InstanceList //-------------------------------------------------------------------------------- int Intersects_Primitive_InstanceList(__global InstanceList * instanceList, float4 origin, float4 direction, int primitiveId, __global IntersectionResult * result, int GID) { __global Instance * instance = &GB.Instances[instanceList[0].Instances_Index + primitiveId]; return Intersects_Primitive_Instance(instance, origin, direction, result, GID); //if (primitiveId < Instances.Length) // return Instances[primitiveId].Intersect(r, state); //return _lights[primitiveId - Instances.Length].Intersect(r, state); } //-------------------------------------------------------------------------------- // Intersects_Primitive //-------------------------------------------------------------------------------- int Intersects_Primitive(__global PrimitivesList * primitivesList, float4 origin, float4 direction, int primitiveId, __global IntersectionResult * result, int GID) { // Triangle mesh if (primitivesList[0].Type == 2) { TriangleMesh triangleMesh = GB.TriangleMeshes[primitivesList[0].TriangleMesh_Index]; return Intersects_Primitive_TriangleMesh(&GB.Indices[triangleMesh.Indices_Index], &GB.Points[triangleMesh.Points_Index], origin, direction, primitiveId, result, GID); } // Instance List __global InstanceList * instanceList = &GB.InstancesList[primitivesList[0].InstanceList_Index]; return Intersects_Primitive_InstanceList(instanceList, origin, direction, primitiveId, result, GID); } //-------------------------------------------------------------------------------- // Accelerator_BVH_TreeTraversal_Internal //-------------------------------------------------------------------------------- void Accelerator_BVH_TreeTraversal_Internal( __global Ray * rays, __global BoundingVolume * _nodes, __global int * primitivesIds, __global PrimitivesList * primitivesList, __global IntersectionResult * result) { int GID = get_global_id(0); int bvNodeIndex = 0; // 1 = true, 0 = false MITData mitData; mitData.InverseDirection.x = 1.0f / rays[GID].Direction.x; mitData.InverseDirection.y = 1.0f / rays[GID].Direction.y; mitData.InverseDirection.z = 1.0f / rays[GID].Direction.z; if (mitData.InverseDirection.x < 0) mitData.IsXNegative = 1; else mitData.IsXNegative = 0; if (mitData.InverseDirection.y < 0) mitData.IsYNegative = 1; else mitData.IsYNegative = 0; if (mitData.InverseDirection.z < 0) mitData.IsZNegative = 1; else mitData.IsZNegative = 0; // End of the tree int stopNodeIndex = _nodes[bvNodeIndex].SkipNodeIndex; int hasIntersection = 0; int loopCount = 0; while (bvNodeIndex < stopNodeIndex) { //loopCount++; // Test for the best primitives //if (loopCount < 1 /*&& hitsCache != null*/) //{ // int[] bestHits = hitsCache.Primitives; // if (bestHits != null) // for (int index = 0; index < bestHits.Length; index++) // _primitiveList.IntersectPrimitive(ray, bestHits[index], istate); //} // Do a hit test with the bounding volume. // We use the "closest intersection" to check against the BV intersection. // If the 'closest intersection' < 'bv intersection' then there is no // primitive intersection possible ! float minHit; float maxHit; int hasHit = Intersects_BoxRay(&mitData, rays[GID].Origin, rays[GID].Direction, &_nodes[bvNodeIndex].BBox, &minHit, &maxHit); if (hasHit && minHit <= rays[GID].Maximum) { // It is a leaf -> test the primitives if (_nodes[bvNodeIndex].PrimitiveId > -1) { // Contains a simple primitive if(_nodes[bvNodeIndex].Count < 1) { if(Intersects_Primitive(primitivesList, rays[GID].Origin, rays[GID].Direction, _nodes[bvNodeIndex].PrimitiveId, result, GID)) { // If shadow ray if (rays[GID].IsShadowRay) return; //if (hitsCache != null) // hitsCache.AddHit(_nodes[bvNodeIndex].PrimitiveId, ray.Maximum); hasIntersection = 1; } } // Contains a set of primitive else if (_nodes[bvNodeIndex].Count > 0) { //Note : bv variable not defined int startIndex = _nodes[bvNodeIndex].PrimitiveId; int endIndex = startIndex + _nodes[bvNodeIndex].Count - 1; for (int index = startIndex; index <= endIndex; index++) if(Intersects_Primitive(primitivesList, rays[GID].Origin, rays[GID].Direction, primitivesIds[index], result, GID)) { // If shadow ray if (rays[GID].IsShadowRay) return; //if (hitsCache != null) // hitsCache.AddHit(_primitivesIds[index], ray.Maximum); hasIntersection = 1; } } } // Next node at the same level OR // the next sibbling of the parent. bvNodeIndex++; if ((bvNodeIndex >= stopNodeIndex || bvNodeIndex == _nodes[bvNodeIndex].SkipNodeIndex) && hasIntersection) return; } // Continue at the same level else bvNodeIndex = _nodes[bvNodeIndex].SkipNodeIndex; } return; } //-------------------------------------------------------------------------------- // Accelerator_NoAcceleration //-------------------------------------------------------------------------------- void Accelerator_NoAcceleration( __global Ray * rays, __global int * primitivesIds, __global IntersectionResult * result) { int GID = get_global_id(0); } //-------------------------------------------------------------------------------- // Accelerator_BVH_TreeTraversal //-------------------------------------------------------------------------------- void Accelerator_BVH_TreeTraversal(__global Accelerator_BVH * _acceleratorBVHs) { Accelerator_BVH_TreeTraversal_Internal( GB.Rays, &GB.AcceleratorBVH_Nodes[_acceleratorBVHs[0].Nodes_Index], &GB.AcceleratorBVH_PrimitivesIds[_acceleratorBVHs[0].PrimitivesIds_Index], &GB.PrimitivesList[_acceleratorBVHs[0].PrimitivesList_Index], GB.Results); /*void Accelerator_BVH_TreeTraversal_Internal( __global Ray * rays, __global BoundingVolume * _nodes, __global int * primitivesIds, __global PrimitivesList * primitivesList, __global IntersectionResult * result)*/ } //-------------------------------------------------------------------------------- // Trace //-------------------------------------------------------------------------------- __kernel void Trace( __global Ray * rays, __global IntersectionResult * results, __global InstanceList * _instancesList, __global PrimitivesList * _primitivesList, __global Instance * _instances, __global Matrix4 * _matrix4, __global TriangleMesh * _triangleMeshes, __global float * _points, __global int * _indices, __global Accelerator_BVH * _acceleratorBVHs, __global BoundingVolume * _acceleratorBVH_Nodes, __global int * _acceleratorBVH_PrimitivesIds ) { GB.Rays = rays; GB.Results = results; GB.InstancesList = _instancesList; GB.PrimitivesList = _primitivesList; GB.Instances = _instances; GB.Matrix4 = _matrix4; GB.TriangleMeshes = _triangleMeshes; GB.Points = _points; GB.Indices = _indices; GB.AcceleratorBVHs = _acceleratorBVHs; GB.AcceleratorBVH_Nodes = _acceleratorBVH_Nodes; GB.AcceleratorBVH_PrimitivesIds = _acceleratorBVH_PrimitivesIds; // Global accelerator ... always a bvh Accelerator_BVH_TreeTraversal(&_acceleratorBVHs[0]); }

0 Likes
8 Replies
Fr4nz
Journeyman III

In OpenCL recurson is not allowed, as written in the specs (section 6.8, point j). You have to find another way to solve your problem.

Another thing: in your code there are TOO many ifs, you should avoid them if possible and use the selection operator "test ? exp1 : exp2" when applicable.

0 Likes

Thanks for your answer,

1) Why should I use the "?" operator and not the "if"

2) Ok, there is no recusion support... but then... do you think it is possible to use a same "method" code and apply it to several "kind" of objects ?

By example, I have the method "Accelerator_BVH_TreeTraversal" and need to duplicate this code to apply it to "InstanceList" and to "TriangleMesh". Is there a way to avoid to duplicate the code ?

Thanks

0 Likes

Originally posted by: viewon01 Thanks for your answer,

 

1) Why should I use the "?" operator and not the "if"

 

2) Ok, there is no recusion support... but then... do you think it is possible to use a same "method" code and apply it to several "kind" of objects ?

 

By example, I have the method "Accelerator_BVH_TreeTraversal" and need to duplicate this code to apply it to "InstanceList" and to "TriangleMesh". Is there a way to avoid to duplicate the code ?

 

Thanks

 

1) Take a look here:

http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html

and read a post from MichaWillmow here about flow control in GPUs:

http://forums.amd.com/forum/messageview.cfm?catid=390&threadid=124618&highlight_key=y&keyword1=prefix%20sum

Essentially, you have to think in a SIMD way when you use flow control on GPUs. Moreover, the "? :" operators works, on a syntactical level, on expressions and not on statements. This allows you to avoid different paths of executions inside a work-group;

2) I don't know the nature of the problem you're trying to solve...anyway, usually if something can be solved recursively then it can also be solved iteratively...

 

0 Likes

Originally posted by: Fr4nz
Originally posted by: viewon01 Thanks for your answer,

1) Why should I use the "?" operator and not the "if"

1) Take a look here:

http://http.developer.nvidia.com/GPUGems2/gpugems2_chapter34.html

and read a post from MichaWillmow here about flow control in GPUs:

http://forums.amd.com/forum/messageview.cfm?catid=390&threadid=124618&highlight_key=y&keyword1=prefix%20sum

Essentially, you have to think in a SIMD way when you use flow control on GPUs. Moreover, the "? :" operators works, on a syntactical level, on expressions and not on statements. This allows you to avoid different paths of executions inside a work-group;

 

I looked over the links you provided and can't find a preference for ?: being expressed. Yes, use ALU instructions instead of flow control but I'm pretty sure ?: is going to boil down to flow control even if it appears in code to be a single, no flow controlled, statement.

0 Likes

viewon,
There is no template support in OpenCL, but there are macro's. You can turn your function into a giant macro and set the parameters at compile time for different objects.
0 Likes

coordz,
If no memory operation exists in the "?:" operator, it should map to the cmov_logical instruction, which is can execute in a single cycle instead of > 100 in an IF/ELSE block. If you have a test case that doesn't show this behavior, please let us know and we will look into why it is not generating the correct code.

You also can use the 'select' function call.
0 Likes

Originally posted by: MicahVillmow coordz, If no memory operation exists in the "?:" operator, it should map to the cmov_logical instruction, which is can execute in a single cycle instead of > 100 in an IF/ELSE block. If you have a test case that doesn't show this behavior, please let us know and we will look into why it is not generating the correct code. You also can use the 'select' function call.


All is well and my test case does generate the code as you describe. Why isn't a simple if/else with no memory access optimized out to use the cmov_logical instruction?

0 Likes

coordz,
If you can provide a test case we can analyze why it is not being optimized correctly and try to fix it for our next major release.
0 Likes