0 Replies Latest reply on Mar 6, 2012 5:04 AM by dikobraz

    Driver crashes trying to compile kernel

    dikobraz

      I encountered a problem with an AMD card when trying to compile the following kernel:

       

      // Enable byte-sized stores: the kernel below writes single char/uchar
      // elements through __global pointers.
      #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

       

      // Sampler used for the depth-map lookup: normalized coordinates,
      // out-of-range coordinates clamped to the edge, nearest-texel filtering.
      __constant sampler_t proj_sampler = CLK_NORMALIZED_COORDS_TRUE  | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

       

      // common

      /**
       * Multiplies a 4x4 matrix by a 4-component vector.
       *
       * The matrix is packed row by row into a float16: components s0..s3
       * form the row that yields the x result, s4..s7 the y result, s8..sB
       * the z result and sC..sF the w result.
       *
       * @param mat  packed 4x4 matrix
       * @param vec  vector to transform
       * @return     the four row/vector dot products as (x, y, z, w)
       */
      float4 transform(float16 mat, float4 vec)
      {
          return (float4)(dot(mat.s0123, vec),
                          dot(mat.s4567, vec),
                          dot(mat.s89AB, vec),
                          dot(mat.sCDEF, vec));
      }

       

      // Description of the voxel block being updated; passed by value to the
      // updateDistanceFieldProj kernel.
      // NOTE(review): in OpenCL C a 3-component vector has the size and alignment
      // of the corresponding 4-component vector (16 bytes for int3/float3), so the
      // host-side structure presumably has to use the same padded layout - confirm
      // against the host code.
      struct block_desc

      {

          int3   blockCoord;        /// active block

          int3   blockCount;        /// number of blocks in each direction

          float3 offset;            /// offset of the block in world coordinates

          int3   blockSize;        /// size of the voxel grid inside the block

          int3   blockSizeShift;    /// per-axis right shift applied to the linear voxel index (see calcGridPos)

          int3   blockSizeMask;    /// per-axis bit mask applied to the (shifted) linear voxel index (see calcGridPos)

          float3 voxelSize;        /// size of one voxel of the grid

      };

       

      /**
       * Converts a linear (1D) voxel index into a 3D grid position.
       *
       * Each axis is extracted with a shift followed by a mask, so this only
       * works when the grid dimensions are powers of two. The x coordinate is
       * taken from the low bits directly; gridSizeShift.x is not used.
       *
       * @param i              linear voxel index
       * @param gridSizeShift  per-axis right-shift amounts (only .y and .z are read)
       * @param gridSizeMask   per-axis bit masks
       * @return               (x, y, z) position of the voxel inside the grid
       */
      int3 calcGridPos(uint i,
                       int3 gridSizeShift,
                       int3 gridSizeMask)
      {
          const uint x = i & gridSizeMask.x;
          const uint y = (i >> gridSizeShift.y) & gridSizeMask.y;
          const uint z = (i >> gridSizeShift.z) & gridSizeMask.z;

          return (int3)((int)x, (int)y, (int)z);
      }

       

      /**
       * Updates the distance field of one voxel block from a depth map.
       *
       * One work-item processes one voxel: the voxel centre is transformed into
       * view space, projected into the depth map, and - when the map holds a
       * surface sample there (depth < 1) - the stored distance is blended with
       * the distance between the voxel centre and the reconstructed surface point.
       *
       * @param normalDepthMap     image sampled with proj_sampler; only the .w
       *                           channel is read and it is interpreted as depth
       * @param numOccupiedVoxels  number of voxels to process; work-items whose
       *                           global id is not below this value do nothing
       * @param blockOffset        offset added to the voxel index when addressing
       *                           voxels and voxelsWeights
       * @param modelViewMatrix    packed 4x4 matrix (see transform) applied to the
       *                           voxel centre
       * @param projMatrix         packed 4x4 matrix applied to the view-space centre
       * @param invProjMatrix      packed 4x4 matrix applied to (ndc.xy, depth, 1) to
       *                           reconstruct the surface point
       * @param block              description of the block being updated
       * @param voxels             distances, stored as char scaled by 64
       * @param voxelsWeights      weights, stored as uchar scaled by 255
       */
      __kernel
      void updateDistanceFieldProj(__read_only image2d_t  normalDepthMap,
                                   uint                   numOccupiedVoxels,
                                   uint                   blockOffset,
                                   float16                modelViewMatrix,
                                   float16                projMatrix,
                                   float16                invProjMatrix,
                                   struct block_desc      block,
                                   __global char*         voxels,
                                   __global uchar*        voxelsWeights)
      {
          // Surplus work-items (global size rounded up past the voxel count) exit
          // here. Clamping them onto the last voxel instead would make several
          // work-items read-modify-write the same element, and
          // numOccupiedVoxels - 1 would wrap around for an empty block.
          const size_t gid = get_global_id(0);
          if (gid >= numOccupiedVoxels)
          {
              return;
          }
          uint voxel = (uint)gid;

          // compute the voxel centre in view space
          int3   gridPos    = calcGridPos(voxel, block.blockSizeShift, block.blockSizeMask);
          float4 cellCenter = (float4)(block.offset + (convert_float3(gridPos) + 0.5f) * block.voxelSize, 1.0f);
          cellCenter        = transform(modelViewMatrix, cellCenter);

          // project into the texture: map NDC [-1, 1] to normalized coordinates [0, 1]
          float4 projected  = transform(projMatrix, cellCenter);
          float2 tcoord     = 0.5f * projected.xy / projected.w + 0.5f;
          float  depth      = read_imagef(normalDepthMap, proj_sampler, tcoord).w;

          // if something is in the depth map, update the distance field
          if (depth < 1.0f)
          {
              // reconstruct the surface position in view space
              projected.xy /= projected.w;
              projected.z   = depth;
              projected.w   = 1.0f;
              float4 pointOnSurface = transform(invProjMatrix, projected);
              pointOnSurface.xyz /= pointOnSurface.w;

              // simple distance metric along view-space z; a point-to-plane
              // metric might be a better choice
              float dist = (cellCenter.z - pointOnSurface.z) * 0.3f / block.voxelSize.x;

              // merge the new measurement into the stored value
              voxel += blockOffset;
              float weightOld = convert_float(voxelsWeights[voxel]) / 255.0f;
              float weightNew = 0.1f;
              float weightSum = weightOld + weightNew;
              if (weightNew > 0.001f)
              {
                  float distOld = convert_float(voxels[voxel]) / 64.0f;
                  float distNew = (distOld * weightOld + dist * weightNew) / weightSum;

                  voxels[voxel]        = convert_char_sat_rte(distNew * 64.0f);
                  // Single-precision literal: the kernel does not enable
                  // cl_khr_fp64, so no double-precision arithmetic may appear here.
                  // NOTE(review): this stores weightNew, not weightSum, so the
                  // weight never accumulates - confirm that this is intended.
                  voxelsWeights[voxel] = convert_uchar_sat_rte(weightNew * 255.0f);
              }
          }
      }

       

      The application crashes somewhere in the OpenCL internals. It even crashes in AMD APP KernelAnalyzer. Commenting out the following lines solves the problem:

                 

      voxels[voxel]        = convert_char_sat_rte(distNew * 64.0f);

      voxelsWeights[voxel] = convert_uchar_sat_rte(weightNew * 255.0);

       

      This kernel compiles and works on NVIDIA hardware. Thanks in advance.