2 Replies Latest reply on Mar 15, 2010 7:45 AM by omkaranathan

    Bug in OpenCL compiler? Segmentation fault...

    Fr4nz

      When compiling the kernel attached to this post, this error message in returned:

       

      0   llc             0x00000000009e18cf
      1   llc             0x00000000009e2aa9
      2   libpthread.so.0 0x00007f7a76fbb080
      3   llc             0x00000000005fc31a
      4   llc             0x000000000060227d
      5   llc             0x0000000000602b8f
      6   llc             0x00000000006033ef
      7   llc             0x00000000005f676a
      8   llc             0x00000000007d251a
      9   llc             0x00000000007d2d60
      10  llc             0x00000000007d2f9c
      11  llc             0x000000000041ba06
      12  libc.so.6       0x00007f7a76c585a6 __libc_start_main + 230
      13  llc             0x0000000000419779
      Stack dump:
      0.    Program arguments: /home/fr4nz/atistream/bin/x86_64/llc -mcpu=gummy -mattr=mwgs-3-256-1-1 -regalloc=linearscan -mtriple=amdil-pc-amdopencl /tmp/OCLOZTphA.bc -f -o /tmp/OCLOZTphA.il
      1.    Running pass 'AMD IL Control Flow Graph structurizer Pass' on function '@__OpenCL_intersect_kernel'
      Segmentation fault


      Specs: Ubuntu 9.04 64bit, ATI 5770, OpenCL 2.01 and 10.2 catalyst drivers.

      The problem seems to reside in this "for" loop:

       

      for(init.w=(k-3); init.w>=0; init.w--)
                      tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]];


      In fact, if I modify the kernel in order to make a "forward" loop:

       

      for(init.w=0; init.w<(k-2); init.w++)
                      tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]];


      then the kernel is correctly compiled.

      __kernel void intersect(__global uint4 *inputVD, __global uint *inputPrefixCk, __local uint4 *tmpBuffer, __local uint4 *tempIntersect, // Not used; __local uint *tmpCandItems, __local uint *tempCOUNTS, // Not used; __global uint *outputCounts, const int k, const int numItems, const int dimRowVec4, const int numIntersections) { const int gid = get_global_id(0); const int lid = get_local_id(0); const int lsize = get_local_size(0); uint4 tmpCOUNTS = 0; uint4 tmpIntersect; uint4 init; init.z = 1; init.x = (gid < numIntersections) ? 0 : k; for( ; init.x<k; init.x++) tmpCandItems[lid + init.x*lsize] = inputPrefixCk[gid*k + init.x]; for(init.x=0; init.x<dimRowVec4; init.x++) { init.y = (lid < numItems) ? lid : numItems; for( ; init.y<numItems; init.y+=lsize) tmpBuffer[init.y] = inputVD[init.y*dimRowVec4 + init.x]; barrier(CLK_LOCAL_MEM_FENCE); if(gid < numIntersections) { tmpIntersect = tmpBuffer[tmpCandItems[lid + (k-1)*lsize]] & tmpBuffer[tmpCandItems[lid + (k-2)*lsize]]; for(init.w=(k-3); init.w>=0; init.w--) tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]]; // Bit count for(init.w=0 ; init.w<32 ; init.w++) { tmpCOUNTS += tmpIntersect & (uint4)init.z; tmpIntersect = tmpIntersect >> (uint4)init.z; } } } if(gid < numIntersections) outputCounts[gid] = tmpCOUNTS.x + tmpCOUNTS.y + tmpCOUNTS.z + tmpCOUNTS.w; }