cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

Fr4nz
Journeyman III

Bug in OpenCL compiler? Segmentation fault...

When compiling the kernel attached to this post, this error message in returned:

0   llc             0x00000000009e18cf
1   llc             0x00000000009e2aa9
2   libpthread.so.0 0x00007f7a76fbb080
3   llc             0x00000000005fc31a
4   llc             0x000000000060227d
5   llc             0x0000000000602b8f
6   llc             0x00000000006033ef
7   llc             0x00000000005f676a
8   llc             0x00000000007d251a
9   llc             0x00000000007d2d60
10  llc             0x00000000007d2f9c
11  llc             0x000000000041ba06
12  libc.so.6       0x00007f7a76c585a6 __libc_start_main + 230
13  llc             0x0000000000419779
Stack dump:
0.    Program arguments: /home/fr4nz/atistream/bin/x86_64/llc -mcpu=gummy -mattr=mwgs-3-256-1-1 -regalloc=linearscan -mtriple=amdil-pc-amdopencl /tmp/OCLOZTphA.bc -f -o /tmp/OCLOZTphA.il
1.    Running pass 'AMD IL Control Flow Graph structurizer Pass' on function '@__OpenCL_intersect_kernel'
Segmentation fault


Specs: Ubuntu 9.04 64bit, ATI 5770, OpenCL 2.01 and 10.2 catalyst drivers.

The problem seems to reside in this "for" loop:

for(init.w=(k-3); init.w>=0; init.w--)
                tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]];


In fact, if I modify the kernel in order to make a "forward" loop:

for(init.w=0; init.w<(k-2); init.w++)
                tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]];


then the kernel is correctly compiled.

__kernel void intersect(__global uint4 *inputVD, __global uint *inputPrefixCk, __local uint4 *tmpBuffer, __local uint4 *tempIntersect, // Not used; __local uint *tmpCandItems, __local uint *tempCOUNTS, // Not used; __global uint *outputCounts, const int k, const int numItems, const int dimRowVec4, const int numIntersections) { const int gid = get_global_id(0); const int lid = get_local_id(0); const int lsize = get_local_size(0); uint4 tmpCOUNTS = 0; uint4 tmpIntersect; uint4 init; init.z = 1; init.x = (gid < numIntersections) ? 0 : k; for( ; init.x<k; init.x++) tmpCandItems[lid + init.x*lsize] = inputPrefixCk[gid*k + init.x]; for(init.x=0; init.x<dimRowVec4; init.x++) { init.y = (lid < numItems) ? lid : numItems; for( ; init.y<numItems; init.y+=lsize) tmpBuffer[init.y] = inputVD[init.y*dimRowVec4 + init.x]; barrier(CLK_LOCAL_MEM_FENCE); if(gid < numIntersections) { tmpIntersect = tmpBuffer[tmpCandItems[lid + (k-1)*lsize]] & tmpBuffer[tmpCandItems[lid + (k-2)*lsize]]; for(init.w=(k-3); init.w>=0; init.w--) tmpIntersect &= tmpBuffer[tmpCandItems[lid + init.w*lsize]]; // Bit count for(init.w=0 ; init.w<32 ; init.w++) { tmpCOUNTS += tmpIntersect & (uint4)init.z; tmpIntersect = tmpIntersect >> (uint4)init.z; } } } if(gid < numIntersections) outputCounts[gid] = tmpCOUNTS.x + tmpCOUNTS.y + tmpCOUNTS.z + tmpCOUNTS.w; }

0 Likes
2 Replies
Fr4nz
Journeyman III

Up!

0 Likes

Fr4nz,

The issue has been reported to the developers and they are looking into it. Thanks for the feedback.

0 Likes