d.a.a.

Kernel crashing the AMD's OpenCL compiler

Discussion created by d.a.a. on Nov 26, 2010
Latest reply on Feb 17, 2011 by d.a.a.

The kernel attached below crashes the OpenCL compiler on my machine[1] when targeting the GPU device--it works under the Nvidia's OpenCL compiler, though. Strangely enough, when I change the value of WGS to 3 or less the compiler doesn't crash anymore.


1. GNU/Linux 2.6.36 64-bit, fglrx 10.11, AMD SDK v2.2, Radeon HD5750.

 

 

// ---------- BEGIN kernel #define WGS 256 #define NUM_POINTS 8192 #define X_DIM 4 #define MAX_TREE_SIZE 50 #define MAX_FLOAT 3.40282e+38f #define TOP ( stack[stack_top] ) #define POP ( stack[stack_top--] ) #define PUSH(arity, exp) stack[stack_top + 1 - arity] = (exp); stack_top = stack_top + 1 - arity; #define ARG(n) (stack[stack_top - n]) #define CREATE_STACK( type, size ) type stack[size]; int stack_top = -1; #define NODE program[op] #define INTERPRETER_CORE case 1:PUSH(1,ARG(0)) break; case 6:PUSH(2,(ARG(1) == 0.0f ? 1.0f : ARG(0)/ARG(1))) break; case 3:PUSH(2,ARG(0) + ARG(1)) break; case 4:PUSH(2,ARG(0) - ARG(1)) break; case 5:PUSH(2,ARG(0) * ARG(1)) break; #define COMPACT_RANGE 4194303 #define SCALE_FACTOR 16 #define ARITY( packed ) ((packed & 0xE0000000) >> 29) #define INDEX( packed ) ((packed & 0x1FC00000) >> 22) #define AS_INT( packed ) (packed & 0x3FFFFF) #define AS_FLOAT( packed ) ((float)( packed & 0x3FFFFF ) * SCALE_FACTOR / COMPACT_RANGE) __kernel void evaluate( __global const uint* pop, __global const float* X, __global const float* Y, __global float* E, __local uint* program ) { __local float PE[WGS]; __local unsigned int program_size; CREATE_STACK( float, MAX_TREE_SIZE ); uint i_id = get_local_id( 0 ); uint g_id = get_group_id( 0 ); uint wg_size = get_local_size( 0 ); if( i_id == 0 ) program_size = pop[(MAX_TREE_SIZE + 1) * g_id]; barrier(CLK_LOCAL_MEM_FENCE); if( i_id < program_size ) program[i_id] = pop[(MAX_TREE_SIZE + 1) * g_id + i_id + 1]; barrier(CLK_LOCAL_MEM_FENCE); PE[i_id] = 0.0f; for( uint iter = 0; iter < NUM_POINTS/wg_size; ++iter ) { // ------------------------------- // Calls the interpreter (C macro) // ------------------------------- for( int op = program_size; op-- ; ) switch( INDEX( program[op] ) ) { INTERPRETER_CORE default: PUSH( 0, X[iter * wg_size + NUM_POINTS * AS_INT( program[op] ) + i_id] ); } // ------------------------------- PE[i_id] += pown( POP - Y[ iter * wg_size + i_id ], 2 ); } for( uint d = 2; d<= WGS; d *= 2 ) { barrier(CLK_LOCAL_MEM_FENCE); if( i_id % d == 0 ) PE[i_id] += PE[i_id + d/2]; } if( i_id == 0 ) E[g_id] = ( isinf( PE[0] ) || isnan( PE[0] ) ) ? MAX_FLOAT : PE[0] / (float) NUM_POINTS; } // ------------------- END kernel I get this GDB output when I try to build the above kernel: Program received signal SIGSEGV, Segmentation fault. 0x00007ffff47e1edd in ?? () from /usr/lib/libaticaldd.so (gdb) bt #0 0x00007ffff47e1edd in ?? () from /usr/lib/libaticaldd.so #1 0x00007ffff47d9c20 in ?? () from /usr/lib/libaticaldd.so #2 0x00007ffff47dabf7 in ?? () from /usr/lib/libaticaldd.so #3 0x00007ffff47b6935 in ?? () from /usr/lib/libaticaldd.so #4 0x00007ffff47b6d4f in ?? () from /usr/lib/libaticaldd.so #5 0x00007ffff47b824a in ?? () from /usr/lib/libaticaldd.so #6 0x00007ffff47b9070 in ?? () from /usr/lib/libaticaldd.so #7 0x00007ffff47b5ab3 in ?? () from /usr/lib/libaticaldd.so #8 0x00007ffff47575aa in ?? () from /usr/lib/libaticaldd.so #9 0x00007ffff46c72fc in ?? () from /usr/lib/libaticaldd.so #10 0x00007ffff46c758c in ?? () from /usr/lib/libaticaldd.so #11 0x00007ffff46c7fe9 in ?? () from /usr/lib/libaticaldd.so #12 0x00007ffff48bf715 in ?? () from /usr/lib/libaticaldd.so #13 0x00007ffff465c61a in ?? () from /usr/lib/libaticaldd.so #14 0x00007ffff465dacd in ?? () from /usr/lib/libaticaldd.so #15 0x00007ffff5d757c7 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #16 0x00007ffff5d77b19 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #17 0x00007ffff5d799b9 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #18 0x00007ffff5d8782a in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #19 0x00007ffff5d8a9a7 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #20 0x00007ffff5d5e329 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #21 0x00007ffff5da27f8 in ?? () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #22 0x00007ffff5d3cdd3 in clBuildProgram () from /opt/ati-stream-sdk-v2.2-lnx64/lib/x86_64/libatiocl64.so #23 0x0000000000434841 in cl::Program::build (this=0x7fffffffdf90, devices=..., options=0x0, notifyFptr=0, data=0x0) at /opt/ati-stream-sdk-v2.2-lnx64/include/CL/cl.hpp:2474 #24 0x0000000000432131 in GP::BuildKernel (this=0x6666b0) at /home/douglas/gpocl/gpocl/trunk/src/gp/GP.cc:689 #25 0x0000000000424ac9 in GP::Run (this=0x6666b0) at /home/douglas/gpocl/gpocl/trunk/src/gp/GP.h:133 #26 0x0000000000423fa7 in main (argc=22, argv=0x7fffffffe438) at /home/douglas/gpocl/gpocl/trunk/src/gp/gpocl.cc:54

Outcomes