dschwen

Erratic freezes on linux (update)

Discussion created by dschwen on Jul 27, 2010
Latest reply on Aug 13, 2010 by dschwen
(SDK 2.1, Catalyst 10.5, 10.6, 10.7; still present in SDK 2.2, Catalyst 10.7b)

The attached kernel code (it compiles fine, and Stream Analyzer under Windows does not complain either), together with a rather simple initialization and running program, makes my computer freeze completely.

The precise behaviour is rather erratic. If I comment out the main loop of the kernel, the whole thing runs a few times (one single kernel execution per run) until it freezes my computer. The non-OpenCL-certified drivers 10.6 and 10.7 lead to immediate freezing; with the 10.5 driver the code reaches a clFinish after I submit the kernel, and then takes about 1-2 minutes to completely lock up the system.

I have no idea where to even start looking. :-(

#define SIZE 32
#define SIZE2 1024
#define LSIZE 8
#define LSIZE2 100

// Loads one grid cell into a slot of the local tile.
//
//   cn   - local tile storage
//   slot - index into cn of the float4 to fill
//   cA, cV, ccv - global input arrays, all read at element `src`
//   src  - flat index of the grid cell to load
//
// Writes components s0, s1 and s2 of cn[slot]; s3 is left untouched.
void mu3d_load_cell( __local float4 *cn, int slot,
                     __global const float *cA,
                     __global const float *cV,
                     __global const float *ccv,
                     int src )
{
  const float rv = 1.0f / ( 1.0f - ccv[src] ); // reduced volume

  cn[slot].s0 = cA[src] * rv;
  cn[slot].s2 = cV[src] * rv;
  cn[slot].s1 = rv - cn[slot].s0 - cn[slot].s2;
}

// Walks all SIZE z-layers of a SIZE x SIZE x SIZE grid and writes one float4
// per interior work-item and layer into u ("chemical potential").
//
// Each work-item owns one position of a 2D tile that is (LSIZE+2) entries
// wide; cn keeps three such tiles (LSIZE2 float4 each), one per z-layer, and
// the three are reused cyclically while the loop advances in z.
// Work-items on the tile border only load data; they never write to u.
//
//   Eax, Ebx, Evx - float4 weights applied to components s0, s1, s2
//   Exx, Ex       - float4 terms subtracted from the result
//   zz, ddzn      - scalar factors of the two weighted sums
//   ncd2          - divisor of the neighbour stencil
//   cA, cV, ccv   - global inputs, indexed x + y*SIZE + z*SIZE2
//   u             - global output, same indexing
//   cn            - local scratch; indices up to slot[2] are used
//
// NOTE(review): the indexing looks like it expects a work-group of
// (LSIZE+2) x (LSIZE+2) work-items and at least 3*LSIZE2 float4 of local
// memory behind cn - confirm against the host code.
__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex,
                    float zz, float ddzn, float ncd2,
                    __global float *cA,
                    __global float *cV,
                    __global float *ccv,
                    __global float4 *u,
                    __local float4 *cn )
{
  const int row = LSIZE + 2; // stride between two tile rows in cn
  const int lx = get_local_id(0);
  const int ly = get_local_id(1);

  // One slot per buffered z-layer, all at this work-item's tile position.
  int slot[3];
  slot[0] = lx + ly * row;
  slot[1] = slot[0] + LSIZE2;
  slot[2] = slot[1] + LSIZE2;

  for( int k = 0; k < 3; ++k )
  {
    cn[slot[k]].s3 = 0.0f;
  }

  // Grid coordinates: the global id is reduced to a multiple of LSIZE and the
  // local id is added back on top.
  const int gx = (int)( get_global_id(0) / ( LSIZE + 2 ) ) * LSIZE + lx;
  const int gy = (int)( get_global_id(1) / ( LSIZE + 2 ) ) * LSIZE + ly;

  // Shift by one and wrap with a mask (SIZE is a power of two).
  const int cell = ( (gx-1) & (SIZE-1) ) + ( (gy-1) & (SIZE-1) ) * SIZE;

  // Only non-border tile positions produce output.
  const int interior = lx > 0 && lx < (LSIZE+1) && ly > 0 && ly < (LSIZE+1);

  //
  // copy self
  //

  // below
  mu3d_load_cell( cn, slot[2], cA, cV, ccv, cell + (SIZE-1)*SIZE2 );

  // current
  mu3d_load_cell( cn, slot[0], cA, cV, ccv, cell );

  for( int z = 0; z < SIZE; ++z )
  {
    const int here  = slot[z % 3];
    const int above = slot[(z+1) % 3];
    const int below = slot[(z+2) % 3]; // +2 = -1

    // fetch next z-layer
    mu3d_load_cell( cn, above, cA, cV, ccv, cell + ( (z+1) & (SIZE-1) )*SIZE2 );

    // Every work-item reaches this barrier before any neighbour slot is read.
    barrier(CLK_LOCAL_MEM_FENCE);

    // copy back chemical potential
    if( interior )
    {
      const float4 c = cn[here];

      // Six neighbours (two in z, four in the tile) minus six times the centre.
      const float4 DC = ( cn[above] + cn[below] +
                          cn[here+1] + cn[here-1] +
                          cn[here+row] + cn[here-row] -
                          6.0f * c ) / ncd2;

      const float4 local_term = zz * ( Eax*c.s0 + Ebx*c.s1 + Evx*c.s2 );
      const float4 stencil_term = ddzn * ( Eax * DC.s0 + Ebx * DC.s1 + Evx * DC.s2 );

      u[ cell + z*SIZE2 ] = local_term + stencil_term - zz * Exx * 0.5f - Ex;
    }
  }
}

Outcomes