After minute changes on my kernel source - namely replacing float4 with float3 - my program suddenly crashes with
terminate called after throwing an instance of 'std::length_error'
what(): basic_string::_S_create
The difference boils down to:
__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex, float zz, float ddzn, float ncd2,
__global float *cA, __global float *cV, __global float *ccv,
__global float4 *u,
__local float3 *cn )
The float4 version works; the float3 version fails to build.
Originally posted by: dschwen
After minute changes on my kernel source - namely replacing float4 with float3 - my program suddenly crashes with
terminate called after throwing an instance of 'std::length_error' what(): basic_string::_S_create
when I call clBuildProgram. I just verified that the error vanishes if I replace all occurrences of float3 with float4 (like it used to be). It is the same project I mentioned in the "Erratic freezes" thread. What is going on here? The difference boils down to:
__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex, float zz, float ddzn, float ncd2, __global float *cA, __global float *cV, __global float *ccv, __global float4 *u, __local float3 *cn )
vs.
__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex, float zz, float ddzn, float ncd2, __global float *cA, __global float *cV, __global float *ccv, __global float4 *u, __local float3 *cn )
The float4 version works; the float3 version fails to build.
Could you please post the complete code here? See the code below and compare it with yours. The code below crashes in SKA.
/* Edge length of the global grid. Must be a power of two, because global
 * indices are wrapped with "& (SIZE-1)". */
#define SIZE 32
/* Number of cells in one z-layer of the global grid. */
#define SIZE2 (SIZE * SIZE)
/* Edge length of the interior of a work-group tile (without the halo). */
#define LSIZE 8
/* Number of cells in one layer of the local tile, including the one-cell
 * halo on each side. */
#define LSIZE2 ((LSIZE + 2) * (LSIZE + 2))

/*
 * mu3d - walk the grid along z and write one float3 per cell to u.
 *
 * Each work-item owns one (x, y) column. It keeps three consecutive z-layers
 * of the derived quantities (cn) in local memory as a rolling buffer:
 *   cn.s0 = cA * rv,  cn.s2 = cV * rv,  cn.s1 = rv - cn.s0 - cn.s2,
 * with rv = 1 / (1 - ccv).
 * For every layer, interior work-items combine the six nearest neighbours
 * and the centre cell into DC (neighbour sum minus six times the centre,
 * divided by ncd2) and store
 *   zz * (Eax*c.s0 + Ebx*c.s1 + Evx*c.s2)
 *     + ddzn * (Eax*DC.s0 + Ebx*DC.s1 + Evx*DC.s2) - zz*Exx*0.5 - Ex
 * into u.
 *
 * Parameters:
 *   Eax, Ebx, Evx, Exx, Ex  coefficient vectors used in the output formula
 *   zz, ddzn, ncd2          scalar factors used in the output formula
 *   cA, cV, ccv             input fields, indexed as x + y*SIZE + z*SIZE2
 *   u                       output field, same indexing
 *   cn                      local scratch buffer; indexed up to
 *                           a[0] + 2*LSIZE2, so it needs at least
 *                           3*LSIZE2 elements
 *
 * NOTE(review): presumably launched with a 2D local size of
 * (LSIZE+2) x (LSIZE+2), so that the outer ring of work-items only loads
 * halo cells -- confirm against the host code.
 */
__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex,
                    float zz, float ddzn, float ncd2,
                    __global float *cA, __global float *cV, __global float *ccv,
                    __global float3 *u,
                    __local float3 *cn )
{
    int xl = get_local_id(0);
    int yl = get_local_id(1);

    /* Offsets of this work-item's cell in the three local layers. */
    int a[3];
    a[0] = xl + yl * (LSIZE+2);
    a[1] = a[0] + LSIZE2;
    a[2] = a[1] + LSIZE2;

    cn[a[0]].s2 = 0.0f;
    cn[a[1]].s2 = 0.0f;
    cn[a[2]].s2 = 0.0f;

    /* Global coordinates: tiles advance by LSIZE although each work-group
     * spans LSIZE+2 work-items per dimension. */
    int xg = ( get_global_id(0) / ( LSIZE + 2 ) );
    xg *= LSIZE;
    xg += xl;
    int yg = ( get_global_id(1) / ( LSIZE + 2 ) );
    yg *= LSIZE;
    yg += yl;

    /* Index within a z-layer, shifted by one cell for the halo and wrapped
     * around at the grid edges. */
    int b = ( (xg-1) & (SIZE-1) ) + ( (yg-1) & (SIZE-1) ) * SIZE;

    float3 DC;
    float rv;

    //
    // copy self
    //

    // below (layer SIZE-1, i.e. the wrapped neighbour of layer 0)
    rv = 1.0f / ( 1.0f - ccv[ b + (SIZE-1)*SIZE2 ] ); // reduced volume
    cn[a[2]].s0 = cA[ b + (SIZE-1)*SIZE2 ] * rv;
    cn[a[2]].s2 = cV[ b + (SIZE-1)*SIZE2 ] * rv;
    cn[a[2]].s1 = rv - cn[a[2]].s0 - cn[a[2]].s2;

    // current (layer 0); the inputs are pointers and must be subscripted
    rv = 1.0f / ( 1.0f - ccv[ b ] ); // reduced volume
    cn[a[0]].s0 = cA[ b ] * rv;
    cn[a[0]].s2 = cV[ b ] * rv;
    cn[a[0]].s1 = rv - cn[a[0]].s0 - cn[a[0]].s2;

    for( int zl = 0; zl < SIZE; zl++ )
    {
        // fetch next z-layer
        rv = 1.0f / ( 1.0f - ccv[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] ); // reduced volume
        cn[a[(zl+1)%3]].s0 = cA[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
        cn[a[(zl+1)%3]].s2 = cV[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
        cn[a[(zl+1)%3]].s1 = rv - cn[a[(zl+1)%3]].s0 - cn[a[(zl+1)%3]].s2;

        /* Every work-item reaches this barrier (it is outside the interior
         * test below), so the neighbours' local writes are visible before
         * they are read. */
        barrier(CLK_LOCAL_MEM_FENCE);

        // copy back chemical potential
        if( xl > 0 && xl < (LSIZE+1) && yl > 0 && yl < (LSIZE+1) )
        {
            DC = ( cn[a[(zl+1)%3]] + cn[a[(zl+2)%3]] + // +2 = -1
                   cn[a[zl%3]+1] + cn[a[zl%3]-1] +
                   cn[a[zl%3]+LSIZE+2] + cn[a[zl%3]-LSIZE-2] -
                   6.0f * cn[a[zl%3]] ) / ncd2;

            u[ b + zl*SIZE2 ] =
                zz * ( Eax*cn[a[zl%3]].s0 + Ebx*cn[a[zl%3]].s1 + Evx*cn[a[zl%3]].s2 )
                + ddzn * ( Eax * DC.s0 + Ebx * DC.s1 + Evx * DC.s2 )
                - zz * Exx * 0.5f - Ex;
        }
    }
}
Working kernel attached.
Replace the parameter declaration with:
__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex, float zz, float ddzn, float ncd2,
__global float *cA, __global float *cV, __global float *ccv,
__global float3 *u,
__local float3 *cn )
/*
 * mu3d - walk the grid along z and write one float3 per cell to u
 * (variant taking the coefficient vectors as float4 and using only .xyz).
 *
 * Expects the macros SIZE, SIZE2, LSIZE and LSIZE2 to be defined before this
 * kernel in the program source. Indices are wrapped with "& (SIZE-1)", so
 * SIZE must be a power of two.
 *
 * Each work-item owns one (x, y) column. It keeps three consecutive z-layers
 * of the derived quantities (cn) in local memory as a rolling buffer:
 *   cn.s0 = cA * rv,  cn.s2 = cV * rv,  cn.s1 = rv - cn.s0 - cn.s2,
 * with rv = 1 / (1 - ccv).
 * For every layer, interior work-items combine the six nearest neighbours
 * and the centre cell into DC (neighbour sum minus six times the centre,
 * divided by ncd2) and store
 *   zz * (Eax*c.s0 + Ebx*c.s1 + Evx*c.s2)
 *     + ddzn * (Eax*DC.s0 + Ebx*DC.s1 + Evx*DC.s2) - zz*Exx*0.5 - Ex
 * into u, using the .xyz components of the float4 coefficients.
 *
 * Parameters:
 *   Eax, Ebx, Evx, Exx, Ex  coefficient vectors; only .xyz is read
 *   zz, ddzn, ncd2          scalar factors used in the output formula
 *   cA, cV, ccv             input fields, indexed as x + y*SIZE + z*SIZE2
 *   u                       output field, same indexing
 *   cn                      local scratch buffer; indexed up to
 *                           a[0] + 2*LSIZE2, so it needs at least
 *                           3*LSIZE2 elements
 *
 * NOTE(review): presumably launched with a 2D local size of
 * (LSIZE+2) x (LSIZE+2), so that the outer ring of work-items only loads
 * halo cells -- confirm against the host code.
 */
__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex,
                    float zz, float ddzn, float ncd2,
                    __global float *cA, __global float *cV, __global float *ccv,
                    __global float3 *u,
                    __local float3 *cn )
{
    int xl = get_local_id(0);
    int yl = get_local_id(1);

    /* Offsets of this work-item's cell in the three local layers. */
    int a[3];
    a[0] = xl + yl * (LSIZE+2);
    a[1] = a[0] + LSIZE2;
    a[2] = a[1] + LSIZE2;

    /* Global coordinates: tiles advance by LSIZE although each work-group
     * spans LSIZE+2 work-items per dimension. */
    int xg = ( get_global_id(0) / ( LSIZE + 2 ) );
    xg *= LSIZE;
    xg += xl;
    int yg = ( get_global_id(1) / ( LSIZE + 2 ) );
    yg *= LSIZE;
    yg += yl;

    /* Index within a z-layer, shifted by one cell for the halo and wrapped
     * around at the grid edges. */
    int b = ( (xg-1) & (SIZE-1) ) + ( (yg-1) & (SIZE-1) ) * SIZE;

    float3 DC;
    float rv;

    //
    // copy self
    //

    // below (layer SIZE-1, i.e. the wrapped neighbour of layer 0)
    rv = 1.0f / ( 1.0f - ccv[ b + (SIZE-1)*SIZE2 ] ); // reduced volume
    cn[a[2]].s0 = cA[ b + (SIZE-1)*SIZE2 ] * rv;
    cn[a[2]].s2 = cV[ b + (SIZE-1)*SIZE2 ] * rv;
    cn[a[2]].s1 = rv - cn[a[2]].s0 - cn[a[2]].s2;

    // current (layer 0); the inputs are pointers and must be subscripted
    rv = 1.0f / ( 1.0f - ccv[ b ] ); // reduced volume
    cn[a[0]].s0 = cA[ b ] * rv;
    cn[a[0]].s2 = cV[ b ] * rv;
    cn[a[0]].s1 = rv - cn[a[0]].s0 - cn[a[0]].s2;

    for( int zl = 0; zl < SIZE; zl++ )
    {
        // fetch next z-layer
        rv = 1.0f / ( 1.0f - ccv[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] ); // reduced volume
        cn[a[(zl+1)%3]].s0 = cA[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
        cn[a[(zl+1)%3]].s2 = cV[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
        cn[a[(zl+1)%3]].s1 = rv - cn[a[(zl+1)%3]].s0 - cn[a[(zl+1)%3]].s2;

        /* Every work-item reaches this barrier (it is outside the interior
         * test below), so the neighbours' local writes are visible before
         * they are read. */
        barrier(CLK_LOCAL_MEM_FENCE);

        // copy back chemical potential
        if( xl > 0 && xl < (LSIZE+1) && yl > 0 && yl < (LSIZE+1) )
        {
            DC = ( cn[a[(zl+1)%3]] + cn[a[(zl+2)%3]] + // +2 = -1
                   cn[a[zl%3]+1] + cn[a[zl%3]-1] +
                   cn[a[zl%3]+LSIZE+2] + cn[a[zl%3]-LSIZE-2] -
                   6.0f * cn[a[zl%3]] ) / ncd2;

            u[ b + zl*SIZE2 ] =
                zz * ( Eax.xyz*cn[a[zl%3]].s0 + Ebx.xyz*cn[a[zl%3]].s1 + Evx.xyz*cn[a[zl%3]].s2 )
                + ddzn * ( Eax.xyz*DC.s0 + Ebx.xyz*DC.s1 + Evx.xyz*DC.s2 )
                - zz*Exx.xyz * 0.5f - Ex.xyz;
        }
    }
}