Archives Discussions

dschwen · ‎08-13-2010

After minute changes on my kernel source - namely replacing float4 with float3 - my program suddenly crashes with

terminate called after throwing an instance of 'std::length_error'
what(): basic_string::_S_create

when I call clBuildProgram.

I just verified that the error vanishes if I replace all occurrences of float3 with float4 (like it used to be). It is the same project I mentioned in the "Erratic freezes" thread.

What is going on here?

The difference boils down to:

__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex, float zz, float ddzn, float ncd2,
   __global float *cA, __global float *cV, __global float *ccv,
   __global float4 *u,
   __local float3 *cn )

vs.

__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex, float zz, float ddzn, float ncd2,
   __global float *cA, __global float *cV, __global float *ccv,
   __global float4 *u,
   __local float3 *cn )

The float4 version works; the float3 version fails to build.

genaganna · ‎08-16-2010

Originally posted by: dschwen

After minute changes on my kernel source - namely replacing float4 with float3 - my program suddenly crashes with

terminate called after throwing an instance of 'std::length_error'   what(): basic_string::_S_create

when I call clBuildProgram.
Just verified that the error vanishes if i replace all occurrences of float3 with float4 (like it used to be). It is the same project I mentiond in the "Erratic freezes" thread.
What is going on here?
The difference boils down to:

__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex, float zz, float ddzn, float ncd2,    __global float *cA, __global float *cV, __global float *ccv,    __global float4 *u,    __local float3 *cn )

vs.
__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex, float zz, float ddzn, float ncd2,    __global float *cA, __global float *cV, __global float *ccv,    __global float4 *u,    __local float3 *cn )

float4 version works, float3 version build fails.

Could you please post the complete code here? See the code below and compare it with yours. The code below crashes in SKA.

#define SIZE 32
#define SIZE2 1024
#define LSIZE 8
#define LSIZE2 100

/*
 * mu3d -- float3 variant used to reproduce the clBuildProgram failure
 * discussed in this thread (3-component vectors passed by value).
 *
 * Each work-item owns one (x, y) column and walks it along z.  Three
 * consecutive z-layers are staged in local memory `cn`; a[0..2] are this
 * work-item's offsets into those three layers (LSIZE2 elements apart) and
 * the layers are rotated with (zl + k) % 3.
 *
 * Eax, Ebx, Evx, Exx, Ex : coefficient vectors, passed by value
 * zz, ddzn, ncd2         : scalar factors used in the expression for u
 * cA, cV, ccv            : global input fields, indexed as b + z*SIZE2
 * u                      : global output field, same indexing
 * cn                     : local scratch; indices up to a[2] + LSIZE + 2
 *                          are touched, so it needs at least 3*LSIZE2
 *                          elements
 *
 * NOTE: the plain `[b]` subscripts in the "current" section had been eaten
 * by the forum software (parsed as bold markup) and are restored here.
 */
__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex,
                    float zz, float ddzn, float ncd2,
                    __global float *cA, __global float *cV, __global float *ccv,
                    __global float3 *u,
                    __local float3 *cn )
{
  int xl = get_local_id(0);
  int yl = get_local_id(1);

  // offsets of this work-item in the three cached layers
  int a[3];
  a[0] = xl + yl * (LSIZE+2);
  a[1] = a[0] + LSIZE2;
  a[2] = a[1] + LSIZE2;

  cn[a[0]].s2 = 0.0f;
  cn[a[1]].s2 = 0.0f;
  cn[a[2]].s2 = 0.0f;

  // global coordinates: work-groups are (LSIZE+2) wide but advance by LSIZE
  int xg = ( get_global_id(0) / ( LSIZE + 2 ) );
  xg *= LSIZE;
  xg += xl;

  int yg = ( get_global_id(1) / ( LSIZE + 2 ) );
  yg *= LSIZE;
  yg += yl;

  // in-plane index, shifted by one and wrapped with a power-of-two mask
  int b = ( (xg-1) & (SIZE-1) ) + ( (yg-1) & (SIZE-1) ) *SIZE;

  float3 DC;
  float rv;

  //
  // copy self
  //

  // below (layer SIZE-1, stored in slot 2 == slot -1 mod 3)
  rv = 1.0f / ( 1.0f - ccv[ b + (SIZE-1)*SIZE2 ] ); // reduced volume
  cn[a[2]].s0 = cA[ b + (SIZE-1)*SIZE2 ] * rv;
  cn[a[2]].s2 = cV[ b + (SIZE-1)*SIZE2 ] * rv;
  cn[a[2]].s1 = rv - cn[a[2]].s0 - cn[a[2]].s2;

  // current (layer 0)
  rv = 1.0f / ( 1.0f - ccv[b] ); // reduced volume
  cn[a[0]].s0 = cA[b] * rv;
  cn[a[0]].s2 = cV[b] * rv;
  cn[a[0]].s1 = rv - cn[a[0]].s0 - cn[a[0]].s2;

  for( int zl = 0; zl < SIZE; zl++ )
  {
    // fetch next z-layer (wraps around at SIZE)
    rv = 1.0f / ( 1.0f - ccv[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] ); // reduced volume
    cn[a[(zl+1)%3]].s0 = cA[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
    cn[a[(zl+1)%3]].s2 = cV[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
    cn[a[(zl+1)%3]].s1 = rv - cn[a[(zl+1)%3]].s0 - cn[a[(zl+1)%3]].s2;

    // every work-item reaches this barrier; only the branch below is conditional
    barrier(CLK_LOCAL_MEM_FENCE);

    // copy back chemical potential (interior work-items only)
    if( xl > 0 && xl < (LSIZE+1) && yl > 0 && yl < (LSIZE+1) )
    {
      // six neighbours minus six times the centre, scaled by 1/ncd2
      DC = ( cn[a[(zl+1)%3]] + cn[a[(zl+2)%3]] + // +2 = -1
             cn[a[zl%3]+1] + cn[a[zl%3]-1] +
             cn[a[zl%3]+LSIZE+2] + cn[a[zl%3]-LSIZE-2] -
             6.0f * cn[a[zl%3]] ) / ncd2;

      u[ b + zl*SIZE2 ] = zz * ( Eax*cn[a[zl%3]].s0 + Ebx*cn[a[zl%3]].s1 + Evx*cn[a[zl%3]].s2 )
                        + ddzn * ( Eax * DC.s0 + Ebx * DC.s1 + Evx * DC.s2 )
                        -zz * Exx * 0.5f - Ex;
    }
  }
}

dschwen · ‎08-16-2010

Working kernel attached.

Replace the parameter declaration with

__kernel void mu3d( float3 Eax, float3 Ebx, float3 Evx, float3 Exx, float3 Ex, float zz, float ddzn, float ncd2,
   __global float *cA, __global float *cV, __global float *ccv,
   __global float3 *u,
   __local float3 *cn )

to cause the compilation error. Define the constants as:

-D LSIZE=8 -D LSIZE2=100 -D SIZE=32 -D SIZE2=1024 -D SIZE3=32768

/*
 * mu3d -- working variant: the by-value vector arguments are float4 and
 * only their .xyz part is used.
 *
 * SIZE, SIZE2, LSIZE and LSIZE2 are expected as -D build options.
 *
 * Each work-item owns one (x, y) column and walks it along z.  Three
 * consecutive z-layers are staged in local memory `cn`; a[0..2] are this
 * work-item's offsets into those three layers (LSIZE2 elements apart) and
 * the layers are rotated with (zl + k) % 3.
 *
 * Eax, Ebx, Evx, Exx, Ex : coefficient vectors, passed by value
 * zz, ddzn, ncd2         : scalar factors used in the expression for u
 * cA, cV, ccv            : global input fields, indexed as b + z*SIZE2
 * u                      : global output field, same indexing
 * cn                     : local scratch; indices up to a[2] + LSIZE + 2
 *                          are touched, so it needs at least 3*LSIZE2
 *                          elements
 *
 * NOTE: the plain `[b]` subscripts in the "current" section had been eaten
 * by the forum software (parsed as bold markup) and are restored here.
 */
__kernel void mu3d( float4 Eax, float4 Ebx, float4 Evx, float4 Exx, float4 Ex,
                    float zz, float ddzn, float ncd2,
                    __global float *cA, __global float *cV, __global float *ccv,
                    __global float3 *u,
                    __local float3 *cn )
{
  int xl = get_local_id(0);
  int yl = get_local_id(1);

  // offsets of this work-item in the three cached layers
  int a[3];
  a[0] = xl + yl * (LSIZE+2);
  a[1] = a[0] + LSIZE2;
  a[2] = a[1] + LSIZE2;

  // global coordinates: work-groups are (LSIZE+2) wide but advance by LSIZE
  int xg = ( get_global_id(0) / ( LSIZE + 2 ) );
  xg *= LSIZE;
  xg += xl;

  int yg = ( get_global_id(1) / ( LSIZE + 2 ) );
  yg *= LSIZE;
  yg += yl;

  // in-plane index, shifted by one and wrapped with a power-of-two mask
  int b = ( (xg-1) & (SIZE-1) ) + ( (yg-1) & (SIZE-1) ) *SIZE;

  float3 DC;
  float rv;

  //
  // copy self
  //

  // below (layer SIZE-1, stored in slot 2 == slot -1 mod 3)
  rv = 1.0f / ( 1.0f - ccv[ b + (SIZE-1)*SIZE2 ] ); // reduced volume
  cn[a[2]].s0 = cA[ b + (SIZE-1)*SIZE2 ] * rv;
  cn[a[2]].s2 = cV[ b + (SIZE-1)*SIZE2 ] * rv;
  cn[a[2]].s1 = rv - cn[a[2]].s0 - cn[a[2]].s2;

  // current (layer 0)
  rv = 1.0f / ( 1.0f - ccv[b] ); // reduced volume
  cn[a[0]].s0 = cA[b] * rv;
  cn[a[0]].s2 = cV[b] * rv;
  cn[a[0]].s1 = rv - cn[a[0]].s0 - cn[a[0]].s2;

  for( int zl = 0; zl < SIZE; zl++ )
  {
    // fetch next z-layer (wraps around at SIZE)
    rv = 1.0f / ( 1.0f - ccv[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] ); // reduced volume
    cn[a[(zl+1)%3]].s0 = cA[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
    cn[a[(zl+1)%3]].s2 = cV[ b + ( (zl+1) & (SIZE-1) )*SIZE2 ] * rv;
    cn[a[(zl+1)%3]].s1 = rv - cn[a[(zl+1)%3]].s0 - cn[a[(zl+1)%3]].s2;

    // every work-item reaches this barrier; only the branch below is conditional
    barrier(CLK_LOCAL_MEM_FENCE);

    // copy back chemical potential (interior work-items only)
    if( xl > 0 && xl < (LSIZE+1) && yl > 0 && yl < (LSIZE+1) )
    {
      // six neighbours minus six times the centre, scaled by 1/ncd2
      DC = ( cn[a[(zl+1)%3]] + cn[a[(zl+2)%3]] + // +2 = -1
             cn[a[zl%3]+1] + cn[a[zl%3]-1] +
             cn[a[zl%3]+LSIZE+2] + cn[a[zl%3]-LSIZE-2] -
             6.0f * cn[a[zl%3]] ) / ncd2;

      u[ b + zl*SIZE2 ] = zz * ( Eax.xyz*cn[a[zl%3]].s0 + Ebx.xyz*cn[a[zl%3]].s1 + Evx.xyz*cn[a[zl%3]].s2 )
                        + ddzn * ( Eax.xyz*DC.s0 + Ebx.xyz*DC.s1 + Evx.xyz*DC.s2 )
                        -zz*Exx.xyz * 0.5f - Ex.xyz;
    }
  }
}

MicahVillmow · ‎08-18-2010

dschwen,
This has been fixed internally but won't be public till our next release. The workaround is to not pass arguments by value with a 3 component data type.

Archives Discussions

clBuildProgram fails with std::length_error