2 Replies Latest reply on Mar 16, 2010 4:02 PM by grafixel

    cl compiler crash

    grafixel
      compiler crashes on code dereferencing a struct

      Hi

      I wrote a kernel (attached) that uses several helper functions, and it crashes the compiler in some cases. I have identified the problematic code:

      LCG(&(s->z4), &(s->z5), 1013904223UL)        //crashes opencl compiler

      LCG(&(s->z4), &A, 1013904223UL)  // in contrast doesn't crash

      LCG is just a function with signature (unsigned *state, const unsigned *m, const unsigned s)

      When I remove the __kernel function completely and compile only the called functions, the compiler doesn't crash, even when using those offending lines.

      Is it a compiler bug, or am I doing something illegal?

      Thanks

       

      /*
       * Monte Carlo pricer for a European option (OpenCL C).
       *
       * Each work-item owns a private hybrid Tausworthe/LCG random number
       * generator, accumulates a Brownian path from Box-Muller normals and
       * writes one discounted payoff sample to result[get_global_id(0)].
       */

      /* Values of Option.isCall: positive selects the call payoff, negative the put. */
      #define ISCALL 1.0f
      #define ISPUT (-1.0f)

      #ifndef PI
      #define PI 3.14159265358979f
      #endif

      /* Seed handed to every work-item; streams are separated by init_state(). */
      #define SEED 190673

      /* Multiplier and increment of the linear congruential generator. */
      #define LCG_MULTIPLIER 1664525U
      #define LCG_INCREMENT 1013904223U

      /* 2^-32: scales a 32-bit unsigned integer to a float in [0, 1]. */
      #define UINT_TO_UNIT 2.3283064365387e-10f

      /* Contract parameters, as consumed by calcBSMC() and evalOptionAtForward(). */
      struct Option {
          float Strike;  /* strike used in the payoff */
          float T;       /* horizon: used in exp(-r*T) and split into time steps */
          float r;       /* rate used for discounting and in the drift r - q */
          float q;       /* rate subtracted from r in the drift */
          float isCall;  /* > 0: call payoff, < 0: put payoff, otherwise invalid */
      };

      /* Per-work-item generator state: three Tausworthe words, the LCG word
       * (z4) and the multiplier used for the LCG step (z5). */
      struct state {
          unsigned z1, z2, z3, z4, z5;
      };

      /*
       * One Tausworthe step on *z.
       *
       * S1, S2, S3 are the shift amounts and M the bit mask of the component.
       * Updates *z in place and returns the new value.
       */
      unsigned Tausw(unsigned *z, int S1, int S2, int S3, unsigned M)
      {
          unsigned b = ((*z << S1) ^ *z) >> S2;

          *z = ((*z & M) << S3) ^ b;
          return *z;
      }

      /*
       * One linear congruential step: *state = *m * *state + s.
       *
       * The arithmetic is done in 32-bit unsigned, which already wraps
       * modulo 2^32, so no explicit reduction is needed.
       * Updates *state in place and returns the new value.
       */
      unsigned LCG(unsigned *state, const unsigned *m, const unsigned s)
      {
          *state = *m * *state + s;
          return *state;
      }

      /*
       * Returns x^n reduced by mod, computed with n - 1 multiplications.
       *
       * mod == 0 selects the default modulus 4294967295. n == 0 yields 1 and
       * n == 1 yields x unreduced. The reduction is applied after every
       * multiplication so the intermediate product stays within a 64-bit long
       * for x below 2^31.
       *
       * Written as a loop on purpose: OpenCL C does not permit recursion.
       */
      long power32(const long x, const unsigned long n, long mod)
      {
          if (mod == 0)
              mod = 4294967295L;
          if (n == 0)
              return 1;

          long result = x;
          for (unsigned long i = 1; i < n; i++)
              result = (x * result) % mod;
          return result;
      }

      /*
       * Seeds the generator state *s for stream number `index`.
       *
       * z4 starts at `seed` and is advanced `index` times with the standard
       * multiplier, then once more with z5 as the multiplier; z1..z3 are set
       * to the resulting z4. z5 is always assigned before it is read.
       *
       * NOTE(review): z5 is reduced modulo 2^32 - 1 while the LCG itself works
       * modulo 2^32 - confirm which modulus is intended.
       * NOTE(review): the warm-up loop costs `index` LCG steps per work-item.
       */
      void init_state(struct state *s, int index, unsigned seed)
      {
          unsigned A = LCG_MULTIPLIER;

          s->z4 = seed;
          s->z5 = (unsigned)power32(A, index + 1, 4294967295UL);

          for (int i = 0; i < index; i++)
              LCG(&s->z4, &A, LCG_INCREMENT);
          LCG(&s->z4, &s->z5, LCG_INCREMENT);

          s->z1 = s->z4;
          s->z2 = s->z4;
          s->z3 = s->z4;
      }

      /*
       * Draws one uniform float in [0, 1] by XOR-combining three Tausworthe
       * components with one LCG step and scaling by 2^-32.
       *
       * Component periods per the original author's notes: 2^31-1, 2^30-1,
       * 2^28-1 and 2^32, for a combined period of roughly 2^121.
       * The four generators are stepped in a fixed order; each touches a
       * different state word, so the order does not affect the result.
       */
      float HybridTaus(struct state *s)
      {
          unsigned bits = Tausw(&s->z1, 13, 19, 12, 4294967294U);

          bits ^= Tausw(&s->z2, 2, 25, 4, 4294967288U);
          bits ^= Tausw(&s->z3, 3, 11, 17, 4294967280U);
          bits ^= LCG(&s->z4, &s->z5, LCG_INCREMENT);

          return UINT_TO_UNIT * (float)bits;
      }

      /*
       * Box-Muller transform: consumes two uniforms from *s and returns a
       * pair of normal variates in .x and .y.
       */
      float2 BoxMuller(struct state *s)
      {
          float u0 = HybridTaus(s);
          float u1 = HybridTaus(s);

          /* log(0) is -inf; clamp to the smallest non-zero output instead. */
          if (u0 <= 0.0f)
              u0 = UINT_TO_UNIT;

          float r = sqrt(-2.0f * log(u0));
          float theta = 2.0f * PI * u1;

          float2 result;
          result.x = r * sin(theta);
          result.y = r * cos(theta);
          return result;
      }

      /*
       * Discounted payoff of option *o for terminal price ST.
       *
       * Returns exp(-r*T) * max(ST - Strike, 0) when isCall > 0,
       * exp(-r*T) * max(Strike - ST, 0) when isCall < 0,
       * and -1.0f when isCall is neither (zero or NaN).
       */
      float evalOptionAtForward(struct Option *o, float ST)
      {
          if (o->isCall > 0.0f)
              return exp(-o->r * o->T) * max(ST - o->Strike, 0.0f);
          if (o->isCall < 0.0f)
              return exp(-o->r * o->T) * max(o->Strike - ST, 0.0f);
          return -1.0f;
      }

      /*
       * Kernel: one Monte Carlo sample per work-item.
       *
       * result    - output, one float per work-item, indexed by global id 0.
       * ArgBuffer - inputs: [0] S0, [1] Strike, [2] T, [3] r, [4] q, [5] vola.
       * runs      - Box-Muller draws per path; each draw supplies two steps.
       */
      __kernel void calcBSMC(__global float *result, __global float *ArgBuffer, unsigned int runs)
      {
          const unsigned int gid = get_global_id(0);
          const float S0 = ArgBuffer[0];
          const float vola = ArgBuffer[5];

          __private struct Option theOption;
          theOption.Strike = ArgBuffer[1];
          theOption.T = ArgBuffer[2];
          theOption.r = ArgBuffer[3];
          theOption.q = ArgBuffer[4];
          /* NOTE(review): same slot as vola, so a positive volatility always
           * selects the call payoff. Probably meant a separate slot - confirm
           * against the host-side buffer layout before changing the index. */
          theOption.isCall = ArgBuffer[5];

          /* One generator per work-item; the stream index depends on gid. */
          __private struct state theState;
          init_state(&theState, gid + 2 * runs, SEED);

          /* Two time steps per run, hence half of T/runs per step. */
          float dt = theOption.T / runs;
          dt *= 0.5f;
          const float sqrtdt = sqrt(dt);
          const float b = theOption.r - theOption.q;

          /* Only the terminal value of the Brownian path is needed, so the
           * increments are summed and the price is computed once at the end. */
          float dWT = 0.0f;
          for (unsigned int i = 0; i < runs; i++) {
              float2 dW = BoxMuller(&theState);
              dWT += sqrtdt * dW.x;
              dWT += sqrtdt * dW.y;
          }

          const float S = S0 * exp((b - vola * vola * 0.5f) * theOption.T + vola * dWT);
          result[gid] = evalOptionAtForward(&theOption, S);
      }