cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

chevydevil
Adept II

Loop unrolling problem

clBuiltProgram does not built - without error message

Hello. I'm doing some performance optimzation for my mass-spring system kernels. One kernel has to sum up all the calculated forces and the normals for a specific masspoint. When i unroll this loop as followed the clBuiltProgram instruction takes forever for the gpu. For the CPU its not a problem at all. Is there a problem in my code?

__kernel void force_sum( __global vertex* _out, __global float4* _force_all, __global float4* _force_sum, __global float4* _normal, const float4 _store) { int id = get_global_id(0); unsigned int tid = get_local_id(0); unsigned int bid = get_group_id(0); float4 force = 0.0f; float4 normal = 0.0f; int _count = _store.w; // for(int i=0; i < 128; ++i) // { // test[tid] = _force_all[id+i*_count]; // } // barrier(CLK_LOCAL_MEM_FENCE); // for(int i=0; i < _store.y; ++i) // { if (_store.y == 64) { normal += _normal[id+0*_count]; normal += _normal[id+1*_count]; normal += _normal[id+2*_count]; normal += _normal[id+3*_count]; normal += _normal[id+4*_count]; normal += _normal[id+5*_count]; normal += _normal[id+6*_count]; normal += _normal[id+7*_count]; normal += _normal[id+8*_count]; normal += _normal[id+9*_count]; normal += _normal[id+10*_count]; normal += _normal[id+11*_count]; normal += _normal[id+12*_count]; normal += _normal[id+13*_count]; normal += _normal[id+14*_count]; normal += _normal[id+15*_count]; normal += _normal[id+16*_count]; normal += _normal[id+17*_count]; normal += _normal[id+18*_count]; normal += _normal[id+19*_count]; normal += _normal[id+20*_count]; normal += _normal[id+21*_count]; normal += _normal[id+22*_count]; normal += _normal[id+23*_count]; normal += _normal[id+24*_count]; normal += _normal[id+25*_count]; normal += _normal[id+26*_count]; normal += _normal[id+27*_count]; normal += _normal[id+28*_count]; normal += _normal[id+29*_count]; normal += _normal[id+30*_count]; normal += _normal[id+31*_count]; normal += _normal[id+32*_count]; normal += _normal[id+33*_count]; normal += _normal[id+34*_count]; normal += _normal[id+35*_count]; normal += _normal[id+36*_count]; normal += _normal[id+37*_count]; normal += _normal[id+38*_count]; normal += _normal[id+39*_count]; normal += _normal[id+40*_count]; normal += _normal[id+41*_count]; normal += _normal[id+42*_count]; normal += _normal[id+43*_count]; normal += _normal[id+44*_count]; normal += _normal[id+45*_count]; normal += _normal[id+46*_count]; normal += _normal[id+47*_count]; normal += _normal[id+48*_count]; normal += _normal[id+49*_count]; normal += _normal[id+50*_count]; normal += _normal[id+51*_count]; normal += _normal[id+52*_count]; normal += _normal[id+53*_count]; normal += _normal[id+54*_count]; normal += _normal[id+55*_count]; normal += _normal[id+56*_count]; normal += _normal[id+57*_count]; normal += _normal[id+58*_count]; normal += _normal[id+59*_count]; normal += _normal[id+60*_count]; normal += _normal[id+61*_count]; normal += _normal[id+62*_count]; normal += _normal[id+63*_count]; force += _force_all[id+0*_count]; force += _force_all[id+1*_count]; force += _force_all[id+2*_count]; force += _force_all[id+3*_count]; force += _force_all[id+4*_count]; force += _force_all[id+5*_count]; force += _force_all[id+6*_count]; force += _force_all[id+7*_count]; force += _force_all[id+8*_count]; force += _force_all[id+9*_count]; force += _force_all[id+10*_count]; force += _force_all[id+11*_count]; force += _force_all[id+12*_count]; force += _force_all[id+13*_count]; force += _force_all[id+14*_count]; force += _force_all[id+15*_count]; force += _force_all[id+16*_count]; force += _force_all[id+17*_count]; force += _force_all[id+18*_count]; force += _force_all[id+19*_count]; force += _force_all[id+20*_count]; force += _force_all[id+21*_count]; force += _force_all[id+22*_count]; force += _force_all[id+23*_count]; force += _force_all[id+24*_count]; force += _force_all[id+25*_count]; force += _force_all[id+26*_count]; force += _force_all[id+27*_count]; force += _force_all[id+28*_count]; force += _force_all[id+29*_count]; force += _force_all[id+30*_count]; force += _force_all[id+31*_count]; force += _force_all[id+32*_count]; force += _force_all[id+33*_count]; force += _force_all[id+34*_count]; force += _force_all[id+35*_count]; force += _force_all[id+36*_count]; force += _force_all[id+37*_count]; force += _force_all[id+38*_count]; force += _force_all[id+39*_count]; force += _force_all[id+40*_count]; force += _force_all[id+41*_count]; force += _force_all[id+42*_count]; force += _force_all[id+43*_count]; force += _force_all[id+44*_count]; force += _force_all[id+45*_count]; force += _force_all[id+46*_count]; force += _force_all[id+47*_count]; force += _force_all[id+48*_count]; force += _force_all[id+49*_count]; force += _force_all[id+50*_count]; force += _force_all[id+51*_count]; force += _force_all[id+52*_count]; force += _force_all[id+53*_count]; force += _force_all[id+54*_count]; force += _force_all[id+55*_count]; force += _force_all[id+56*_count]; force += _force_all[id+57*_count]; force += _force_all[id+58*_count]; force += _force_all[id+59*_count]; force += _force_all[id+60*_count]; force += _force_all[id+61*_count]; force += _force_all[id+62*_count]; force += _force_all[id+63*_count]; } else if (_store.y == 128) { normal += _normal[id+0*_count]; normal += _normal[id+1*_count]; normal += _normal[id+2*_count]; normal += _normal[id+3*_count]; normal += _normal[id+4*_count]; normal += _normal[id+5*_count]; normal += _normal[id+6*_count]; normal += _normal[id+7*_count]; normal += _normal[id+8*_count]; normal += _normal[id+9*_count]; normal += _normal[id+10*_count]; normal += _normal[id+11*_count]; normal += _normal[id+12*_count]; normal += _normal[id+13*_count]; normal += _normal[id+14*_count]; normal += _normal[id+15*_count]; normal += _normal[id+16*_count]; normal += _normal[id+17*_count]; normal += _normal[id+18*_count]; normal += _normal[id+19*_count]; normal += _normal[id+20*_count]; normal += _normal[id+21*_count]; normal += _normal[id+22*_count]; normal += _normal[id+23*_count]; normal += _normal[id+24*_count]; normal += _normal[id+25*_count]; normal += _normal[id+26*_count]; normal += _normal[id+27*_count]; normal += _normal[id+28*_count]; normal += _normal[id+29*_count]; normal += _normal[id+30*_count]; normal += _normal[id+31*_count]; normal += _normal[id+32*_count]; normal += _normal[id+33*_count]; normal += _normal[id+34*_count]; normal += _normal[id+35*_count]; normal += _normal[id+36*_count]; normal += _normal[id+37*_count]; normal += _normal[id+38*_count]; normal += _normal[id+39*_count]; normal += _normal[id+40*_count]; normal += _normal[id+41*_count]; normal += _normal[id+42*_count]; normal += _normal[id+43*_count]; normal += _normal[id+44*_count]; normal += _normal[id+45*_count]; normal += _normal[id+46*_count]; normal += _normal[id+47*_count]; normal += _normal[id+48*_count]; normal += _normal[id+49*_count]; normal += _normal[id+50*_count]; normal += _normal[id+51*_count]; normal += _normal[id+52*_count]; normal += _normal[id+53*_count]; normal += _normal[id+54*_count]; normal += _normal[id+55*_count]; normal += _normal[id+56*_count]; normal += _normal[id+57*_count]; normal += _normal[id+58*_count]; normal += _normal[id+59*_count]; normal += _normal[id+60*_count]; normal += _normal[id+61*_count]; normal += _normal[id+62*_count]; normal += _normal[id+63*_count]; normal += _normal[id+64*_count]; normal += _normal[id+65*_count]; normal += _normal[id+66*_count]; normal += _normal[id+67*_count]; normal += _normal[id+68*_count]; normal += _normal[id+69*_count]; normal += _normal[id+70*_count]; normal += _normal[id+71*_count]; normal += _normal[id+72*_count]; normal += _normal[id+73*_count]; normal += _normal[id+74*_count]; normal += _normal[id+75*_count]; normal += _normal[id+76*_count]; normal += _normal[id+77*_count]; normal += _normal[id+78*_count]; normal += _normal[id+79*_count]; normal += _normal[id+80*_count]; normal += _normal[id+81*_count]; normal += _normal[id+82*_count]; normal += _normal[id+83*_count]; normal += _normal[id+84*_count]; normal += _normal[id+85*_count]; normal += _normal[id+86*_count]; normal += _normal[id+87*_count]; normal += _normal[id+88*_count]; normal += _normal[id+89*_count]; normal += _normal[id+90*_count]; normal += _normal[id+91*_count]; normal += _normal[id+92*_count]; normal += _normal[id+93*_count]; normal += _normal[id+94*_count]; normal += _normal[id+95*_count]; normal += _normal[id+96*_count]; normal += _normal[id+97*_count]; normal += _normal[id+98*_count]; normal += _normal[id+99*_count]; normal += _normal[id+100*_count]; normal += _normal[id+101*_count]; normal += _normal[id+102*_count]; normal += _normal[id+103*_count]; normal += _normal[id+104*_count]; normal += _normal[id+105*_count]; normal += _normal[id+106*_count]; normal += _normal[id+107*_count]; normal += _normal[id+108*_count]; normal += _normal[id+109*_count]; normal += _normal[id+110*_count]; normal += _normal[id+111*_count]; normal += _normal[id+112*_count]; normal += _normal[id+113*_count]; normal += _normal[id+114*_count]; normal += _normal[id+115*_count]; normal += _normal[id+116*_count]; normal += _normal[id+117*_count]; normal += _normal[id+118*_count]; normal += _normal[id+119*_count]; normal += _normal[id+120*_count]; normal += _normal[id+121*_count]; normal += _normal[id+122*_count]; normal += _normal[id+123*_count]; normal += _normal[id+124*_count]; normal += _normal[id+125*_count]; normal += _normal[id+126*_count]; normal += _normal[id+127*_count]; force += _force_all[id+0*_count]; force += _force_all[id+1*_count]; force += _force_all[id+2*_count]; force += _force_all[id+3*_count]; force += _force_all[id+4*_count]; force += _force_all[id+5*_count]; force += _force_all[id+6*_count]; force += _force_all[id+7*_count]; force += _force_all[id+8*_count]; force += _force_all[id+9*_count]; force += _force_all[id+10*_count]; force += _force_all[id+11*_count]; force += _force_all[id+12*_count]; force += _force_all[id+13*_count]; force += _force_all[id+14*_count]; force += _force_all[id+15*_count]; force += _force_all[id+16*_count]; force += _force_all[id+17*_count]; force += _force_all[id+18*_count]; force += _force_all[id+19*_count]; force += _force_all[id+20*_count]; force += _force_all[id+21*_count]; force += _force_all[id+22*_count]; force += _force_all[id+23*_count]; force += _force_all[id+24*_count]; force += _force_all[id+25*_count]; force += _force_all[id+26*_count]; force += _force_all[id+27*_count]; force += _force_all[id+28*_count]; force += _force_all[id+29*_count]; force += _force_all[id+30*_count]; force += _force_all[id+31*_count]; force += _force_all[id+32*_count]; force += _force_all[id+33*_count]; force += _force_all[id+34*_count]; force += _force_all[id+35*_count]; force += _force_all[id+36*_count]; force += _force_all[id+37*_count]; force += _force_all[id+38*_count]; force += _force_all[id+39*_count]; force += _force_all[id+40*_count]; force += _force_all[id+41*_count]; force += _force_all[id+42*_count]; force += _force_all[id+43*_count]; force += _force_all[id+44*_count]; force += _force_all[id+45*_count]; force += _force_all[id+46*_count]; force += _force_all[id+47*_count]; force += _force_all[id+48*_count]; force += _force_all[id+49*_count]; force += _force_all[id+50*_count]; force += _force_all[id+51*_count]; force += _force_all[id+52*_count]; force += _force_all[id+53*_count]; force += _force_all[id+54*_count]; force += _force_all[id+55*_count]; force += _force_all[id+56*_count]; force += _force_all[id+57*_count]; force += _force_all[id+58*_count]; force += _force_all[id+59*_count]; force += _force_all[id+60*_count]; force += _force_all[id+61*_count]; force += _force_all[id+62*_count]; force += _force_all[id+63*_count]; force += _force_all[id+64*_count]; force += _force_all[id+65*_count]; force += _force_all[id+66*_count]; force += _force_all[id+67*_count]; force += _force_all[id+68*_count]; force += _force_all[id+69*_count]; force += _force_all[id+70*_count]; force += _force_all[id+71*_count]; force += _force_all[id+72*_count]; force += _force_all[id+73*_count]; force += _force_all[id+74*_count]; force += _force_all[id+75*_count]; force += _force_all[id+76*_count]; force += _force_all[id+77*_count]; force += _force_all[id+78*_count]; force += _force_all[id+79*_count]; force += _force_all[id+80*_count]; force += _force_all[id+81*_count]; force += _force_all[id+82*_count]; force += _force_all[id+83*_count]; force += _force_all[id+84*_count]; force += _force_all[id+85*_count]; force += _force_all[id+86*_count]; force += _force_all[id+87*_count]; force += _force_all[id+88*_count]; force += _force_all[id+89*_count]; force += _force_all[id+90*_count]; force += _force_all[id+91*_count]; force += _force_all[id+92*_count]; force += _force_all[id+93*_count]; force += _force_all[id+94*_count]; force += _force_all[id+95*_count]; force += _force_all[id+96*_count]; force += _force_all[id+97*_count]; force += _force_all[id+98*_count]; force += _force_all[id+99*_count]; force += _force_all[id+100*_count]; force += _force_all[id+101*_count]; force += _force_all[id+102*_count]; force += _force_all[id+103*_count]; force += _force_all[id+104*_count]; force += _force_all[id+105*_count]; force += _force_all[id+106*_count]; force += _force_all[id+107*_count]; force += _force_all[id+108*_count]; force += _force_all[id+109*_count]; force += _force_all[id+110*_count]; force += _force_all[id+111*_count]; force += _force_all[id+112*_count]; force += _force_all[id+113*_count]; force += _force_all[id+114*_count]; force += _force_all[id+115*_count]; force += _force_all[id+116*_count]; force += _force_all[id+117*_count]; force += _force_all[id+118*_count]; force += _force_all[id+119*_count]; force += _force_all[id+120*_count]; force += _force_all[id+121*_count]; force += _force_all[id+122*_count]; force += _force_all[id+123*_count]; force += _force_all[id+124*_count]; force += _force_all[id+125*_count]; force += _force_all[id+126*_count]; force += _force_all[id+127*_count]; } _force_sum[id] = force; normal = fast_normalize(normal); _out[id].nx = normal.x; _out[id].ny = normal.y; _out[id].nz = normal.z; }

0 Likes
5 Replies
karbous
Journeyman III

Perhaps it isn't the right forever loop cause, but I experienced problems with NVidia card running:

float4 normal = 0.0f;

Instead I had to use :

float4 normal = (float4) 0.0f;

Btw. I wouldn't use comparison for equality on float values because of precision problems. (I mean conditions like this:  if (_store.y == 64) )

Hope it helps a bit.

 

0 Likes

hi chevydevil,

Please send a testcase at streamdeveloper@amd.com.

Also mention the details regarding your CPU,GPU,SDK,Driver and OS.

0 Likes

I think there is no need for a testcase. I forgot to add the struct vertex to the code, so here again. This brings even the stream kernelAnalyser to "no reaction" state.

typedef struct { float x,y,z; float nx, ny, nz; } vertex; __kernel void force_sum( __global vertex* _out, __global float4* _force_all, __global float4* _force_sum, __global float4* _normal, const float4 _store) { int id = get_global_id(0); unsigned int tid = get_local_id(0); unsigned int bid = get_group_id(0); float4 force = 0.0f; float4 normal = 0.0f; int _count = _store.w; // for(int i=0; i < 128; ++i) // { // test[tid] = _force_all[id+i*_count]; // } // barrier(CLK_LOCAL_MEM_FENCE); // for(int i=0; i < _store.y; ++i) // { if (_store.y == 64) { normal += _normal[id+0*_count]; normal += _normal[id+1*_count]; normal += _normal[id+2*_count]; normal += _normal[id+3*_count]; normal += _normal[id+4*_count]; normal += _normal[id+5*_count]; normal += _normal[id+6*_count]; normal += _normal[id+7*_count]; normal += _normal[id+8*_count]; normal += _normal[id+9*_count]; normal += _normal[id+10*_count]; normal += _normal[id+11*_count]; normal += _normal[id+12*_count]; normal += _normal[id+13*_count]; normal += _normal[id+14*_count]; normal += _normal[id+15*_count]; normal += _normal[id+16*_count]; normal += _normal[id+17*_count]; normal += _normal[id+18*_count]; normal += _normal[id+19*_count]; normal += _normal[id+20*_count]; normal += _normal[id+21*_count]; normal += _normal[id+22*_count]; normal += _normal[id+23*_count]; normal += _normal[id+24*_count]; normal += _normal[id+25*_count]; normal += _normal[id+26*_count]; normal += _normal[id+27*_count]; normal += _normal[id+28*_count]; normal += _normal[id+29*_count]; normal += _normal[id+30*_count]; normal += _normal[id+31*_count]; normal += _normal[id+32*_count]; normal += _normal[id+33*_count]; normal += _normal[id+34*_count]; normal += _normal[id+35*_count]; normal += _normal[id+36*_count]; normal += _normal[id+37*_count]; normal += _normal[id+38*_count]; normal += _normal[id+39*_count]; normal += _normal[id+40*_count]; normal += _normal[id+41*_count]; normal += _normal[id+42*_count]; normal += _normal[id+43*_count]; normal += _normal[id+44*_count]; normal += _normal[id+45*_count]; normal += _normal[id+46*_count]; normal += _normal[id+47*_count]; normal += _normal[id+48*_count]; normal += _normal[id+49*_count]; normal += _normal[id+50*_count]; normal += _normal[id+51*_count]; normal += _normal[id+52*_count]; normal += _normal[id+53*_count]; normal += _normal[id+54*_count]; normal += _normal[id+55*_count]; normal += _normal[id+56*_count]; normal += _normal[id+57*_count]; normal += _normal[id+58*_count]; normal += _normal[id+59*_count]; normal += _normal[id+60*_count]; normal += _normal[id+61*_count]; normal += _normal[id+62*_count]; normal += _normal[id+63*_count]; force += _force_all[id+0*_count]; force += _force_all[id+1*_count]; force += _force_all[id+2*_count]; force += _force_all[id+3*_count]; force += _force_all[id+4*_count]; force += _force_all[id+5*_count]; force += _force_all[id+6*_count]; force += _force_all[id+7*_count]; force += _force_all[id+8*_count]; force += _force_all[id+9*_count]; force += _force_all[id+10*_count]; force += _force_all[id+11*_count]; force += _force_all[id+12*_count]; force += _force_all[id+13*_count]; force += _force_all[id+14*_count]; force += _force_all[id+15*_count]; force += _force_all[id+16*_count]; force += _force_all[id+17*_count]; force += _force_all[id+18*_count]; force += _force_all[id+19*_count]; force += _force_all[id+20*_count]; force += _force_all[id+21*_count]; force += _force_all[id+22*_count]; force += _force_all[id+23*_count]; force += _force_all[id+24*_count]; force += _force_all[id+25*_count]; force += _force_all[id+26*_count]; force += _force_all[id+27*_count]; force += _force_all[id+28*_count]; force += _force_all[id+29*_count]; force += _force_all[id+30*_count]; force += _force_all[id+31*_count]; force += _force_all[id+32*_count]; force += _force_all[id+33*_count]; force += _force_all[id+34*_count]; force += _force_all[id+35*_count]; force += _force_all[id+36*_count]; force += _force_all[id+37*_count]; force += _force_all[id+38*_count]; force += _force_all[id+39*_count]; force += _force_all[id+40*_count]; force += _force_all[id+41*_count]; force += _force_all[id+42*_count]; force += _force_all[id+43*_count]; force += _force_all[id+44*_count]; force += _force_all[id+45*_count]; force += _force_all[id+46*_count]; force += _force_all[id+47*_count]; force += _force_all[id+48*_count]; force += _force_all[id+49*_count]; force += _force_all[id+50*_count]; force += _force_all[id+51*_count]; force += _force_all[id+52*_count]; force += _force_all[id+53*_count]; force += _force_all[id+54*_count]; force += _force_all[id+55*_count]; force += _force_all[id+56*_count]; force += _force_all[id+57*_count]; force += _force_all[id+58*_count]; force += _force_all[id+59*_count]; force += _force_all[id+60*_count]; force += _force_all[id+61*_count]; force += _force_all[id+62*_count]; force += _force_all[id+63*_count]; } else if (_store.y == 128) { normal += _normal[id+0*_count]; normal += _normal[id+1*_count]; normal += _normal[id+2*_count]; normal += _normal[id+3*_count]; normal += _normal[id+4*_count]; normal += _normal[id+5*_count]; normal += _normal[id+6*_count]; normal += _normal[id+7*_count]; normal += _normal[id+8*_count]; normal += _normal[id+9*_count]; normal += _normal[id+10*_count]; normal += _normal[id+11*_count]; normal += _normal[id+12*_count]; normal += _normal[id+13*_count]; normal += _normal[id+14*_count]; normal += _normal[id+15*_count]; normal += _normal[id+16*_count]; normal += _normal[id+17*_count]; normal += _normal[id+18*_count]; normal += _normal[id+19*_count]; normal += _normal[id+20*_count]; normal += _normal[id+21*_count]; normal += _normal[id+22*_count]; normal += _normal[id+23*_count]; normal += _normal[id+24*_count]; normal += _normal[id+25*_count]; normal += _normal[id+26*_count]; normal += _normal[id+27*_count]; normal += _normal[id+28*_count]; normal += _normal[id+29*_count]; normal += _normal[id+30*_count]; normal += _normal[id+31*_count]; normal += _normal[id+32*_count]; normal += _normal[id+33*_count]; normal += _normal[id+34*_count]; normal += _normal[id+35*_count]; normal += _normal[id+36*_count]; normal += _normal[id+37*_count]; normal += _normal[id+38*_count]; normal += _normal[id+39*_count]; normal += _normal[id+40*_count]; normal += _normal[id+41*_count]; normal += _normal[id+42*_count]; normal += _normal[id+43*_count]; normal += _normal[id+44*_count]; normal += _normal[id+45*_count]; normal += _normal[id+46*_count]; normal += _normal[id+47*_count]; normal += _normal[id+48*_count]; normal += _normal[id+49*_count]; normal += _normal[id+50*_count]; normal += _normal[id+51*_count]; normal += _normal[id+52*_count]; normal += _normal[id+53*_count]; normal += _normal[id+54*_count]; normal += _normal[id+55*_count]; normal += _normal[id+56*_count]; normal += _normal[id+57*_count]; normal += _normal[id+58*_count]; normal += _normal[id+59*_count]; normal += _normal[id+60*_count]; normal += _normal[id+61*_count]; normal += _normal[id+62*_count]; normal += _normal[id+63*_count]; normal += _normal[id+64*_count]; normal += _normal[id+65*_count]; normal += _normal[id+66*_count]; normal += _normal[id+67*_count]; normal += _normal[id+68*_count]; normal += _normal[id+69*_count]; normal += _normal[id+70*_count]; normal += _normal[id+71*_count]; normal += _normal[id+72*_count]; normal += _normal[id+73*_count]; normal += _normal[id+74*_count]; normal += _normal[id+75*_count]; normal += _normal[id+76*_count]; normal += _normal[id+77*_count]; normal += _normal[id+78*_count]; normal += _normal[id+79*_count]; normal += _normal[id+80*_count]; normal += _normal[id+81*_count]; normal += _normal[id+82*_count]; normal += _normal[id+83*_count]; normal += _normal[id+84*_count]; normal += _normal[id+85*_count]; normal += _normal[id+86*_count]; normal += _normal[id+87*_count]; normal += _normal[id+88*_count]; normal += _normal[id+89*_count]; normal += _normal[id+90*_count]; normal += _normal[id+91*_count]; normal += _normal[id+92*_count]; normal += _normal[id+93*_count]; normal += _normal[id+94*_count]; normal += _normal[id+95*_count]; normal += _normal[id+96*_count]; normal += _normal[id+97*_count]; normal += _normal[id+98*_count]; normal += _normal[id+99*_count]; normal += _normal[id+100*_count]; normal += _normal[id+101*_count]; normal += _normal[id+102*_count]; normal += _normal[id+103*_count]; normal += _normal[id+104*_count]; normal += _normal[id+105*_count]; normal += _normal[id+106*_count]; normal += _normal[id+107*_count]; normal += _normal[id+108*_count]; normal += _normal[id+109*_count]; normal += _normal[id+110*_count]; normal += _normal[id+111*_count]; normal += _normal[id+112*_count]; normal += _normal[id+113*_count]; normal += _normal[id+114*_count]; normal += _normal[id+115*_count]; normal += _normal[id+116*_count]; normal += _normal[id+117*_count]; normal += _normal[id+118*_count]; normal += _normal[id+119*_count]; normal += _normal[id+120*_count]; normal += _normal[id+121*_count]; normal += _normal[id+122*_count]; normal += _normal[id+123*_count]; normal += _normal[id+124*_count]; normal += _normal[id+125*_count]; normal += _normal[id+126*_count]; normal += _normal[id+127*_count]; force += _force_all[id+0*_count]; force += _force_all[id+1*_count]; force += _force_all[id+2*_count]; force += _force_all[id+3*_count]; force += _force_all[id+4*_count]; force += _force_all[id+5*_count]; force += _force_all[id+6*_count]; force += _force_all[id+7*_count]; force += _force_all[id+8*_count]; force += _force_all[id+9*_count]; force += _force_all[id+10*_count]; force += _force_all[id+11*_count]; force += _force_all[id+12*_count]; force += _force_all[id+13*_count]; force += _force_all[id+14*_count]; force += _force_all[id+15*_count]; force += _force_all[id+16*_count]; force += _force_all[id+17*_count]; force += _force_all[id+18*_count]; force += _force_all[id+19*_count]; force += _force_all[id+20*_count]; force += _force_all[id+21*_count]; force += _force_all[id+22*_count]; force += _force_all[id+23*_count]; force += _force_all[id+24*_count]; force += _force_all[id+25*_count]; force += _force_all[id+26*_count]; force += _force_all[id+27*_count]; force += _force_all[id+28*_count]; force += _force_all[id+29*_count]; force += _force_all[id+30*_count]; force += _force_all[id+31*_count]; force += _force_all[id+32*_count]; force += _force_all[id+33*_count]; force += _force_all[id+34*_count]; force += _force_all[id+35*_count]; force += _force_all[id+36*_count]; force += _force_all[id+37*_count]; force += _force_all[id+38*_count]; force += _force_all[id+39*_count]; force += _force_all[id+40*_count]; force += _force_all[id+41*_count]; force += _force_all[id+42*_count]; force += _force_all[id+43*_count]; force += _force_all[id+44*_count]; force += _force_all[id+45*_count]; force += _force_all[id+46*_count]; force += _force_all[id+47*_count]; force += _force_all[id+48*_count]; force += _force_all[id+49*_count]; force += _force_all[id+50*_count]; force += _force_all[id+51*_count]; force += _force_all[id+52*_count]; force += _force_all[id+53*_count]; force += _force_all[id+54*_count]; force += _force_all[id+55*_count]; force += _force_all[id+56*_count]; force += _force_all[id+57*_count]; force += _force_all[id+58*_count]; force += _force_all[id+59*_count]; force += _force_all[id+60*_count]; force += _force_all[id+61*_count]; force += _force_all[id+62*_count]; force += _force_all[id+63*_count]; force += _force_all[id+64*_count]; force += _force_all[id+65*_count]; force += _force_all[id+66*_count]; force += _force_all[id+67*_count]; force += _force_all[id+68*_count]; force += _force_all[id+69*_count]; force += _force_all[id+70*_count]; force += _force_all[id+71*_count]; force += _force_all[id+72*_count]; force += _force_all[id+73*_count]; force += _force_all[id+74*_count]; force += _force_all[id+75*_count]; force += _force_all[id+76*_count]; force += _force_all[id+77*_count]; force += _force_all[id+78*_count]; force += _force_all[id+79*_count]; force += _force_all[id+80*_count]; force += _force_all[id+81*_count]; force += _force_all[id+82*_count]; force += _force_all[id+83*_count]; force += _force_all[id+84*_count]; force += _force_all[id+85*_count]; force += _force_all[id+86*_count]; force += _force_all[id+87*_count]; force += _force_all[id+88*_count]; force += _force_all[id+89*_count]; force += _force_all[id+90*_count]; force += _force_all[id+91*_count]; force += _force_all[id+92*_count]; force += _force_all[id+93*_count]; force += _force_all[id+94*_count]; force += _force_all[id+95*_count]; force += _force_all[id+96*_count]; force += _force_all[id+97*_count]; force += _force_all[id+98*_count]; force += _force_all[id+99*_count]; force += _force_all[id+100*_count]; force += _force_all[id+101*_count]; force += _force_all[id+102*_count]; force += _force_all[id+103*_count]; force += _force_all[id+104*_count]; force += _force_all[id+105*_count]; force += _force_all[id+106*_count]; force += _force_all[id+107*_count]; force += _force_all[id+108*_count]; force += _force_all[id+109*_count]; force += _force_all[id+110*_count]; force += _force_all[id+111*_count]; force += _force_all[id+112*_count]; force += _force_all[id+113*_count]; force += _force_all[id+114*_count]; force += _force_all[id+115*_count]; force += _force_all[id+116*_count]; force += _force_all[id+117*_count]; force += _force_all[id+118*_count]; force += _force_all[id+119*_count]; force += _force_all[id+120*_count]; force += _force_all[id+121*_count]; force += _force_all[id+122*_count]; force += _force_all[id+123*_count]; force += _force_all[id+124*_count]; force += _force_all[id+125*_count]; force += _force_all[id+126*_count]; force += _force_all[id+127*_count]; } _force_sum[id] = force; normal = fast_normalize(normal); _out[id].nx = normal.x; _out[id].ny = normal.y; _out[id].nz = normal.z; }

0 Likes

chevydevil,

I checked your code with the internal SDK. With SKA i am able to compile the code and GPU statistics are generated.

Are you able to compile it on your system in SKA?

I will try to write the host code myself to actually run it on my GPU , but it can save me some efforts if you can provide it.

0 Likes

chevydevil,

I tried the code and the code builds well in the internal SDK.

Thank you for reporting.

0 Likes