AnsweredAssumed Answered

OpenCL kernel bug

Question asked by zoli0726 on Sep 8, 2015

Hello Devgurus!

 

I have a strange bug in my kernel, and i dont know why its happening.

 

The kernel is quite complex.

If i use the caching method in bold under then the results not getting written back to the global memory. However if i dont use cohesive_element current_beam and simply use Beamlist[beam_index] everywhere, the result are correct and getting written.

 

Its happening on Hawaii with the latest driver 15.7.

 

Please help me with this.

 

Thanks.

 

__kernel void beam_force_and_torque(__global Particle* Particles, __global cohesive_element* Beamlist, double gammab, double dt, __global double* platefZ, __global int* neigh_index) {

 

        __local cohesive_element current_beam[64];

        int i, c0, c1;

        double length, xx, yy, zz;

        double beamelo, vreln, magnit, fn, dissip;

        double ffx, ffy, ffz;

        double angle_torsion, omega_torsion;

        double omX0, omX1;

        double angleY0, angleY1, angleZ0, angleZ1, omY0, omZ0, omY1, omZ1;

        double Ty, Tz, Qy, Qz;

        double energy;

 

 

        vec3d e, dv, e0, e1;

        vec3d t0x, t0y, t0z;

        vec3d t0;

        vec3d shr0yS, shr0zS;

        vec3d om0, om1;

 

        int part_index = get_global_id(0);

        int lindex = get_local_id(0);

        Particle current_particle = Particles[part_index];

 

        platefZ[part_index] = 0;

 

        for(i = 0; i < current_particle.NumNb; i++) {

                if (Beamlist[neigh_index[part_index] + i].exist) {

                int beam_index = neigh_index[part_index] + i;

               current_beam[lindex] = Beamlist[beam_index];

 

                        ffx = 0.0;

                        ffy = 0.0;

                        ffz = 0.0;

 

                        c0 = current_beam[lindex].end[0];

                        c1 = current_beam[lindex].end[1];

                        e.x = Particles[c1].rx - current_particle.rx;

                        e.y = Particles[c1].ry - current_particle.ry;

                        e.z = Particles[c1].rz - current_particle.rz;

                        length = vec3d_length(e);

                        beamelo = length - current_beam[lindex].length0;

                        current_beam[lindex].elong = beamelo;

                        e.x = e.x / length;

                        e.y = e.y / length;

                        e.z = e.z / length;

 

                        dv.x = Particles[c1].rx1 - current_particle.rx1;

                        dv.y = Particles[c1].ry1 - current_particle.ry1;

                        dv.z = Particles[c1].rz1 - current_particle.rz1;

 

                        vreln = scalar_prod(dv, e);

                        fn = beamelo * current_beam[lindex].longit;

                        dissip = gammab*vreln;

 

                        ffx = fn * e.x + dissip * e.x;

                        ffy = fn * e.y + dissip * e.y;

                        ffz = fn * e.z + dissip * e.z;

 

                        current_particle.fx += ffx;

                        current_particle.fy += ffy;

                        current_particle.fz += ffz;

 

 

                        om0.x = current_particle.ox;

                        om0.y = current_particle.oy;

                        om0.z = current_particle.oz;

 

                        om1.x = Particles[c1].ox;

                        om1.y = Particles[c1].oy;

                        om1.z = Particles[c1].oz;

 

                        omX0 = scalar_prod(om0, current_beam[lindex].b0x);

                        omX1 = scalar_prod(om1, current_beam[lindex].b0x);

                        omega_torsion = omX0 - omX1;

 

                        angle_torsion = current_beam[lindex].angle_Tor + omega_torsion*dt;

                        current_beam[lindex].angle_Tor = angle_torsion;

 

                        magnit = current_beam[lindex].torsion * (-angle_torsion - gammab * omega_torsion);

                        t0x = scalar_prod_vec3d(magnit, current_beam[lindex].b0x);

 

                        magnit = -magnit;

 

                        e0 = rotate_vector_3d(e, current_particle.ROTA);

 

                        xx = scalar_prod(e0, current_beam[lindex].b0x);

                        yy = scalar_prod(e0, current_beam[lindex].b0y);

                        zz = scalar_prod(e0, current_beam[lindex].b0z);

 

                        angleY0 =  -atan2(zz,xx);

                        angleZ0 =  -atan2(yy,xx);

 

                        current_beam[lindex].Theta0 = acos(xx);

 

                        omY0 = (angleY0 - current_beam[lindex].angle0_Y) / dt;

                        omZ0 = (angleZ0 - current_beam[lindex].angle0_Z) / dt;

                        current_beam[lindex].angle0_Y = angleY0;

                        current_beam[lindex].angle0_Z = angleZ0;

 

                        e1 = rotate_vector_3d(e, Particles[c1].ROTA);

                        xx = scalar_prod(e1, current_beam[lindex].b0x);

                        yy = scalar_prod(e1, current_beam[lindex].b0y);

                        zz = scalar_prod(e1, current_beam[lindex].b0z);

 

                        angleY1 =  -atan2(zz,xx);

                        angleZ1 =  -atan2(yy,xx);

 

                        current_beam[lindex].Theta1 = acos(xx);

 

                        omY1 = (angleY1 - current_beam[lindex].angle1_Y) / dt;

                        omZ1 = (angleZ1 - current_beam[lindex].angle1_Z) / dt;

 

                        current_beam[lindex].angle1_Y = angleY1;

                        current_beam[lindex].angle1_Z = angleZ1;

 

                        Qz = 3.0 * (current_beam[lindex].bending * (angleZ1 + angleZ0) + gammab * (omZ1 + omZ0)) / current_beam[lindex].length0;

                        Tz = current_beam[lindex].bending * (angleZ1 - angleZ0) + gammab * (omZ1 - omZ0);

                        

                        Qy = 3.0 * (current_beam[lindex].bending * (angleY1 + angleY0) + gammab * (omY1 + omY0)) / current_beam[lindex].length0;

                        Ty = current_beam[lindex].bending * (angleY1 - angleY0) + gammab * (omY1 - omY0);

 

                        magnit = Ty - Qy*length;

                        t0y = scalar_prod_vec3d(magnit, current_beam[lindex].b0y);

 

                        magnit = -(Ty + Qy * length);

 

                        magnit = Tz - Qz*length;

                        t0z = scalar_prod_vec3d(magnit, current_beam[lindex].b0z);

 

                        magnit = -(Tz + Qz * length);

 

                        shr0yS = scalar_prod_vec3d(-Qy, current_beam[lindex].b0z);

                        shr0zS = scalar_prod_vec3d(-Qz, current_beam[lindex].b0y);

 

                        shr0yS = invrot_vector_3d(shr0yS, current_particle.ROTA);

                        shr0zS = invrot_vector_3d(shr0zS, current_particle.ROTA);

 

                        t0.x = t0x.x - t0y.x + t0z.x;

                        t0.y = t0x.y - t0y.y + t0z.y;

                        t0.z = t0x.z - t0y.z + t0z.z;

 

                        current_particle.tx += t0.x;

                        current_particle.ty += t0.y;

                        current_particle.tz += t0.z;

 

                        current_particle.fx = current_particle.fx + shr0yS.x + shr0zS.x;

                        current_particle.fy = current_particle.fy + shr0yS.y + shr0zS.y;

                        current_particle.fz = current_particle.fz + shr0yS.z + shr0zS.z;

 

                        energy = 0.5 * fn * fn / current_beam[lindex].longit +

                                0.5 * current_beam[lindex].torsion * angle_torsion * angle_torsion +

                                current_beam[lindex].bending * (angleY0 * angleY0 + angleY1 * angleY1 + angleY0 * angleY1 +

                                angleZ0 * angleZ0 + angleZ1 * angleZ1 + angleZ0 * angleZ1);

 

                        current_beam[lindex].Energy = energy;

                        double test1 = ffz + shr0yS.z + shr0zS.z;

 

                        if (current_particle.PState == 1 && Particles[c1].PState == 0) {

                                platefZ[part_index] += test1;

                        }      

 

                       Beamlist[beam_index] = current_beam[lindex];

                }

        }

        Particles[part_index] = current_particle;                      

}

 

Az üzenetet szerkesztette: Zoltan Janosi

Outcomes