Answered (Assumed Answered)

Possible problem with atomic_cmpxchg?

Question asked by arkanet on Jun 1, 2013
Latest reply on Jun 6, 2013 by arkanet

Hi guys,

 

I've been trying to write a kernel that multiplies a row vector by a matrix stored in CSR format, but it gives a different result each time it runs. The GPU is an AMD Radeon™ HD 7970 Graphics.

 

/*
 * Enable the 32-bit integer atomic extensions for global and local memory.
 * The kernel below only calls atomic_cmpxchg on a __global pointer, which is
 * part of the global base-atomics set; the local and extended variants are
 * enabled here but not used by the visible code.
 */
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable

#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable

#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable

/*
 * MatMulTF: accumulate the product of a vector with a CSR-stored matrix.
 *
 * Each work-item handles one row, selected by get_global_id(0). For every
 * stored entry k in [row[gid], row[gid + 1]) it adds A[k] * Rs[gid] to
 * As[col[k]].
 *
 *   A   - stored matrix values
 *   row - per-row start offsets into A/col; row[gid] and row[gid + 1] are read
 *   col - column index of each stored value
 *   Rs  - input vector; one element is read per work-item
 *   As  - output vector, updated in place (this kernel never clears it)
 *
 * Different work-items can target the same As element, so the float add is
 * emulated with a compare-and-swap retry loop on the element's 32-bit pattern.
 *
 * NOTE(review): the order in which competing work-items land their adds is
 * not fixed, and float addition is not associative, so low-order bits of As
 * may differ between runs -- presumably the reported variation; confirm.
 * NOTE(review): there is no bounds guard on gid and As is assumed to be
 * initialised by the host -- verify against the launch code.
 */
__kernel void MatMulTF(__global float *A, __global int *row, __global int *col, __global float *Rs, volatile __global float *As) {
    const int row_index = get_global_id(0);
    int k = row[row_index];
    const int row_end = row[row_index + 1];
    const float scale = Rs[row_index];

    /* Lets the same 32 bits be viewed as a float (for the add) or as an
       unsigned int (for the integer compare-and-swap). */
    union {
        unsigned int intVal;
        float floatVal;
    } expected, desired;

    for (; k < row_end; ++k) {
        const float contribution = A[k] * scale;
        const int target = col[k];
        volatile __global unsigned int *slot =
            (volatile __global unsigned int *)(&As[target]);

        /* Retry until no other work-item changed the slot between our read
           and our swap. */
        for (;;) {
            expected.floatVal = As[target];
            desired.floatVal = expected.floatVal + contribution;
            if (atomic_cmpxchg(slot, expected.intVal, desired.intVal) == expected.intVal) {
                break;
            }
        }
    }
}

 

 

Can anybody help me? thanks.

Outcomes