Answered (Assumed Answered)

I am trying to test how well atomic operations perform on an APU, but my sample program hangs the system

Question asked by avinashkrc on Apr 10, 2019
Latest reply on Apr 15, 2019 by dipak

I am trying to test how well atomic operations perform on an APU. However, my sample program does not update the shared variable properly, so the whole system hangs: each side (CPU and GPU) checks for the updated value in a while loop and keeps looping until the other side updates the variable.

 

HOST program:

 int *data =(int*)clSVMAlloc(context, CL_MEM_READ_WRITE|CL_MEM_SVM_FINE_GRAIN_BUFFER|CL_MEM_SVM_ATOMICS , sizeof(int)*256,0);

int t = 2,ind;
        for(ind = 0; ind < 256; ind++) {
               atomic_store(&data[ind], 0);
        }

 status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, indexSpaceSize, workGroupSize, 0, NULL, &kernelExeEvent);

atomic_store(p, 1);
    while(t--) {
        printf("value of t = %d\n", t);
        for(ind = 0; ind < 255; ind++) {
                printf("first load %d\n", atomic_load(&data[ind]));
               while(atomic_load(&data[ind]) != 1);
               atomic_store(&data[ind], 0);
                printf("value of ind = %d, %d\n", t, ind);
        }
    }
    clWaitForEvents(1, &kernelExeEvent);

 

 

GPU kernel:

 

/*
 * Device side of a host<->device handshake over fine-grained SVM.
 *
 * A: one flag per work-item, indexed by get_global_id(0). The work-item
 *    waits until its flag reads 0, then stores 1.
 * p: start flag. It is read once on entry; the work-item does nothing
 *    unless it reads 1.
 *
 * Each work-item performs two wait/signal rounds and then returns.
 *
 * All atomics use memory_scope_all_svm_devices: the non-explicit atomic
 * functions default to memory_scope_device, which does not cover accesses
 * made by the host.
 */
__kernel void simple(volatile __global atomic_int *A, __global atomic_int *p) {
    const size_t gid = get_global_id(0);
    int round;

    /* Start flag is sampled exactly once. */
    if (atomic_load_explicit(p, memory_order_seq_cst, memory_scope_all_svm_devices) != 1)
        return;

    for (round = 0; round < 2; round++) {
        /* Spin until the flag is back at 0. The second argument must be a
         * memory_order; the third is the memory_scope. */
        while (atomic_load_explicit(&A[gid], memory_order_seq_cst, memory_scope_all_svm_devices) != 0)
            ;
        /* Signal the other side. */
        atomic_store_explicit(&A[gid], 1, memory_order_seq_cst, memory_scope_all_svm_devices);
    }
}

Attachments

Outcomes