I am trying to testout how well atomicity performs on APU. But my sample program does not update the variable properly hence whole system hangs as I check for updated value at either side (cpu and gpu) in while loop and it will continue to loop until other side updates the variable.
HOST program:
int *data =(int*)clSVMAlloc(context, CL_MEM_READ_WRITE|CL_MEM_SVM_FINE_GRAIN_BUFFER|CL_MEM_SVM_ATOMICS , sizeof(int)*256,0);
int t = 2,ind;
for(ind = 0; ind < 256; ind++) {
atomic_store(&data[ind], 0);
}
status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, indexSpaceSize, workGroupSize, 0, NULL, &kernelExeEvent);
atomic_store(p, 1);
while(t--) {
printf("value of t = %d\n", t);
for(ind = 0; ind < 255; ind++) {
printf("first load %d\n", atomic_load(&data[ind]));
while(atomic_load(&data[ind]) != 1);
atomic_store(&data[ind], 0);
printf("value of ind = %d, %d\n", t, ind);
}
}
clWaitForEvents(1, &kernelExeEvent);
GPU kernel:
__kernel void simple(volatile __global atomic_int *A, __global atomic_int *p) {
int in = 0, index = 0;
while(in || atomic_load(p) == 1) {
in = 1;
while(atomic_load_explicit(&A[get_global_id(0)], memory_scope_work_group, memory_scope_all_svm_devices) !=0);
atomic_store_explicit(&A[get_global_id(0)], 1, memory_order_seq_cst, memory_scope_all_svm_devices);
index++;
if(index > 1)
break;
}
}
By the way, I made another change that I forget to mention. It was the main reason for failing. In the original code, the host-side was not updating all the elements. Please modify the code as shown below:
Thanks.