I am trying to test the new OpenCL 2.0 SVM features. I am using the AMD APP SDK on a machine that has the following SVM capabilities:
I am testing with a very simple example in which I do the following (after setting up the OpenCL objects, of course): On the host side:
Here is the relevant host code:
// Host-side SVM test: build the kernel, allocate an SVM buffer, fill it from
// the host while it is mapped, then launch the kernel on that buffer.
int status;
cl_int cl_status;
int num_elements = 10;

status = SetupKernel("test_svm_kernel.cl", "test_svm_kernel");
CHECK_ERROR(status, "SetupKernel");

// Room for num_elements ints; an alignment of 0 asks for the default alignment.
svm_input = clSVMAlloc(context, CL_MEM_READ_WRITE, num_elements*sizeof(int),
                       0);
CHECK_ALLOCATION(svm_input, "svm_input");

// Blocking map (CL_TRUE) so the host can touch the buffer as soon as the call
// returns. WRITE_INVALIDATE_REGION: the previous contents are not needed.
cl_status = clEnqueueSVMMap(queue, CL_TRUE,
                            CL_MAP_WRITE_INVALIDATE_REGION,
                            svm_input, num_elements*sizeof(int), 0, NULL,
                            NULL);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueSVMMap");

// Fill element i with the value i. The subscript is required: without it the
// statement assigns to the cast pointer itself and does not compile.
for(int i=0 ; i<num_elements ; i++)
{
    ((int*)svm_input)[i] = i;
}

// Echo the buffer from the host side, element by element.
for(int i=0 ; i<num_elements ; i++)
{
    std::cout << "Host: input[" << i << "] = " << ((int*)svm_input)[i] << std::endl;
}

// Hand the region back to the runtime before the device uses it.
cl_status = clEnqueueSVMUnmap(queue, svm_input, 0, NULL, NULL);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueSVMUnmap");

// Query per-kernel limits for this device.
cl_status = clGetKernelWorkGroupInfo(
    kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
    &kernel_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");

// NOTE(review): the local-memory size (a cl_ulong) is written into
// compile_wrkgrp_size and then overwritten by the next query, so the value
// is effectively discarded. A dedicated cl_ulong variable was probably
// intended -- confirm against the declarations, which are not shown here.
cl_status = clGetKernelWorkGroupInfo(
    kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong),
    &compile_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");

cl_status = clGetKernelWorkGroupInfo(
    kernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
    sizeof(size_t)*3, &compile_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");

// local_threads is not passed to the enqueue below (the local size argument
// is NULL), so the runtime picks the work-group size.
size_t local_threads = 1;//kernel_wrkgrp_size;
size_t globl_threads = num_elements;

// SVM pointers are bound with clSetKernelArgSVMPointer rather than clSetKernelArg.
cl_status = clSetKernelArgSVMPointer(kernel, 0, (void*)(svm_input));
CHECK_OPENCL_ERROR(cl_status, "clSetKernelArgSVMPointer");

// One work-item per element, 1-D range, no explicit local size.
cl_event ndr_event;
cl_status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                   &globl_threads, NULL,
                                   0, NULL, &ndr_event);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueNDRangeKernel");

// clFlush only submits the queued commands; it does not wait for them.
// NOTE(review): nothing in this excerpt waits on ndr_event or calls clFinish.
// Device printf output is only guaranteed to be flushed once the kernel's
// event completes -- confirm that a wait follows this code.
cl_status = clFlush(queue);
CHECK_OPENCL_ERROR(cl_status, "clFlush");
On the kernel side: the kernel is really simple.
// Each work-item prints the element of the buffer at its own global index.
__kernel void test_svm_kernel(__global void* input)
{
    // Index of this work-item in dimension 0.
    int gid = get_global_id(0);

    // The buffer arrives untyped; view it as an array of int.
    __global int* values = (__global int*)input;

    printf("input[%d] = %d\n", gid, values[gid]);
}
The output that I get in the host is:
Host: input[0] = 0
Host: input[1] = 1
Host: input[2] = 2
Host: input[3] = 3
Host: input[4] = 4
Host: input[5] = 5
Host: input[6] = 6
Host: input[7] = 7
Host: input[8] = 8
Host: input[9] = 9
which is the natural expected output.
On the kernel, I get strange output (and it changes sometimes):
input[0] = 0
input[1] = 2
input[3] = 1
input[5] = 5
input[1] = 7
input[8] = 1
input[0] = 0
input[0] = 0
input[0] = 0
input[0] = 0
I don't expect the printf calls on the device to come out in order. However, I would at least expect the array elements to be printed correctly.
Any ideas why I get such strange output?
This seems to be a printf issue. Similar printf issues with OpenCL 2.0 kernels were reported earlier, and a bug report has already been filed.
To verify that SVM is working correctly, modify the array values inside the kernel, then read them back on the host and check the values. For example,