Hi,
I am trying to write a simple tutorial on OpenCL 2.0 SVM, and I chose vector addition to illustrate two features:
A. Coarse-grain buffer
B. Fine-grain buffer
The APU I used is AMD A10-7850K (Kaveri), and I have tested FineGrainSVM and SVMBinaryTreeSearch in AMD APP SDK 3.0 successfully.
The following is the main part of my code:
=============================================================
// OpenCL C source for the vecAdd kernel, kept as one string so it can be
// handed to clCreateProgramWithSource at run time.
// Kernel arguments: a and b are the input vectors, c is the output vector,
// and n is the element count. Each work item takes care of one element of c:
// it reads its global ID in dimension 0 and, if that ID is below n, stores
// a[id] + b[id] into c[id]. Work items with an ID >= n do nothing.
const char *kernelSource = "\n" \
"__kernel void vecAdd( __global double *a, \n" \
" __global double *b, \n" \
" __global double *c, \n" \
" const unsigned int n) \n" \
"{ \n" \
" //Get our global thread ID \n" \
" int id = get_global_id(0); \n" \
" \n" \
" //Make sure we do not go out of bounds \n" \
" if (id < n) \n" \
" c[id] = a[id] + b[id]; \n" \
"} \n" \
"\n" ;
int main( int argc, char* argv[] )
{
// Length of vectors
unsigned int n = 100000;
...
// Size, in bytes, of each vector
size_t bytes = n*sizeof(double);
...
// Bind to platform
err = clGetPlatformIDs(1, &cpPlatform, NULL);
// Get ID for the device
err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
// Create a context
cl_context_properties cps[3] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)cpPlatform,
0
};
//context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
// Create a command queue
queue = clCreateCommandQueueWithProperties(context, device_id, 0, &err);
// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1,
(const char **) & kernelSource, NULL, &err);
// Build the program executable
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
// Create the compute kernel in the program we wish to run
kernel = clCreateKernel(program, "vecAdd", &err);
double* h_a = (double*)clSVMAlloc(context, CL_MEM_READ_WRITE, bytes, 0);
double* h_b = (double*)clSVMAlloc(context, CL_MEM_READ_WRITE, bytes, 0);
double* h_c = (double*)clSVMAlloc(context, CL_MEM_READ_WRITE, bytes, 0);
clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, h_a, bytes, 0, NULL, NULL);
clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, h_b, bytes, 0, NULL, NULL);
clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, h_c, bytes, 0, NULL, NULL);
// Initialize vectors on host
int i;
for( i = 0; i < n; i++ )
{
h_a = sinf(i)*sinf(i);
h_b = cosf(i)*cosf(i);
h_c = 0;
}
clEnqueueSVMUnmap(queue, h_a, 0, NULL, NULL);
clEnqueueSVMUnmap(queue, h_b, 0, NULL, NULL);
clEnqueueSVMUnmap(queue, h_c, 0, NULL, NULL);
// Set the arguments to our compute kernel
err = clSetKernelArgSVMPointer(kernel, 0, (double*)h_a);
err |= clSetKernelArgSVMPointer(kernel, 1, (double*)h_b);
err |= clSetKernelArgSVMPointer(kernel, 2, (double*)h_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);
// Execute the kernel over the entire range of the data set
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
0, NULL, NULL);
// Wait for the command queue to get serviced before reading back results
clFlush(queue);
clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, h_c, bytes, 0, NULL, NULL);
//Sum up vector c and print result divided by n, this should equal 1 within error
double sum = 0;
for(i=0; i<n; i++)
sum += h_c;
printf("final result: %f\n", sum/n);
clEnqueueSVMUnmap(queue, h_c, 0, NULL, NULL);
clSVMFree(context, h_a);
clSVMFree(context, h_b);
clSVMFree(context, h_c);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
=============================================================
Every CL runtime API call reports success when I check its return value; however, the program prints "Segmentation fault (core dumped)" and never shows "final result: 1.00000".
Could someone guide me how to fix it?
If needed, I can provide more details about the code.
Many thanks in advance!
Patrick Lin
Solved! Go to Solution.
In order to use OpenCL 2.0 features like SVM, you need to build/compile your kernel code for OpenCL 2.0 using build option "-cl-std=CL2.0" (clBuildProgram). Also the application/program should be built as x64 version.
Regards,
In order to use OpenCL 2.0 features like SVM, you need to build/compile your kernel code for OpenCL 2.0 using build option "-cl-std=CL2.0" (clBuildProgram). Also the application/program should be built as x64 version.
Regards,