I've written a simple vector-add program using SVM (shared virtual memory) with APP SDK 3.0, but it crashes with a segmentation fault. The code follows; I know it is long, but it is a simple OpenCL program. I've tried my best to figure out what's wrong, without success.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
// OpenCL includes
#include <CL/cl.h>
// OpenCL C source for the "vecadd" kernel, passed to
// clCreateProgramWithSource() by main().
// Each work-item reads its index from get_global_id(0) and stores
// A[idx] + B[idx] into C[idx]. The kernel performs no bounds check, so the
// global work size must not exceed the number of elements in the buffers.
const char* programSource =
"__kernel \n"
"void vecadd(__global int *A, \n"
" __global int *B, \n"
" __global int *C) \n"
"{ \n"
" \n"
" // Get the work-item’s unique ID \n"
" int idx = get_global_id(0); \n"
" \n"
" // Add the corresponding locations of \n"
" // 'A' and 'B', and store the result in 'C'. \n"
" C[idx] = A[idx] + B[idx]; \n"
"} \n"
;
int main() {
// Elements in each array
const int elements = 2048;
// Compute the size of the data
size_t datasize = sizeof(int)*elements;
// Use this to check the output of each API call
cl_int status;
// Discover and initialize the platforms
cl_uint numPlatforms = 0;
cl_platform_id *platforms = NULL;
// Use clGetPlatformIDs() to retrieve the number of
// platforms
status = clGetPlatformIDs(0, NULL, &numPlatforms);
// Allocate enough space for each platform
platforms =
(cl_platform_id*)malloc(
numPlatforms*sizeof(cl_platform_id));
// Fill in platforms with clGetPlatformIDs()
status = clGetPlatformIDs(numPlatforms, platforms,
NULL);
// STEP 2: Discover and initialize the devices
cl_uint numDevices = 0;
cl_device_id *devices = NULL;
// Use clGetDeviceIDs() to retrieve the number of
// devices present
status = clGetDeviceIDs(
platforms[0],
CL_DEVICE_TYPE_ALL,
0,
NULL,
&numDevices);
// Allocate enough space for each device
devices =
(cl_device_id*)malloc(
numDevices*sizeof(cl_device_id));
// Fill in devices with clGetDeviceIDs()
status = clGetDeviceIDs(
platforms[0],
CL_DEVICE_TYPE_ALL,
numDevices,
devices,
NULL);
// Create a context
cl_context context = NULL;
// Create a context using clCreateContext() and
// associate it with the devices
context = clCreateContext(
NULL,
numDevices,
devices,
NULL,
NULL,
&status);
// Create a command queue
cl_command_queue cmdQueue;
// Create a command queue using clCreateCommandQueue(),
// and associate it with the device you want to execute
// on
cl_queue_properties prop[] = {0};
cmdQueue = clCreateCommandQueueWithProperties(
context,
devices[0],
prop,
&status);
// Create SVM buffers
void *bufferA = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
void *bufferB = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
void *bufferC = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
if (bufferA == NULL || bufferB==NULL || bufferC==NULL) {
fprintf(stderr, "can't create SVM buffers\n");
exit(-1);
}
/* initialize bufferA and bufferB */
status = clEnqueueSVMMap(cmdQueue,
CL_TRUE, //blocking call
CL_MAP_WRITE_INVALIDATE_REGION,
bufferA,
datasize,
0,
NULL,
NULL);
int *A = (int *)(bufferA);
for (int i = 0; i < elements; i++) {
A = i;
}
status = clEnqueueSVMUnmap(cmdQueue, bufferA, 0, NULL, NULL);
status = clEnqueueSVMMap(cmdQueue,
CL_TRUE, //blocking call
CL_MAP_WRITE_INVALIDATE_REGION,
bufferB,
datasize,
0,
NULL,
NULL);
int *B = (int *)(bufferB);
for (int i = 0; i < elements; i++) {
B = i;
}
status = clEnqueueSVMUnmap(cmdQueue, bufferB, 0, NULL, NULL);
// Create and compile the program
cl_program program = clCreateProgramWithSource(
context,
1,
(const char**)&programSource,
NULL,
&status);
// Build (compile) the program for the devices with
// clBuildProgram()
status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
// Create the kernel
cl_kernel kernel = clCreateKernel(program, "vecadd", &status);
// Set the kernel arguments
status = clSetKernelArgSVMPointer(kernel, 0, (void *)(bufferA));
status |= clSetKernelArgSVMPointer(kernel, 1, (void *)(bufferB));
status |= clSetKernelArgSVMPointer(kernel, 2, (void *)(bufferC));
size_t globalWorkSize[1];
// There are 'elements' work-items
globalWorkSize[0] = elements;
status = clEnqueueNDRangeKernel(
cmdQueue,
kernel,
1,
NULL,
globalWorkSize,
NULL,
0,
NULL,
NULL);
clFinish(cmdQueue);
status = clEnqueueSVMMap(cmdQueue,
CL_TRUE, //blocking call
CL_MAP_WRITE_INVALIDATE_REGION,
bufferC,
datasize,
0,
NULL,
NULL);
// Verify the output
bool result = true;
int *C = (int *)(bufferC);
for(int i = 0; i < elements; i++) {
if(C != i+i) {
result = false;
break;
}
}
if(result) {
fprintf(stderr, "Output is correct\n");
} else {
fprintf(stderr, "Output is incorrect\n");
}
status = clEnqueueSVMUnmap(cmdQueue, bufferC, 0, NULL, NULL);
// Release OpenCL resources
clSVMFree(context, bufferA);
clSVMFree(context, bufferB);
clSVMFree(context, bufferC);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue);
clReleaseContext(context);
// Free host resources
free(platforms);
free(devices);
}
It is a very simple program: it adds the elements of bufferA and bufferB and stores the result in bufferC. But when I run it, I get a segmentation fault, and I really don't know why. Can anyone help me? Thanks.
Solved! Go to Solution.
Another point is that you need to specify the -cl-std=CL2.0
option in the clBuildProgram API call to build the program with OpenCL C 2.0 support. If the -cl-std
build option is not specified, the highest OpenCL C 1.x language version supported by each device is used when compiling the program for that device.
SVM requires OpenCL 2.0 compatible devices. Are you running the kernel on an OpenCL 2.0 compatible device? Please check the output of clinfo. You may also run any SVM sample from the APP SDK to check system compatibility. If everything is okay and you are still facing the problem, please provide your setup details and the output of clinfo.
Have you tried running it in CodeXL? It would be helpful to know at which step it faults and the arguments at that step.
Another point is that you need to specify the -cl-std=CL2.0
option in the clBuildProgram API call to build the program with OpenCL C 2.0 support. If the -cl-std
build option is not specified, the highest OpenCL C 1.x language version supported by each device is used when compiling the program for that device.