AnsweredAssumed Answered

Vector-add program using SVM produces a segmentation fault.

Question asked by yy1990cn on Dec 22, 2014
Latest reply on Dec 22, 2014 by dipak

I've written a simple vector-add program using SVM with APP SDK 3.0, but it reports a segmentation fault. The following is the code. I know it is long, but it's a simple OpenCL program. I've tried my best to figure out what's wrong, but without success.

 

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>


// OpenCL includes
#include <CL/cl.h>


// OpenCL C source for the "vecadd" kernel, kept as a single string so it can
// be handed to clCreateProgramWithSource() at run time.
//
// The kernel takes three __global int pointers (A, B, C). Each work-item reads
// its index with get_global_id(0) and stores A[idx] + B[idx] into C[idx].
// There is no bounds check inside the kernel, so the global work size must not
// exceed the number of elements in the three arrays.
const char* programSource =
"__kernel                                            \n"
"void vecadd(__global int *A,                        \n"
"            __global int *B,                        \n"
"            __global int *C)                        \n"
"{                                                   \n"
"                                                    \n"
"   // Get the work-item’s unique ID                 \n"
"   int idx = get_global_id(0);                      \n"
"                                                    \n"
"   // Add the corresponding locations of            \n"
"   // 'A' and 'B', and store the result in 'C'.     \n"
"   C[idx] = A[idx] + B[idx];                        \n"
"}                                                   \n"
;


int main() {
    // Elements in each array
    const int elements = 2048;


    // Compute the size of the data
    size_t datasize = sizeof(int)*elements;
    // Use this to check the output of each API call
    cl_int status;


    // Discover and initialize the platforms
    cl_uint numPlatforms = 0;
    cl_platform_id *platforms = NULL;


    // Use clGetPlatformIDs() to retrieve the number of
    // platforms
    status = clGetPlatformIDs(0, NULL, &numPlatforms);


    // Allocate enough space for each platform
    platforms =
            (cl_platform_id*)malloc(
                numPlatforms*sizeof(cl_platform_id));


    // Fill in platforms with clGetPlatformIDs()
    status = clGetPlatformIDs(numPlatforms, platforms,
                              NULL);


    // STEP 2: Discover and initialize the devices
    cl_uint numDevices = 0;
    cl_device_id *devices = NULL;


    // Use clGetDeviceIDs() to retrieve the number of
    // devices present
    status = clGetDeviceIDs(
        platforms[0],
        CL_DEVICE_TYPE_ALL,
        0,
        NULL,
        &numDevices);


    // Allocate enough space for each device
    devices =
            (cl_device_id*)malloc(
                numDevices*sizeof(cl_device_id));


    // Fill in devices with clGetDeviceIDs()
    status = clGetDeviceIDs(
        platforms[0],
        CL_DEVICE_TYPE_ALL,
        numDevices,
        devices,
        NULL);


    // Create a context
    cl_context context = NULL;


    // Create a context using clCreateContext() and
    // associate it with the devices
    context = clCreateContext(
        NULL,
        numDevices,
        devices,
        NULL,
        NULL,
        &status);


    // Create a command queue
    cl_command_queue cmdQueue;


    // Create a command queue using clCreateCommandQueue(),
    // and associate it with the device you want to execute
    // on
    cl_queue_properties prop[] = {0};
    cmdQueue = clCreateCommandQueueWithProperties(
        context,
        devices[0],
        prop,
        &status);


    // Create SVM buffers
    void *bufferA = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
    void *bufferB = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
    void *bufferC = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
    if (bufferA == NULL || bufferB==NULL || bufferC==NULL) {
        fprintf(stderr, "can't create SVM buffers\n");
        exit(-1);
    }
    /* initialize bufferA and bufferB */
    status = clEnqueueSVMMap(cmdQueue,
                             CL_TRUE, //blocking call
                             CL_MAP_WRITE_INVALIDATE_REGION,
                             bufferA,
                             datasize,
                             0,
                             NULL,
                             NULL);
    int *A = (int *)(bufferA);
    for (int i = 0; i < elements; i++) {
        A[i] = i;
    }
    status = clEnqueueSVMUnmap(cmdQueue, bufferA, 0, NULL, NULL);
    status = clEnqueueSVMMap(cmdQueue,
                             CL_TRUE, //blocking call
                             CL_MAP_WRITE_INVALIDATE_REGION,
                             bufferB,
                             datasize,
                             0,
                             NULL,
                             NULL);
    int *B = (int *)(bufferB);
    for (int i = 0; i < elements; i++) {
        B[i] = i;
    }
    status = clEnqueueSVMUnmap(cmdQueue, bufferB, 0, NULL, NULL);
    // Create and compile the program
    cl_program program = clCreateProgramWithSource(
        context,
        1,
        (const char**)&programSource,
        NULL,
        &status);


    // Build (compile) the program for the devices with
    // clBuildProgram()
    status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);


    // Create the kernel
     cl_kernel kernel = clCreateKernel(program, "vecadd", &status);


    // Set the kernel arguments
    status = clSetKernelArgSVMPointer(kernel, 0, (void *)(bufferA));
    status |= clSetKernelArgSVMPointer(kernel, 1, (void *)(bufferB));
    status |= clSetKernelArgSVMPointer(kernel, 2, (void *)(bufferC));


    size_t globalWorkSize[1];
    // There are 'elements' work-items
    globalWorkSize[0] = elements;


    status = clEnqueueNDRangeKernel(
        cmdQueue,
        kernel,
        1,
        NULL,
        globalWorkSize,
        NULL,
        0,
        NULL,
        NULL);
    clFinish(cmdQueue);
    status = clEnqueueSVMMap(cmdQueue,
                             CL_TRUE, //blocking call
                             CL_MAP_WRITE_INVALIDATE_REGION,
                             bufferC,
                             datasize,
                             0,
                             NULL,
                             NULL);
    // Verify the output
    bool result = true;
    int *C = (int *)(bufferC);
    for(int i = 0; i < elements; i++) {
        if(C[i] != i+i) {
            result = false;
            break;
        }
    }
    if(result) {
        fprintf(stderr, "Output is correct\n");
    } else {
        fprintf(stderr, "Output is incorrect\n");
    }


    status = clEnqueueSVMUnmap(cmdQueue, bufferC, 0, NULL, NULL);


    // Release OpenCL resources
    clSVMFree(context, bufferA);
    clSVMFree(context, bufferB);
    clSVMFree(context, bufferC);


    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(cmdQueue);
    clReleaseContext(context);


    // Free host resources
    free(platforms);
    free(devices);
}

It is a very simple program: it just adds the elements of bufferA and bufferB and stores the result in bufferC. But when I run it, I get a segmentation fault, and I really don't know why. Can anyone help me? Thanks.

Outcomes