cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

yy1990cn
Journeyman III

Vector-add program using SVM produces a segmentation fault.

I've written a simple vector-add program using SVM with APP SDK 3.0, but it reports a segmentation fault. The code follows. I know it is long, but it's a simple OpenCL program. I've tried my best to figure out what's wrong, without success.


#include <stdio.h>


#include <string.h>


#include <stdlib.h>


#include <stdbool.h>




// OpenCL includes


#include <CL/cl.h>




/*
 * OpenCL C source for the `vecadd` kernel.
 *
 * Each work-item reads one element from A and one from B at its global
 * index and writes their sum to the same index of C.
 */
const char *programSource =
    "__kernel                                            \n"
    "void vecadd(__global int *A,                        \n"
    "            __global int *B,                        \n"
    "            __global int *C)                        \n"
    "{                                                   \n"
    "                                                    \n"
    "   // Get the work-item’s unique ID                 \n"
    "   int idx = get_global_id(0);                      \n"
    "                                                    \n"
    "   // Add the corresponding locations of            \n"
    "   // 'A' and 'B', and store the result in 'C'.     \n"
    "   C[idx] = A[idx] + B[idx];                        \n"
    "}                                                   \n";




int main() {


    // Elements in each array


    const int elements = 2048;




    // Compute the size of the data


    size_t datasize = sizeof(int)*elements;


    // Use this to check the output of each API call


    cl_int status;




    // Discover and initialize the platforms


    cl_uint numPlatforms = 0;


    cl_platform_id *platforms = NULL;




    // Use clGetPlatformIDs() to retrieve the number of


    // platforms


    status = clGetPlatformIDs(0, NULL, &numPlatforms);




    // Allocate enough space for each platform


    platforms =


            (cl_platform_id*)malloc(


                numPlatforms*sizeof(cl_platform_id));




    // Fill in platforms with clGetPlatformIDs()


    status = clGetPlatformIDs(numPlatforms, platforms,


                              NULL);




    // STEP 2: Discover and initialize the devices


    cl_uint numDevices = 0;


    cl_device_id *devices = NULL;




    // Use clGetDeviceIDs() to retrieve the number of


    // devices present


    status = clGetDeviceIDs(


        platforms[0],


        CL_DEVICE_TYPE_ALL,


        0,


        NULL,


        &numDevices);




    // Allocate enough space for each device


    devices =


            (cl_device_id*)malloc(


                numDevices*sizeof(cl_device_id));




    // Fill in devices with clGetDeviceIDs()


    status = clGetDeviceIDs(


        platforms[0],


        CL_DEVICE_TYPE_ALL,


        numDevices,


        devices,


        NULL);




    // Create a context


    cl_context context = NULL;




    // Create a context using clCreateContext() and


    // associate it with the devices


    context = clCreateContext(


        NULL,


        numDevices,


        devices,


        NULL,


        NULL,


        &status);




    // Create a command queue


    cl_command_queue cmdQueue;




    // Create a command queue using clCreateCommandQueue(),


    // and associate it with the device you want to execute


    // on


    cl_queue_properties prop[] = {0};


    cmdQueue = clCreateCommandQueueWithProperties(


        context,


        devices[0],


        prop,


        &status);




    // Create SVM buffers


    void *bufferA = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);


    void *bufferB = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);


    void *bufferC = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);


    if (bufferA == NULL || bufferB==NULL || bufferC==NULL) {


        fprintf(stderr, "can't create SVM buffers\n");


        exit(-1);


    }


    /* initialize bufferA and bufferB */


    status = clEnqueueSVMMap(cmdQueue,


                             CL_TRUE, //blocking call


                             CL_MAP_WRITE_INVALIDATE_REGION,


                             bufferA,


                             datasize,


                             0,


                             NULL,


                             NULL);


    int *A = (int *)(bufferA);


    for (int i = 0; i < elements; i++) {


        A = i;


    }


    status = clEnqueueSVMUnmap(cmdQueue, bufferA, 0, NULL, NULL);


    status = clEnqueueSVMMap(cmdQueue,


                             CL_TRUE, //blocking call


                             CL_MAP_WRITE_INVALIDATE_REGION,


                             bufferB,


                             datasize,


                             0,


                             NULL,


                             NULL);


    int *B = (int *)(bufferB);


    for (int i = 0; i < elements; i++) {


        B = i;


    }


    status = clEnqueueSVMUnmap(cmdQueue, bufferB, 0, NULL, NULL);


    // Create and compile the program


    cl_program program = clCreateProgramWithSource(


        context,


        1,


        (const char**)&programSource,


        NULL,


        &status);




    // Build (compile) the program for the devices with


    // clBuildProgram()


    status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);




    // Create the kernel


     cl_kernel kernel = clCreateKernel(program, "vecadd", &status);




    // Set the kernel arguments


    status = clSetKernelArgSVMPointer(kernel, 0, (void *)(bufferA));


    status |= clSetKernelArgSVMPointer(kernel, 1, (void *)(bufferB));


    status |= clSetKernelArgSVMPointer(kernel, 2, (void *)(bufferC));




    size_t globalWorkSize[1];


    // There are 'elements' work-items


    globalWorkSize[0] = elements;




    status = clEnqueueNDRangeKernel(


        cmdQueue,


        kernel,


        1,


        NULL,


        globalWorkSize,


        NULL,


        0,


        NULL,


        NULL);


    clFinish(cmdQueue);


    status = clEnqueueSVMMap(cmdQueue,


                             CL_TRUE, //blocking call


                             CL_MAP_WRITE_INVALIDATE_REGION,


                             bufferC,


                             datasize,


                             0,


                             NULL,


                             NULL);


    // Verify the output


    bool result = true;


    int *C = (int *)(bufferC);


    for(int i = 0; i < elements; i++) {


        if(C != i+i) {


            result = false;


            break;


        }


    }


    if(result) {


        fprintf(stderr, "Output is correct\n");


    } else {


        fprintf(stderr, "Output is incorrect\n");


    }




    status = clEnqueueSVMUnmap(cmdQueue, bufferC, 0, NULL, NULL);




    // Release OpenCL resources


    clSVMFree(context, bufferA);


    clSVMFree(context, bufferB);


    clSVMFree(context, bufferC);




    clReleaseKernel(kernel);


    clReleaseProgram(program);


    clReleaseCommandQueue(cmdQueue);


    clReleaseContext(context);




    // Free host resources


    free(platforms);


    free(devices);


}


It is a very simple program: it just adds the elements in bufferA and bufferB, then stores the result in bufferC. But when I run it, I get a segmentation fault, and I really don't know why. Can anyone help me? Thanks.

0 Likes
1 Solution
dipak
Big Boss


Another point: you need to pass the -cl-std=CL2.0 option to clBuildProgram to build the program with OpenCL C 2.0 support. If the -cl-std build option is not specified, the highest OpenCL C 1.x language version supported by each device is used when compiling the program for that device.

View solution in original post

0 Likes
3 Replies
dipak
Big Boss

SVM requires an OpenCL 2.0 compatible device. Are you running the kernel on an OpenCL 2.0 compatible device? Please check the clinfo output. You may also run any SVM sample from the APP SDK to check system compatibility. If everything is okay and you are still facing the problem, please provide your setup details and the output of clinfo.

0 Likes
nibal
Challenger

Have you tried running it in CodeXL? It would be helpful to know at which step it faults and the arguments at that step.

0 Likes
dipak
Big Boss


Another point: you need to pass the -cl-std=CL2.0 option to clBuildProgram to build the program with OpenCL C 2.0 support. If the -cl-std build option is not specified, the highest OpenCL C 1.x language version supported by each device is used when compiling the program for that device.

0 Likes