cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

vrcomputing
Adept I

clSVMFree/clWaitForEvents SIGSEGV Segmentation Fault

Hello,

When I run the following code, which uses clSVMFree to free my buffer, I eventually get a segmentation fault at the clWaitForEvents call, sometimes earlier, sometimes later. When I disable the clSVMFree call the crash does not occur, but then I obviously have a memory leak. Do you see any error?

#include <CL/cl.h>

#include <gtest/gtest.h>

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>

using namespace std;

// Entry-point name of the kernel defined in KernelSource.
const char* KernelName = "kernel_buffer";

// OpenCL C source of the test kernel: every work-item stores its local id
// (modulo 255) into buffer[global id modulo size].
const char* KernelSource =
    "kernel void kernel_buffer("
    "global unsigned char* buffer, unsigned int size)"
    "{\n"
    "  buffer[get_global_id(0) % size] = "
    "get_local_id(0) % 255;"
    "}";

TEST(KERNEL_DYNPAR_SVMFREE, postive) {

    cl_int errCPU;                      // error code returned from api calls

    size_t global;                      // global domain size for our calculation

    size_t local;                       // local domain size for our calculation

    cl_device_id device_id;             // compute device id

    cl_context context;                 // compute context

    cl_command_queue queue_host;          // host's command queue

    cl_command_queue queue_device;        // device's command queue

    cl_program program;                 // compute program

    cl_kernel kernel;                   // compute kernel

    // number of platforms found

    cl_uint count;

    // Connect to a platform

    //

    cl_platform_id platforms[2];

    errCPU = clGetPlatformIDs(2, platforms, &count);

    assert(errCPU == CL_SUCCESS);

    // Connect to a compute device

    //

    errCPU = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

    assert(errCPU == CL_SUCCESS);

    if (errCPU != CL_SUCCESS) {

        printf("Error: Failed to create a device group!\n");

        exit(1);

    }

    // Create a compute context

    //

    context = clCreateContext(0, 1, &device_id, NULL, NULL, &errCPU);

    assert(errCPU == CL_SUCCESS);

    if (!context) {

        printf("Error: Failed to create a compute context!\n");

        exit(1);

    }

    // Create host's command queues

    //

    cl_queue_properties props_host[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0, 0 };

    queue_host = clCreateCommandQueueWithProperties(context, device_id, props_host, &errCPU);

    assert(errCPU == CL_SUCCESS);

    // Create device's command queues

    //

    cl_queue_properties props_device[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0 };

    queue_device = clCreateCommandQueueWithProperties(context, device_id, props_device, &errCPU);

    assert(errCPU == CL_SUCCESS);

    if (!queue_host) {

        printf("Error: Failed to create a command commands!\n");

        exit(1);

    }

    // Create the compute program from the source buffer

    //

    program = clCreateProgramWithSource(context, 1, (const char **) &KernelSource, NULL, &errCPU);

    assert(errCPU == CL_SUCCESS);

    if (!program) {

        printf("Error: Failed to create compute program!\n");

        exit(1);

    }

    // Build the program executable

    //

    errCPU = clBuildProgram(program, 0, NULL, "-cl-opt-disable -cl-std=CL2.0 -g -Werror", NULL, NULL);

    assert(errCPU == CL_SUCCESS);

    if (errCPU != CL_SUCCESS) {

        size_t len;

        char buffer[2048];

        printf("Error: Failed to build program executable!\n");

        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);

        printf("%s\n", buffer);

        exit(1);

    }

    // Create the compute kernel in the program we wish to run

    //

    kernel = clCreateKernel(program, "kernel_buffer", &errCPU);

    assert(errCPU == CL_SUCCESS);

    if (!kernel || errCPU != CL_SUCCESS) {

        printf("Error: Failed to create compute kernel!\n");

        exit(1);

    }

    ///////////////////////////////////////////////////////////////////////////

    // MAIN LOOP

    //

    // enqueue thousands of kernels

    for (int i = 0; i < 100000; ++i) {

//        cout << i << endl;

        const unsigned int size = 1024;

        unsigned char* buffer = (unsigned char*) clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER, size, 0);

//        cout << "buffer address: " << hex << (void*)buffer << endl;

        // Set the arguments to our compute kernel

        //

        errCPU = 0;

        errCPU |= clSetKernelArgSVMPointer(kernel, 0, buffer);

        errCPU |= clSetKernelArg(kernel, 1, sizeof(size), &size);

        if (errCPU != CL_SUCCESS) {

            printf("Error: Failed to set kernel arguments! %d\n", errCPU);

            exit(1);

        }

        // Execute the kernel over the entire range of our 1d input data set

        // using the maximum number of work group items for this device

        //

        global = size;

        local = 64;

        cl_event evt;

        errCPU = clEnqueueNDRangeKernel(queue_host, kernel, 1, NULL, &global, &local, 0, NULL, &evt);

        clFlush(queue_host);

        clWaitForEvents(1, &evt);

        cl_ulong start, end;

        clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr);

        clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr);

        if (errCPU) {

            printf("Error: Failed to execute kernel!\n");

            exit(1);

        }

//        cout << "buffer: ";

//        for (unsigned int i = 0; i < size; ++i) {

//            cout << setw(2) << setfill('0') << hex << (unsigned int)buffer;

//        }

//        cout << endl;

        clSVMFree(context, buffer);

    }

    // Shutdown and cleanup

    //

    clReleaseProgram(program);

    clReleaseKernel(kernel);

    clReleaseCommandQueue(queue_host);

    clReleaseContext(context);

    ASSERT_FALSE(false);

}

My system: Ubuntu 14.04 x64, AMD A10-7890K Radeon R7, 12 Compute Cores 4C+8G, AMD-APP-SDK 3.0, dmesg:

[2.381237] fglrx: module license 'Proprietary. (C) 2002 - ATI Technologies, Starnberg, GERMANY' taints kernel.
[2.400232] <6>[fglrx] module loaded - fglrx 15.30.3 [Dec 17 2015] with 1 minors

Thank you for your help.

Greetings,

Rick

0 Likes
2 Replies
dipak
Big Boss

Hi Rick,

My apologies for this late reply.

Has the issue been resolved? If not, please share the required header files (e.g. gtest/gtest.h) to build the project.

Regards,

0 Likes

Hi dipak,

After updating the SDK, the error was fixed. Sorry for the late reply.

Greetings,

Rick

0 Likes