AnsweredAssumed Answered

clSVMFree/clWaitForEvents SIGSEGV Segmentation Fault

Question asked by vrcomputing on Feb 5, 2017
Latest reply on Mar 13, 2017 by vrcomputing

Hello,

 

When I run the following code, using clSVMFree to free my buffer, I eventually get a segmentation fault at the clWaitForEvents call. Sometimes it happens earlier, sometimes later. When I disable the clSVMFree call the crash does not occur, but then I obviously have a memory leak. Do you see any error?

 

#include <CL/cl.h>
#include <gtest/gtest.h>
#include <iostream>

using namespace std;

// Name of the kernel function defined in KernelSource below.
const char* KernelName = "kernel_buffer";
// OpenCL C source of the kernel: each work-item stores its local id
// (modulo 255) into buffer[global id modulo size].
const char* KernelSource = ""
        "kernel void kernel_buffer(global unsigned char* buffer, unsigned int size){\n"
        "  buffer[get_global_id(0) % size] = get_local_id(0) % 255;"
        "}";

TEST(KERNEL_DYNPAR_SVMFREE, postive) {

    cl_int errCPU;                      // error code returned from api calls

    size_t global;                      // global domain size for our calculation
    size_t local;                       // local domain size for our calculation

    cl_device_id device_id;             // compute device id
    cl_context context;                 // compute context
    cl_command_queue queue_host;          // host's command queue
    cl_command_queue queue_device;        // device's command queue
    cl_program program;                 // compute program
    cl_kernel kernel;                   // compute kernel

    // number of platforms found
    cl_uint count;

    // Connect to a platform
    //
    cl_platform_id platforms[2];
    errCPU = clGetPlatformIDs(2, platforms, &count);
    assert(errCPU == CL_SUCCESS);

    // Connect to a compute device
    //
    errCPU = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
    assert(errCPU == CL_SUCCESS);
    if (errCPU != CL_SUCCESS) {
        printf("Error: Failed to create a device group!\n");
        exit(1);
    }

    // Create a compute context
    //
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &errCPU);
    assert(errCPU == CL_SUCCESS);
    if (!context) {
        printf("Error: Failed to create a compute context!\n");
        exit(1);
    }

    // Create host's command queues
    //
    cl_queue_properties props_host[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0, 0 };
    queue_host = clCreateCommandQueueWithProperties(context, device_id, props_host, &errCPU);
    assert(errCPU == CL_SUCCESS);

    // Create device's command queues
    //
    cl_queue_properties props_device[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0 };
    queue_device = clCreateCommandQueueWithProperties(context, device_id, props_device, &errCPU);
    assert(errCPU == CL_SUCCESS);

    if (!queue_host) {
        printf("Error: Failed to create a command commands!\n");
        exit(1);
    }

    // Create the compute program from the source buffer
    //
    program = clCreateProgramWithSource(context, 1, (const char **) &KernelSource, NULL, &errCPU);
    assert(errCPU == CL_SUCCESS);

    if (!program) {
        printf("Error: Failed to create compute program!\n");
        exit(1);
    }

    // Build the program executable
    //
    errCPU = clBuildProgram(program, 0, NULL, "-cl-opt-disable -cl-std=CL2.0 -g -Werror", NULL, NULL);
    assert(errCPU == CL_SUCCESS);

    if (errCPU != CL_SUCCESS) {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        exit(1);
    }

    // Create the compute kernel in the program we wish to run
    //
    kernel = clCreateKernel(program, "kernel_buffer", &errCPU);
    assert(errCPU == CL_SUCCESS);

    if (!kernel || errCPU != CL_SUCCESS) {
        printf("Error: Failed to create compute kernel!\n");
        exit(1);
    }

    ///////////////////////////////////////////////////////////////////////////
    // MAIN LOOP
    //

    // enqueue thousands of kernels
    for (int i = 0; i < 100000; ++i) {

//        cout << i << endl;

        const unsigned int size = 1024;
        unsigned char* buffer = (unsigned char*) clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER, size, 0);
//        cout << "buffer address: " << hex << (void*)buffer << endl;

        // Set the arguments to our compute kernel
        //
        errCPU = 0;
        errCPU |= clSetKernelArgSVMPointer(kernel, 0, buffer);
        errCPU |= clSetKernelArg(kernel, 1, sizeof(size), &size);

        if (errCPU != CL_SUCCESS) {
            printf("Error: Failed to set kernel arguments! %d\n", errCPU);
            exit(1);
        }

        // Execute the kernel over the entire range of our 1d input data set
        // using the maximum number of work group items for this device
        //
        global = size;
        local = 64;
        cl_event evt;
        errCPU = clEnqueueNDRangeKernel(queue_host, kernel, 1, NULL, &global, &local, 0, NULL, &evt);
        clFlush(queue_host);
        clWaitForEvents(1, &evt);

        cl_ulong start, end;
        clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr);
        clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr);

        if (errCPU) {
            printf("Error: Failed to execute kernel!\n");
            exit(1);
        }

//        cout << "buffer: ";
//        for (unsigned int i = 0; i < size; ++i) {
//            cout << setw(2) << setfill('0') << hex << (unsigned int)buffer[i];
//        }
//        cout << endl;

        clSVMFree(context, buffer);
    }

    // Shutdown and cleanup
    //
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue_host);
    clReleaseContext(context);

    ASSERT_FALSE(false);
}

 

My system: Ubuntu 14.04 x64, AMD A10-7890K Radeon R7, 12 Compute Cores 4C+8G, AMD-APP-SDK 3.0, dmesg:

[2.381237] fglrx: module license 'Proprietary. (C) 2002 - ATI Technologies, Starnberg, GERMANY' taints kernel.
[2.400232] <6>[fglrx] module loaded - fglrx 15.30.3 [Dec 17 2015] with 1 minors

 

Thank you for your help.

 

Greetings,

 

Rick

Outcomes