2 Replies Latest reply on Mar 13, 2017 11:03 AM by vrcomputing

    clSVMFree/clWaitForEvents SIGSEGV Segmentation Fault

    vrcomputing

      Hello,

       

      When I run the following code, which uses clSVMFree to free my buffer, I eventually get a segmentation fault at the clWaitForEvents call, sometimes earlier, sometimes later. When I disable the clSVMFree call the crash does not occur, but then I obviously have a memory leak. Do you see any error?

       

      #include <CL/cl.h>
      #include <gtest/gtest.h>
      #include <iostream>
      
      using namespace std;
      
      // Entry-point name of the OpenCL kernel defined in KernelSource.
      const char* KernelName = "kernel_buffer";

      // OpenCL C source of the test kernel: each work-item writes its local id
      // (modulo 255) into the byte at index (global id modulo size) of `buffer`.
      const char* KernelSource =
              "kernel void kernel_buffer(global unsigned char* buffer, unsigned int size){\n"
              "  buffer[get_global_id(0) % size] = get_local_id(0) % 255;}";
      
      TEST(KERNEL_DYNPAR_SVMFREE, postive) {
      
          cl_int errCPU;                      // error code returned from api calls
      
          size_t global;                      // global domain size for our calculation
          size_t local;                       // local domain size for our calculation
      
          cl_device_id device_id;             // compute device id
          cl_context context;                 // compute context
          cl_command_queue queue_host;          // host's command queue
          cl_command_queue queue_device;        // device's command queue
          cl_program program;                 // compute program
          cl_kernel kernel;                   // compute kernel
      
          // number of platforms found
          cl_uint count;
      
          // Connect to a platform
          //
          cl_platform_id platforms[2];
          errCPU = clGetPlatformIDs(2, platforms, &count);
          assert(errCPU == CL_SUCCESS);
      
          // Connect to a compute device
          //
          errCPU = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
          assert(errCPU == CL_SUCCESS);
          if (errCPU != CL_SUCCESS) {
              printf("Error: Failed to create a device group!\n");
              exit(1);
          }
      
          // Create a compute context
          //
          context = clCreateContext(0, 1, &device_id, NULL, NULL, &errCPU);
          assert(errCPU == CL_SUCCESS);
          if (!context) {
              printf("Error: Failed to create a compute context!\n");
              exit(1);
          }
      
          // Create host's command queues
          //
          cl_queue_properties props_host[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0, 0 };
          queue_host = clCreateCommandQueueWithProperties(context, device_id, props_host, &errCPU);
          assert(errCPU == CL_SUCCESS);
      
          // Create device's command queues
          //
          cl_queue_properties props_device[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0 };
          queue_device = clCreateCommandQueueWithProperties(context, device_id, props_device, &errCPU);
          assert(errCPU == CL_SUCCESS);
      
          if (!queue_host) {
              printf("Error: Failed to create a command commands!\n");
              exit(1);
          }
      
          // Create the compute program from the source buffer
          //
          program = clCreateProgramWithSource(context, 1, (const char **) &KernelSource, NULL, &errCPU);
          assert(errCPU == CL_SUCCESS);
      
          if (!program) {
              printf("Error: Failed to create compute program!\n");
              exit(1);
          }
      
          // Build the program executable
          //
          errCPU = clBuildProgram(program, 0, NULL, "-cl-opt-disable -cl-std=CL2.0 -g -Werror", NULL, NULL);
          assert(errCPU == CL_SUCCESS);
      
          if (errCPU != CL_SUCCESS) {
              size_t len;
              char buffer[2048];
      
              printf("Error: Failed to build program executable!\n");
              clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
              printf("%s\n", buffer);
              exit(1);
          }
      
          // Create the compute kernel in the program we wish to run
          //
          kernel = clCreateKernel(program, "kernel_buffer", &errCPU);
          assert(errCPU == CL_SUCCESS);
      
          if (!kernel || errCPU != CL_SUCCESS) {
              printf("Error: Failed to create compute kernel!\n");
              exit(1);
          }
      
          ///////////////////////////////////////////////////////////////////////////
          // MAIN LOOP
          //
      
          // enqueue thousands of kernels
          for (int i = 0; i < 100000; ++i) {
      
      //        cout << i << endl;
      
              const unsigned int size = 1024;
              unsigned char* buffer = (unsigned char*) clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER, size, 0);
      //        cout << "buffer address: " << hex << (void*)buffer << endl;
      
              // Set the arguments to our compute kernel
              //
              errCPU = 0;
              errCPU |= clSetKernelArgSVMPointer(kernel, 0, buffer);
              errCPU |= clSetKernelArg(kernel, 1, sizeof(size), &size);
      
              if (errCPU != CL_SUCCESS) {
                  printf("Error: Failed to set kernel arguments! %d\n", errCPU);
                  exit(1);
              }
      
              // Execute the kernel over the entire range of our 1d input data set
              // using the maximum number of work group items for this device
              //
              global = size;
              local = 64;
              cl_event evt;
              errCPU = clEnqueueNDRangeKernel(queue_host, kernel, 1, NULL, &global, &local, 0, NULL, &evt);
              clFlush(queue_host);
              clWaitForEvents(1, &evt);
      
              cl_ulong start, end;
              clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr);
              clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr);
      
              if (errCPU) {
                  printf("Error: Failed to execute kernel!\n");
                  exit(1);
              }
      
      //        cout << "buffer: ";
      //        for (unsigned int i = 0; i < size; ++i) {
      //            cout << setw(2) << setfill('0') << hex << (unsigned int)buffer[i];
      //        }
      //        cout << endl;
      
              clSVMFree(context, buffer);
          }
      
          // Shutdown and cleanup
          //
          clReleaseProgram(program);
          clReleaseKernel(kernel);
          clReleaseCommandQueue(queue_host);
          clReleaseContext(context);
      
          ASSERT_FALSE(false);
      }
      
      

       

      My system: Ubuntu 14.04 x64, AMD A10-7890K Radeon R7, 12 Compute Cores 4C+8G, AMD-APP-SDK 3.0, dmesg:

      [2.381237] fglrx: module license 'Proprietary. (C) 2002 - ATI Technologies, Starnberg, GERMANY' taints kernel.
      [2.400232] <6>[fglrx] module loaded - fglrx 15.30.3 [Dec 17 2015] with 1 minors

       

      Thank you for your help.

       

      Greetings,

       

      Rick