Hello,
when I run the following code using clSVMFree to free my buffer, I eventually get a segmentation fault at the clWaitForEvents call — sometimes earlier, sometimes later. When I disable the clSVMFree call, the crash does not occur, but then I obviously have a memory leak. Do you see any error?
#include <CL/cl.h>
#include <gtest/gtest.h>
#include <iostream>
using namespace std;
// OpenCL C source of the test kernel: every work-item stores
// (its local id modulo 255) into buffer[(its global id) modulo size].
const char* KernelSource =
    "kernel void kernel_buffer(global unsigned char* buffer, unsigned int size){\n"
    " buffer[get_global_id(0) % size] = get_local_id(0) % 255;}";

// Name of the kernel function defined in KernelSource.
const char* KernelName = "kernel_buffer";
// Stress test: repeatedly allocates a fine-grained SVM buffer, runs
// `kernel_buffer` on it through the host queue, waits for completion and
// frees the buffer again.
//
// Every OpenCL return code is checked with a fatal gtest assertion *before*
// its result is used. In particular the event returned by
// clEnqueueNDRangeKernel is only waited on when the enqueue succeeded, and it
// is released after each iteration. A fatal assertion returns from the test
// immediately, so OpenCL objects created up to that point are not released on
// failure paths.
TEST(KERNEL_DYNPAR_SVMFREE, postive) {
    cl_int errCPU = CL_SUCCESS;  // error code returned from API calls

    // Connect to a platform (the first one reported is used).
    cl_platform_id platforms[2];
    cl_uint count = 0;  // number of platforms found
    errCPU = clGetPlatformIDs(2, platforms, &count);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to query platforms!";
    ASSERT_GT(count, 0u) << "Error: No OpenCL platform found!";

    // Connect to a compute device.
    cl_device_id device_id = nullptr;
    errCPU = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &device_id, nullptr);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create a device group!";

    // Create a compute context.
    cl_context context = clCreateContext(nullptr, 1, &device_id, nullptr, nullptr, &errCPU);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create a compute context!";
    ASSERT_TRUE(context != nullptr) << "Error: Failed to create a compute context!";

    // Create the host's command queue (profiling enabled so that the event
    // timestamps can be queried in the main loop).
    cl_queue_properties props_host[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 };
    cl_command_queue queue_host =
        clCreateCommandQueueWithProperties(context, device_id, props_host, &errCPU);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create a command commands!";
    ASSERT_TRUE(queue_host != nullptr) << "Error: Failed to create a command commands!";

    // Create the device's default on-device command queue. The host code
    // below never enqueues to it directly.
    cl_queue_properties props_device[] = {
        CL_QUEUE_PROPERTIES,
        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE |
            CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT,
        0 };
    cl_command_queue queue_device =
        clCreateCommandQueueWithProperties(context, device_id, props_device, &errCPU);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create the device command queue!";

    // Create the compute program from the source buffer.
    cl_program program = clCreateProgramWithSource(context, 1, &KernelSource, nullptr, &errCPU);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create compute program!";
    ASSERT_TRUE(program != nullptr) << "Error: Failed to create compute program!";

    // Build the program executable. On failure the build log is fetched
    // first, so that it is part of the failure message.
    errCPU = clBuildProgram(program, 0, nullptr, "-cl-opt-disable -cl-std=CL2.0 -g -Werror",
                            nullptr, nullptr);
    if (errCPU != CL_SUCCESS) {
        // Zero-initialised so that it is still a valid (empty) string when
        // the log query itself fails, e.g. because the log exceeds the buffer.
        char build_log[2048] = "";
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                              sizeof(build_log), build_log, nullptr);
        build_log[sizeof(build_log) - 1] = '\0';
        FAIL() << "Error: Failed to build program executable! (" << errCPU << ")\n"
               << build_log;
    }

    // Create the compute kernel in the program we wish to run.
    cl_kernel kernel = clCreateKernel(program, KernelName, &errCPU);
    ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to create compute kernel!";
    ASSERT_TRUE(kernel != nullptr) << "Error: Failed to create compute kernel!";

    ///////////////////////////////////////////////////////////////////////////
    // MAIN LOOP
    //
    // enqueue thousands of kernels
    const unsigned int size = 1024;  // buffer size in bytes == number of work-items
    const size_t global = size;      // global domain size for our calculation
    const size_t local = 64;         // local domain size for our calculation

    for (int i = 0; i < 100000; ++i) {
        unsigned char* buffer = static_cast<unsigned char*>(
            clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER, size, 0));
        ASSERT_TRUE(buffer != nullptr) << "Error: clSVMAlloc failed in iteration " << i;

        // Set the arguments to our compute kernel. The codes are checked
        // one by one; OR-ing them together would not give a valid error code.
        errCPU = clSetKernelArgSVMPointer(kernel, 0, buffer);
        ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to set kernel arguments! (arg 0)";
        errCPU = clSetKernelArg(kernel, 1, sizeof(size), &size);
        ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to set kernel arguments! (arg 1)";

        // Execute the kernel over the entire range of our 1d input data set.
        cl_event evt = nullptr;
        errCPU = clEnqueueNDRangeKernel(queue_host, kernel, 1, nullptr, &global, &local,
                                        0, nullptr, &evt);
        // Must be checked before evt is touched: evt is only a valid event
        // handle when the enqueue succeeded.
        ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to execute kernel! (iteration " << i << ")";

        clFlush(queue_host);
        errCPU = clWaitForEvents(1, &evt);
        ASSERT_EQ(CL_SUCCESS, errCPU) << "Error: Failed to wait for kernel! (iteration " << i << ")";

        cl_ulong start = 0;
        cl_ulong end = 0;
        EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START,
                                                      sizeof(start), &start, nullptr));
        EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END,
                                                      sizeof(end), &end, nullptr));

        // Drop our reference to the event; otherwise one event object is
        // leaked per iteration.
        clReleaseEvent(evt);

        // The kernel has completed (waited on above), so nothing enqueued
        // still uses the buffer when it is freed.
        clSVMFree(context, buffer);
    }

    // Shutdown and cleanup.
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue_device);
    clReleaseCommandQueue(queue_host);
    clReleaseContext(context);
}
My system: Ubuntu 14.04 x64, AMD A10-7890K Radeon R7, 12 Compute Cores 4C+8G, AMD-APP-SDK 3.0, dmesg:
[    2.381237] fglrx: module license 'Proprietary. (C) 2002 - ATI Technologies, Starnberg, GERMANY' taints kernel.
[    2.400232] <6>[fglrx] module loaded - fglrx 15.30.3 [Dec 17 2015] with 1 minors
Thank you for your help.
Greetings,
Rick
Hi Rick,
My apologies for this late reply.
Has the issue been resolved? If not, please share the required header files (e.g. gtest/gtest.h) to build the project.
Regards,
Hi dipak,
After updating the SDK, the error was fixed. Sorry for the late reply.
Greetings,
Rick