10 Replies Latest reply on Jan 7, 2013 5:43 AM by scharupa

    serious memory leak in clEnqueueNDRangeKernel


      First of all, please forgive my bad English :)

      The attached code (just 100 lines) does the following simple tasks:


      create a context on a GPU device and full profile platform;

      create and build a simple cl program;

      create a command queue;

      create a very simple kernel (it actually does nothing)

         kernel void doNothing()

         {
            int id = get_global_id(0);

         }

      then it begins a while(1) loop, in which the following things are done:

      enqueue the kernel with clEnqueueNDRangeKernel(queue, doNothing, 1, NULL, &workSize, &workSize, 0, NULL, NULL);

      call clFinish on the command queue;

      sleep for 10 milliseconds.


      The memory usage of the running program keeps growing even though nothing is created or allocated inside the loop. The problem remains even if I get the event returned by clEnqueueNDRangeKernel and release it immediately each time.


      I'm running on Windows 7 64-bit, compiling with the 32-bit Visual Studio compiler ("cl memleak.c user32.lib OpenCL.lib"). My graphics card is a Mobility Radeon HD 5470, with ATI Catalyst 1.9 drivers. The OpenCL implementation is the 32-bit OpenCL.lib file found in the AMD APP SDK version 2.5.


      I hope this is the right place to report a bug :)

      #include <stdlib.h> #include <stdio.h> #include <windows.h> #include <CL/cl.h> #define STRINGIFY(cl_source) #cl_source cl_context context = NULL; cl_program program = NULL; cl_command_queue queue = NULL; cl_kernel doNothing = NULL; void releaseAllAndExit() { if (doNothing != NULL) clReleaseKernel(doNothing); if (queue != NULL) clReleaseCommandQueue(queue); if (program != NULL) clReleaseProgram(program); if (context != NULL) clReleaseContext(context); exit(0); } int main(int argc, char *argv[]) { cl_int error; cl_platform_id platform; cl_device_id device; cl_context_properties properties[3]; const char *programSource = "kernel void doNothing() { int id = get_global_id(0); }"; size_t workSize = 64; cl_event event; error = clGetPlatformIDs(1, &platform, NULL); if (error != CL_SUCCESS) { printf("Error: clGetPlatformIDs: %d\n", error); releaseAllAndExit(); } error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); if (error != CL_SUCCESS) { printf("Error: clGetDeviceIDs: %d\n", error); releaseAllAndExit(); } properties[0] = CL_CONTEXT_PLATFORM; properties[1] = (cl_context_properties) platform; properties[2] = 0; context = clCreateContext(properties, 1, &device, NULL, NULL, &error); if (error != CL_SUCCESS) { printf("Error: clCreateContext: %d\n", error); releaseAllAndExit(); } program = clCreateProgramWithSource(context, 1, &programSource, NULL, &error); if (error != CL_SUCCESS) { printf("Error: clCreateProgramWithSource: %d\n", error); releaseAllAndExit(); } error = clBuildProgram(program, 1, &device, "-Werror", NULL, NULL); if (error != CL_SUCCESS) { printf("Error: clBuildProgram: %d\n", error); releaseAllAndExit(); } queue = clCreateCommandQueue(context, device, 0, &error); if (error != CL_SUCCESS) { printf("Error: clCreateCommandQueue: %d\n", error); releaseAllAndExit(); } doNothing = clCreateKernel(program, "doNothing", &error); if (error != CL_SUCCESS) { printf("Error: clCreateKernel: %d\n", error); releaseAllAndExit(); } while (1) { error = 
clEnqueueNDRangeKernel(queue, doNothing, 1, NULL, &workSize, &workSize, 0, NULL, NULL/*&event*/); if (error != CL_SUCCESS) { printf("Error: clEnqueueDNRangeKernel: %d\n", error); releaseAllAndExit(); } error = clFinish(queue); if (error != CL_SUCCESS) { printf("Error: clFinish: %d\n", error); releaseAllAndExit(); } /*error = clReleaseEvent(event); if (error != CL_SUCCESS) { printf("Error: clReleaseEvent: %d\n", error); releaseAllAndExit(); }*/ Sleep(10); } }