rick.weber

clUtil - a library for making OpenCL as easy to use as CUDA

Discussion created by rick.weber on Dec 22, 2010
Latest reply on Feb 25, 2011 by rick.weber

I've written a library that makes OpenCL really easy to use. Consider this trivial program, written against the plain OpenCL API, that writes the number 20 to every element of an array:

 

#include <Opencl/cl.h>

/*
 * OpenCL C source for the "fill" kernel: every work-item whose global id is
 * below arrayLength stores val into array[global id].
 *
 * Each line ends in '\n' so that line numbers reported in an OpenCL build log
 * correspond to the lines below. The braces must balance, otherwise the
 * program fails to build.
 */
char const* kernelSource =
    "__kernel void fill(__global float* array, unsigned int arrayLength, float val)\n"
    "{\n"
    "    if(get_global_id(0) < arrayLength)\n"
    "    {\n"
    "        array[get_global_id(0)] = val;\n"
    "    }\n"
    "}\n";

int main(int argc, char** argv)
{
   
float val = 20.0f;
   
float array[2000];
    cl_int err
;
    cl_platform_id platform
;
    cl_device_id device
;
    cl_context context
;
    cl_command_queue commandQueue
;
    cl_mem buffer
;
    cl_program program
;
    cl_kernel kernel
;
   
unsigned int length = 2000;

   
//Initialization
    err
= clGetPlatform_IDs(1, &platform, NULL);
    err
= clGetDeviceIDs(platform, CL_DEVICE_TYPE_ANY, 1, &device, NULL);
    context
= clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    commandQueue
= clCreateCommandQueue(context, device, 0, &err);
    program
= clCreateProgramWithSource(context, 1, &kernelSource, 0, &err);
    err
= clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    kernel
= clCreateKernel(program, "fill", &err);

   
//Allocate memory    
    buffer
= clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(array), NULL, &err);

   
//Actually call the kernel
    err
= clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
    err
= clSetKernelArg(kernel, 1, sizeof(length), &length);
    err
= clSetKernelArg(kernel, 2, sizeof(val), &val);

    size_t
global;
    size_t
local = 64;
   
   
global = length % local == 0 ? length : (length / local + 1) * local;

    err
= clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);

   
//Copy data back
    err
= clEnqueueReadBuffer(commandQueue, buffer, CL_TRUE, 0, sizeof(array), array, 0, NULL, NULL);

   
//Free the data on the GPU
    clReleaseMemObject
(buffer);
}
This is the same program using clUtil:

kernel.cl:

// Stores `val` into one element of `array` per work-item.
//   array       - destination buffer in global memory
//   arrayLength - number of elements that may be written
//   val         - value stored into each element
__kernel void fill(__global float* array, unsigned int arrayLength, float val)
{
    const size_t gid = get_global_id(0);

    // Work-items whose global id is at or beyond arrayLength write nothing.
    if(gid >= arrayLength)
    {
        return;
    }

    array[gid] = val;
}

main.cc:

#include <clUtil.h>

int main(int argc, char** argv)
{
   
const char* kernelFiles[] = {"kernel.cl"};
    cl_mem buffer
;
   
float array[2000];
   
unsigned int length = 2000;
   
float val = 20.0f;

    clUtilInitialize
(kernelFiles, 1);
    clUtilAlloc
(sizeof(array), &buffer);
     
    clUtilEnqueueKernel
("fill", clUtilGrid(length, 64), buffer, length, val);
    clUtilDeviceGet
(array, sizeof(array), buffer);

    clUtilFree
(buffer);
}
It makes assumptions about devices and platforms to reduce the number of handles you have floating around, and it uses C++0x constructs to make calling kernels significantly easier.
It currently runs on Linux.
Source and documentation at http://code.google.com/p/clutil/


Outcomes