Answered (Assumed Answered)

simple OpenCL example returns error

Question asked by mark77 on Dec 11, 2012
Latest reply on Dec 11, 2012 by mark77

Hi,

 

I am trying to run a simple example code in OpenCL which just returns device info and runs a very simple kernel. It is from the MacResearch tutorials on OpenCL:

 

#include <stdio.h>
#include <assert.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <stdlib.h>
//#include <stdio.h>
#include <iostream> //want to use cout

//#include <OpenCL/OpenCL.h>
#include <CL/cl.h>

#define NUM_DATA 100

// CL_CHECK(expr): evaluates `expr` once, storing its value in a cl_int.
// If the value is CL_SUCCESS the macro does nothing further; otherwise it
// prints the stringified expression and the numeric code to stderr and
// calls abort(). Wrapped in do { ... } while (0) so it behaves as a single
// statement (e.g. inside an unbraced if/else).
#define CL_CHECK(_expr)                                                         \
   do {                                                                         \
     cl_int _err = _expr;                                                       \
     if (_err == CL_SUCCESS)                                                    \
       break;                                                                   \
     fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err);   \
     abort();                                                                   \
   } while (0)

// CL_CHECK_ERR(expr): evaluates `expr` once and yields its value as the
// value of the macro (via the `({ ... })` statement-expression and `typeof`,
// both compiler extensions rather than standard C/C++).
//
// The macro declares a local `_err` initialised to CL_INVALID_VALUE and never
// assigns it itself, so `expr` must store its status into that variable,
// i.e. the call has to pass `&_err` as its error-code argument. If `_err` is
// not CL_SUCCESS after `expr` has run (including the case where `expr` never
// touched `_err`), the stringified expression and the code are printed to
// stderr and abort() is called.
#define CL_CHECK_ERR(_expr)                                                     \
   ({                                                                           \
     cl_int _err = CL_INVALID_VALUE;                                            \
     typeof(_expr) _ret = _expr;                                                \
     if (_err != CL_SUCCESS) {                                                  \
       fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
       abort();                                                                 \
     }                                                                          \
     _ret;                                                                      \
   })

using namespace std;

#pragma mark -
#pragma mark Utilities
// Reads the whole file `filename` into a freshly malloc'ed, NUL-terminated
// buffer.
//
// Returns the buffer on success; the caller owns it and must free() it.
// Returns NULL if `filename` is NULL, the file cannot be opened or stat'ed,
// memory cannot be allocated, or a read error occurs.
char * load_program_source(const char *filename)
{
    struct stat statbuf;
    FILE *fh;
    char *source;
    size_t capacity;
    size_t length;

    if (filename == NULL)
        return NULL;

    fh = fopen(filename, "r");
    if (fh == NULL)
        return NULL;

    // st_size is only meaningful if stat() succeeded.
    if (stat(filename, &statbuf) != 0 || statbuf.st_size < 0) {
        fclose(fh);
        return NULL;
    }

    capacity = (size_t) statbuf.st_size;
    source = (char *) malloc(capacity + 1);
    if (source == NULL) {
        fclose(fh);
        return NULL;
    }

    // Read byte-wise so the return value is the number of bytes actually
    // obtained; the terminator goes right after them even on a short read.
    length = fread(source, 1, capacity, fh);
    if (ferror(fh)) {
        free(source);
        fclose(fh);
        return NULL;
    }
    source[length] = '\0';

    fclose(fh);
    return source;
}

#pragma mark -
#pragma mark Main OpenCL Routine
int runCL(float * a, float * b, float * results, int n)
{
    cl_program program[1];
    cl_kernel kernel[2];

    cl_command_queue cmd_queue;
    cl_context   context;

    cl_device_id cpu = NULL, device = NULL;

    cl_int err = 0;
    size_t returned_size = 0;
    size_t buffer_size;

    cl_mem a_mem, b_mem, ans_mem;

//    //Mark - going to get platform info first
//    cl_platform_id platforms[100];
//    cl_uint platforms_n = 0;
//    CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
//
//
//    printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
//    for (int i=0; i<platforms_n; i++)
//    {
//        char buffer[10240];
//        printf("  -- %d --\n", i);
//        //CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
//        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL);
//        printf("  PROFILE = %s\n", buffer);
//        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL);
//        printf("  VERSION = %s\n", buffer);
//        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL);
//        printf("  NAME = %s\n", buffer);
//        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL);
//        printf("  VENDOR = %s\n", buffer);
//        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
//        printf("  EXTENSIONS = %s\n", buffer);
//    }
//
//    if (platforms_n == 0)
//        return 1;
//
//    cl_device_id devices[100];
//    cl_uint devices_n = 0;
//    //CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
//    CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
// cout << "here I found platforms_n=" <<platforms_n<< endl;

#pragma mark Device Information
    {




        // Find the CPU CL device, as a fallback
        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
        //err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
        assert(err == CL_SUCCESS);

        // Find the GPU CL device, this is what we really want
        // If there is no GPU device is CL capable, fall back to CPU
        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
        if (err != CL_SUCCESS) device = cpu;
        assert(device);
cout << "found GPU" << endl;
        // Get some information about the returned device
        cl_char vendor_name[1024] = {0};
        cl_char device_name[1024] = {0};
        err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),
                              vendor_name, &returned_size);
        err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
                              device_name, &returned_size);
        assert(err == CL_SUCCESS);
        printf("Connecting to %s %s...\n", vendor_name, device_name);
    }

#pragma mark Context and Command Queue
    {
        // Now create a context to perform our calculation with the
        // specified device
        context = clCreateContext(0, 1, &device, NULL, NULL, &err);
        assert(err == CL_SUCCESS);

        // And also a command queue for the context
        cmd_queue = clCreateCommandQueue(context, device, 0, NULL);
    }

#pragma mark Program and Kernel Creation
    {
        // Load the program source from disk
        // The kernel/program is the project directory and in Xcode the executable
        // is set to launch from that directory hence we use a relative path
        const char * filename = "example.cl";
        char *program_source = load_program_source(filename);
        program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
                                               NULL, &err);
        assert(err == CL_SUCCESS);

        err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
        assert(err == CL_SUCCESS);

        // Now create the kernel "objects" that we want to use in the example file
        kernel[0] = clCreateKernel(program[0], "add", &err);
    }

#pragma mark Memory Allocation
    {
        // Allocate memory on the device to hold our data and store the results into
        buffer_size = sizeof(float) * n;

        // Input array a
        a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
        err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, buffer_size,
                                   (void*)a, 0, NULL, NULL);

        // Input array b
        b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
        err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, buffer_size,
                                    (void*)b, 0, NULL, NULL);
        assert(err == CL_SUCCESS);

        // Results array
        ans_mem    = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, NULL);

        // Get all of the stuff written and allocated
        clFinish(cmd_queue);
    }

#pragma mark Kernel Arguments
    {
        // Now setup the arguments to our kernel
        err  = clSetKernelArg(kernel[0],  0, sizeof(cl_mem), &a_mem);
        err |= clSetKernelArg(kernel[0],  1, sizeof(cl_mem), &b_mem);
        err |= clSetKernelArg(kernel[0],  2, sizeof(cl_mem), &ans_mem);
        assert(err == CL_SUCCESS);
    }

#pragma mark Execution and Read
    {
        // Run the calculation by enqueuing it and forcing the
        // command queue to complete the task
        size_t global_work_size = n;
        err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,
                                     &global_work_size, NULL, 0, NULL, NULL);
        assert(err == CL_SUCCESS);
        clFinish(cmd_queue);

        // Once finished read back the results from the answer
        // array into the results array
        err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, buffer_size,
                                  results, 0, NULL, NULL);
        assert(err == CL_SUCCESS);
        clFinish(cmd_queue);
    }

#pragma mark Teardown
    {
        clReleaseMemObject(a_mem);
        clReleaseMemObject(b_mem);
        clReleaseMemObject(ans_mem);

        clReleaseCommandQueue(cmd_queue);
        clReleaseContext(context);
    }
    return CL_SUCCESS;
}

int main (int argc, const char * argv[]) {

    // Problem size
    int n = 32;

    // Allocate some memory and a place for the results
    float * a = (float *)malloc(n*sizeof(float));
    float * b = (float *)malloc(n*sizeof(float));
    float * results = (float *)malloc(n*sizeof(float));

    // Fill in the values
    for(int i=0;i<n;i++){
        a[i] = (float)i;
        b[i] = (float)n-i;
        results[i] = 0.f;
    }

    // Do the OpenCL calculation
    runCL(a, b, results, n);

    // Print out some results. For this example the values of all elements
    // should be the same as the value of n
    for(int i=0;i<n;i++) printf("%f\n",results[i]);

    // Free up memory
    free(a);
    free(b);
    free(results);

    return 0;
}










 

I am running Ubuntu 12.04 with an Intel i7 CPU and NVIDIA 560Ti GPU, with the latest drivers. For some reason the code returns an error at the very beginning, line 117, when it tries to get the CPU info. If I comment this out and try to find the GPU only, there is also an error (line 124). Here is the output:

FATAL: Module fglrx not found.
Error! Fail to load fglrx kernel module! Maybe you can switch to root user to load kernel module directly
test_opencl3: /home/mark/codes/test_opencl3/main.cpp:117: int runCL(float*, float*, float*, int): Assertion `err == 0' failed.


The module is not the problem as I have other examples working fine which also warn me about the module. How can I find out what the problem is?

Thanks

 

Mark

Outcomes