cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

mark77
Journeyman III

simple OpenCL example returns error

Hi,

I am trying to run a simple example code in OpenCL which just returns device info and runs a very simple kernel. It is from the MacResearch tutorials on OpenCL:

#include <stdio.h>

#include <assert.h>

#include <sys/sysctl.h>

#include <sys/stat.h>

#include <stdlib.h>

//#include <stdio.h>

#include <iostream> //want to use cout

//#include <OpenCL/OpenCL.h>

#include <CL/cl.h>

#define NUM_DATA 100

// CL_CHECK(expr): evaluate an OpenCL call that RETURNS its cl_int status.
// On anything other than CL_SUCCESS, print the failing expression and its
// error code to stderr and abort().
//
// The body is wrapped in do { ... } while (0) so the macro behaves as a
// single statement (safe inside an unbraced if/else). Every continuation
// backslash must be the last character before the newline, with no blank
// line after it; otherwise the macro ends early.
#define CL_CHECK(_expr)                                                         \
   do {                                                                         \
     cl_int _err = (_expr);                                                     \
     if (_err == CL_SUCCESS)                                                    \
       break;                                                                   \
     fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err);   \
     abort();                                                                   \
   } while (0)

// CL_CHECK_ERR(expr): evaluate an OpenCL call that returns an OBJECT and
// reports its status through an out-parameter. The expression must pass
// `&_err` as that out-parameter, e.g.
//
//     ctx = CL_CHECK_ERR(clCreateContext(NULL, 1, &dev, NULL, NULL, &_err));
//
// `_err` starts as CL_INVALID_VALUE, so an expression that never writes it
// is treated as a failure. On failure the expression and error code are
// printed to stderr and the process aborts; on success the macro yields the
// value of the expression.
//
// Relies on GNU extensions (statement expressions and __typeof__), which GCC
// and Clang accept; __typeof__ is used instead of typeof so the macro also
// works in strict ISO modes such as -std=c11.
#define CL_CHECK_ERR(_expr)                                                     \
   ({                                                                           \
     cl_int _err = CL_INVALID_VALUE;                                            \
     __typeof__(_expr) _ret = (_expr);                                          \
     if (_err != CL_SUCCESS) {                                                  \
       fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
       abort();                                                                 \
     }                                                                          \
     _ret;                                                                      \
   })

using namespace std;

#pragma mark -

#pragma mark Utilities

// Read the whole file `filename` into a freshly allocated, NUL-terminated
// buffer.
//
// Returns the buffer on success; the caller owns it and must free() it.
// Returns NULL if `filename` is NULL, the file cannot be opened or stat'ed,
// memory cannot be allocated, or a read error occurs.
char * load_program_source(const char *filename)
{
    struct stat statbuf;
    FILE *fh;
    char *source;
    size_t file_size;
    size_t bytes_read;

    if (filename == NULL)
        return NULL;

    fh = fopen(filename, "r");
    if (fh == NULL)
        return NULL;

    // The file size determines the allocation; bail out if it is unavailable.
    if (stat(filename, &statbuf) != 0 || statbuf.st_size < 0) {
        fclose(fh);
        return NULL;
    }
    file_size = (size_t) statbuf.st_size;

    // The cast keeps this valid when the file is compiled as C++.
    source = (char *) malloc(file_size + 1);
    if (source == NULL) {
        fclose(fh);
        return NULL;
    }

    // Read byte-wise so a short read still reports how much was obtained.
    bytes_read = fread(source, 1, file_size, fh);
    if (ferror(fh)) {
        free(source);
        fclose(fh);
        return NULL;
    }

    // Terminate after the bytes actually read, never past initialised data.
    source[bytes_read] = '\0';

    fclose(fh);
    return source;
}

#pragma mark -

#pragma mark Main OpenCL Routine

int runCL(float * a, float * b, float * results, int n)

{

    cl_program program[1];

    cl_kernel kernel[2];

    cl_command_queue cmd_queue;

    cl_context   context;

    cl_device_id cpu = NULL, device = NULL;

    cl_int err = 0;

    size_t returned_size = 0;

    size_t buffer_size;

    cl_mem a_mem, b_mem, ans_mem;

//    //Mark - going to get platform info first

//    cl_platform_id platforms[100];

//    cl_uint platforms_n = 0;

//    CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));

//

//

//    printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);

//    for (int i=0; i<platforms_n; i++)

//    {

//        char buffer[10240];

//        printf("  -- %d --\n", i);

//        //CL_CHECK(clGetPlatformInfo(platforms, CL_PLATFORM_PROFILE, 10240, buffer, NULL));

//        err=clGetPlatformInfo(platforms, CL_PLATFORM_PROFILE, 10240, buffer, NULL);

//        printf("  PROFILE = %s\n", buffer);

//        err=clGetPlatformInfo(platforms, CL_PLATFORM_VERSION, 10240, buffer, NULL);

//        printf("  VERSION = %s\n", buffer);

//        err=clGetPlatformInfo(platforms, CL_PLATFORM_NAME, 10240, buffer, NULL);

//        printf("  NAME = %s\n", buffer);

//        err=clGetPlatformInfo(platforms, CL_PLATFORM_VENDOR, 10240, buffer, NULL);

//        printf("  VENDOR = %s\n", buffer);

//        err=clGetPlatformInfo(platforms, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);

//        printf("  EXTENSIONS = %s\n", buffer);

//    }

//

//    if (platforms_n == 0)

//        return 1;

//

//    cl_device_id devices[100];

//    cl_uint devices_n = 0;

//    //CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));

//    CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));

// cout << "here I found platforms_n=" <<platforms_n<< endl;

#pragma mark Device Information

    {

        // Find the CPU CL device, as a fallback

        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);

        //err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);

        assert(err == CL_SUCCESS);

        // Find the GPU CL device, this is what we really want

        // If there is no GPU device is CL capable, fall back to CPU

        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

        err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

        if (err != CL_SUCCESS) device = cpu;

        assert(device);

cout << "found GPU" << endl;

        // Get some information about the returned device

        cl_char vendor_name[1024] = {0};

        cl_char device_name[1024] = {0};

        err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),

                              vendor_name, &returned_size);

        err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),

                              device_name, &returned_size);

        assert(err == CL_SUCCESS);

        printf("Connecting to %s %s...\n", vendor_name, device_name);

    }

#pragma mark Context and Command Queue

    {

        // Now create a context to perform our calculation with the

        // specified device

        context = clCreateContext(0, 1, &device, NULL, NULL, &err);

        assert(err == CL_SUCCESS);

        // And also a command queue for the context

        cmd_queue = clCreateCommandQueue(context, device, 0, NULL);

    }

#pragma mark Program and Kernel Creation

    {

        // Load the program source from disk

        // The kernel/program is the project directory and in Xcode the executable

        // is set to launch from that directory hence we use a relative path

        const char * filename = "example.cl";

        char *program_source = load_program_source(filename);

        program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,

                                               NULL, &err);

        assert(err == CL_SUCCESS);

        err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);

        assert(err == CL_SUCCESS);

        // Now create the kernel "objects" that we want to use in the example file

        kernel[0] = clCreateKernel(program[0], "add", &err);

    }

#pragma mark Memory Allocation

    {

        // Allocate memory on the device to hold our data and store the results into

        buffer_size = sizeof(float) * n;

        // Input array a

        a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);

        err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, buffer_size,

                                   (void*)a, 0, NULL, NULL);

        // Input array b

        b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);

        err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, buffer_size,

                                    (void*)b, 0, NULL, NULL);

        assert(err == CL_SUCCESS);

        // Results array

        ans_mem    = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, NULL);

        // Get all of the stuff written and allocated

        clFinish(cmd_queue);

    }

#pragma mark Kernel Arguments

    {

        // Now setup the arguments to our kernel

        err  = clSetKernelArg(kernel[0],  0, sizeof(cl_mem), &a_mem);

        err |= clSetKernelArg(kernel[0],  1, sizeof(cl_mem), &b_mem);

        err |= clSetKernelArg(kernel[0],  2, sizeof(cl_mem), &ans_mem);

        assert(err == CL_SUCCESS);

    }

#pragma mark Execution and Read

    {

        // Run the calculation by enqueuing it and forcing the

        // command queue to complete the task

        size_t global_work_size = n;

        err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,

                                     &global_work_size, NULL, 0, NULL, NULL);

        assert(err == CL_SUCCESS);

        clFinish(cmd_queue);

        // Once finished read back the results from the answer

        // array into the results array

        err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, buffer_size,

                                  results, 0, NULL, NULL);

        assert(err == CL_SUCCESS);

        clFinish(cmd_queue);

    }

#pragma mark Teardown

    {

        clReleaseMemObject(a_mem);

        clReleaseMemObject(b_mem);

        clReleaseMemObject(ans_mem);

        clReleaseCommandQueue(cmd_queue);

        clReleaseContext(context);

    }

    return CL_SUCCESS;

}

// Entry point: fill two arrays so that a[i] + b[i] == n for every i, run the
// OpenCL computation through runCL, and print the results.
// Returns 0 on success, EXIT_FAILURE if allocation or runCL fails.
int main (int argc, const char * argv[]) {
    // Problem size
    int n = 32;
    int status;

    (void)argc;
    (void)argv;

    // Allocate some memory and a place for the results
    // (the casts keep this valid when the file is compiled as C++)
    float * a = (float *)malloc(n*sizeof(float));
    float * b = (float *)malloc(n*sizeof(float));
    float * results = (float *)malloc(n*sizeof(float));

    if (a == NULL || b == NULL || results == NULL) {
        fprintf(stderr, "out of memory\n");
        free(a);
        free(b);
        free(results);
        return EXIT_FAILURE;
    }

    // Fill in the values element by element
    for(int i=0;i<n;i++){
        a[i] = (float)i;
        b[i] = (float)(n-i);
        results[i] = 0.f;
    }

    // Do the OpenCL calculation; non-zero means an OpenCL error code
    status = runCL(a, b, results, n);

    // Print out some results. For this example the values of all elements
    // should be the same as the value of n
    if (status == 0) {
        for(int i=0;i<n;i++) printf("%f\n",results[i]);
    } else {
        fprintf(stderr, "runCL failed with error %d\n", status);
    }

    // Free up memory
    free(a);
    free(b);
    free(results);

    return (status == 0) ? 0 : EXIT_FAILURE;
}

I am running Ubuntu 12.04 with an Intel i7 CPU and NVIDIA 560Ti GPU, with the latest drivers. For some reason the code returns an error at the very beginning, line 117, when it tries to get the CPU info. If I comment this out and try to find the GPU only, there is also an error (line 124). Here is the output:

FATAL: Module fglrx not found.

Error! Fail to load fglrx kernel module! Maybe you can switch to root user to load kernel module directly

test_opencl3: /home/mark/codes/test_opencl3/main.cpp:117: int runCL(float*, float*, float*, int): Assertion `err == 0' failed.

The module is not the problem as I have other examples working fine which also warn me about the module. How can I find out what the problem is?

Thanks

Mark

0 Likes
3 Replies
mark77
Journeyman III

I believe it's because I'm not passing the platform information. This is strange: the example comes from MacResearch's tutorials, so I would have expected it to work as written.

0 Likes

This changed when the ICD (Installable Client Driver) model was introduced. Before that, it was legal not to pass a platform ID.

mark77
Journeyman III

I see, thanks.

0 Likes