3 Replies Latest reply on Dec 11, 2012 8:30 AM by mark77

    simple OpenCL example returns error

    mark77

      Hi,

       

      I am trying to run a simple OpenCL example program that just prints device info and runs a very simple kernel. It is from the MacResearch tutorials on OpenCL:

       

      #include <stdio.h>
      #include <assert.h>
      #include <sys/sysctl.h>
      #include <sys/stat.h>
      #include <stdlib.h>
      //#include <stdio.h>
      #include <iostream> //want to use cout
      
      //#include <OpenCL/OpenCL.h>
      #include <CL/cl.h>
      
      #define NUM_DATA 100
      
      /*
       * CL_CHECK(expr) - run an OpenCL call that returns its status directly.
       *
       * The expression is evaluated exactly once. If the resulting cl_int is
       * not CL_SUCCESS, the stringified expression and the numeric status are
       * written to stderr and the process is terminated with abort().
       * Wrapped in do { } while (0) so it behaves as a single statement.
       */
      #define CL_CHECK(_expr)                                                         \
         do {                                                                         \
           cl_int _err = _expr;                                                       \
           if (_err != CL_SUCCESS) {                                                  \
             fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
             abort();                                                                 \
           }                                                                          \
         } while (0)
      
      /*
       * CL_CHECK_ERR(expr) - run an OpenCL call that reports its status through
       * an out-parameter, and yield the call's own return value.
       *
       * The macro declares a local `cl_int _err` and never assigns it itself,
       * so the wrapped call must pass `&_err` as its error argument, e.g.
       *     ctx = CL_CHECK_ERR(clCreateContext(NULL, 1, &dev, NULL, NULL, &_err));
       * `_err` starts as CL_INVALID_VALUE, so a call that does not write it is
       * treated as a failure. On failure the stringified expression and the
       * status are written to stderr and the process is terminated with abort().
       *
       * Uses a GNU statement expression. `__typeof__` is spelled with
       * underscores so the macro also compiles in strict ISO modes
       * (-std=c11, -std=c++NN), where the bare `typeof` keyword is unavailable.
       */
      #define CL_CHECK_ERR(_expr)                                                     \
         ({                                                                           \
           cl_int _err = CL_INVALID_VALUE;                                            \
           __typeof__(_expr) _ret = (_expr);                                          \
           if (_err != CL_SUCCESS) {                                                  \
             fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
             abort();                                                                 \
           }                                                                          \
           _ret;                                                                      \
         })
      
      using namespace std;
      
      #pragma mark -
      #pragma mark Utilities
      /*
       * Read an entire file into a newly allocated, NUL-terminated buffer.
       *
       * filename: path of the file to read.
       *
       * Returns a malloc()ed string that the caller must free(), or NULL if the
       * file cannot be opened, its size cannot be determined, memory cannot be
       * allocated, or a read error occurs. The file is closed on every path.
       */
      char * load_program_source(const char *filename)
      {
          struct stat statbuf;
          FILE *fh;
          char *source;
          size_t length;
          size_t bytes_read;

          fh = fopen(filename, "r");
          if (fh == NULL)
              return NULL;

          if (stat(filename, &statbuf) != 0 || statbuf.st_size < 0) {
              fclose(fh);
              return NULL;
          }
          length = (size_t) statbuf.st_size;

          source = (char *) malloc(length + 1);
          if (source == NULL) {
              fclose(fh);
              return NULL;
          }

          /* Read byte-wise so the return value is the number of bytes obtained;
             the terminator goes after the data actually read, which may be
             shorter than the size reported by stat(). */
          bytes_read = fread(source, 1, length, fh);
          if (ferror(fh)) {
              free(source);
              fclose(fh);
              return NULL;
          }
          source[bytes_read] = '\0';

          fclose(fh);
          return source;
      }
      
      #pragma mark -
      #pragma mark Main OpenCL Routine
      int runCL(float * a, float * b, float * results, int n)
      {
          cl_program program[1];
          cl_kernel kernel[2];
      
          cl_command_queue cmd_queue;
          cl_context   context;
      
          cl_device_id cpu = NULL, device = NULL;
      
          cl_int err = 0;
          size_t returned_size = 0;
          size_t buffer_size;
      
          cl_mem a_mem, b_mem, ans_mem;
      
      //    //Mark - going to get platform info first
      //    cl_platform_id platforms[100];
      //    cl_uint platforms_n = 0;
      //    CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
      //
      //
      //    printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
      //    for (int i=0; i<platforms_n; i++)
      //    {
      //        char buffer[10240];
      //        printf("  -- %d --\n", i);
      //        //CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
      //        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL);
      //        printf("  PROFILE = %s\n", buffer);
      //        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL);
      //        printf("  VERSION = %s\n", buffer);
      //        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL);
      //        printf("  NAME = %s\n", buffer);
      //        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL);
      //        printf("  VENDOR = %s\n", buffer);
      //        err=clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
      //        printf("  EXTENSIONS = %s\n", buffer);
      //    }
      //
      //    if (platforms_n == 0)
      //        return 1;
      //
      //    cl_device_id devices[100];
      //    cl_uint devices_n = 0;
      //    //CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
      //    CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
      // cout << "here I found platforms_n=" <<platforms_n<< endl;
      
      #pragma mark Device Information
          {
      
      
      
      
              // Find the CPU CL device, as a fallback
              err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
              //err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
              assert(err == CL_SUCCESS);
      
              // Find the GPU CL device, this is what we really want
              // If there is no GPU device is CL capable, fall back to CPU
              err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
              err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
              if (err != CL_SUCCESS) device = cpu;
              assert(device);
      cout << "found GPU" << endl;
              // Get some information about the returned device
              cl_char vendor_name[1024] = {0};
              cl_char device_name[1024] = {0};
              err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),
                                    vendor_name, &returned_size);
              err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
                                    device_name, &returned_size);
              assert(err == CL_SUCCESS);
              printf("Connecting to %s %s...\n", vendor_name, device_name);
          }
      
      #pragma mark Context and Command Queue
          {
              // Now create a context to perform our calculation with the
              // specified device
              context = clCreateContext(0, 1, &device, NULL, NULL, &err);
              assert(err == CL_SUCCESS);
      
              // And also a command queue for the context
              cmd_queue = clCreateCommandQueue(context, device, 0, NULL);
          }
      
      #pragma mark Program and Kernel Creation
          {
              // Load the program source from disk
              // The kernel/program is the project directory and in Xcode the executable
              // is set to launch from that directory hence we use a relative path
              const char * filename = "example.cl";
              char *program_source = load_program_source(filename);
              program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
                                                     NULL, &err);
              assert(err == CL_SUCCESS);
      
              err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
              assert(err == CL_SUCCESS);
      
              // Now create the kernel "objects" that we want to use in the example file
              kernel[0] = clCreateKernel(program[0], "add", &err);
          }
      
      #pragma mark Memory Allocation
          {
              // Allocate memory on the device to hold our data and store the results into
              buffer_size = sizeof(float) * n;
      
              // Input array a
              a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
              err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, buffer_size,
                                         (void*)a, 0, NULL, NULL);
      
              // Input array b
              b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
              err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, buffer_size,
                                          (void*)b, 0, NULL, NULL);
              assert(err == CL_SUCCESS);
      
              // Results array
              ans_mem    = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, NULL);
      
              // Get all of the stuff written and allocated
              clFinish(cmd_queue);
          }
      
      #pragma mark Kernel Arguments
          {
              // Now setup the arguments to our kernel
              err  = clSetKernelArg(kernel[0],  0, sizeof(cl_mem), &a_mem);
              err |= clSetKernelArg(kernel[0],  1, sizeof(cl_mem), &b_mem);
              err |= clSetKernelArg(kernel[0],  2, sizeof(cl_mem), &ans_mem);
              assert(err == CL_SUCCESS);
          }
      
      #pragma mark Execution and Read
          {
              // Run the calculation by enqueuing it and forcing the
              // command queue to complete the task
              size_t global_work_size = n;
              err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,
                                           &global_work_size, NULL, 0, NULL, NULL);
              assert(err == CL_SUCCESS);
              clFinish(cmd_queue);
      
              // Once finished read back the results from the answer
              // array into the results array
              err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, buffer_size,
                                        results, 0, NULL, NULL);
              assert(err == CL_SUCCESS);
              clFinish(cmd_queue);
          }
      
      #pragma mark Teardown
          {
              clReleaseMemObject(a_mem);
              clReleaseMemObject(b_mem);
              clReleaseMemObject(ans_mem);
      
              clReleaseCommandQueue(cmd_queue);
              clReleaseContext(context);
          }
          return CL_SUCCESS;
      }
      
      int main (int argc, const char * argv[]) {
      
          // Problem size
          int n = 32;
      
          // Allocate some memory and a place for the results
          float * a = (float *)malloc(n*sizeof(float));
          float * b = (float *)malloc(n*sizeof(float));
          float * results = (float *)malloc(n*sizeof(float));
      
          // Fill in the values
          for(int i=0;i<n;i++){
              a[i] = (float)i;
              b[i] = (float)n-i;
              results[i] = 0.f;
          }
      
          // Do the OpenCL calculation
          runCL(a, b, results, n);
      
          // Print out some results. For this example the values of all elements
          // should be the same as the value of n
          for(int i=0;i<n;i++) printf("%f\n",results[i]);
      
          // Free up memory
          free(a);
          free(b);
          free(results);
      
          return 0;
      }
      
      
      
      
      
      
      
      
      
      
      

       

      I am running Ubuntu 12.04 with an Intel i7 CPU and an NVIDIA 560Ti GPU, with the latest drivers. For some reason the code fails at the very beginning, at line 117 of main.cpp, where it tries to get the CPU device. If I comment this out and try to find only the GPU, there is also an error (line 124). Here is the output:

      FATAL: Module fglrx not found.
      Error! Fail to load fglrx kernel module! Maybe you can switch to root user to load kernel module directly
      test_opencl3: /home/mark/codes/test_opencl3/main.cpp:117: int runCL(float*, float*, float*, int): Assertion `err == 0' failed.
      
      
      

      The module is not the problem as I have other examples working fine which also warn me about the module. How can I find out what the problem is?

      Thanks

       

      Mark