Hi,
I am trying to run a simple example code in OpenCL which just returns device info and runs a very simple kernel. It is from the MacResearch tutorials on OpenCL:
#include <stdio.h>
#include <assert.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <stdlib.h>
//#include <stdio.h>
#include <iostream> //want to use cout
//#include <OpenCL/OpenCL.h>
#include <CL/cl.h>
// Element-count constant. It is not referenced in the visible part of this
// file (main() uses its own local n).
#define NUM_DATA 100
// CL_CHECK(expr): for OpenCL calls that *return* their status.
// Evaluates expr exactly once into a cl_int; if the value is not CL_SUCCESS,
// prints the stringified expression and the numeric code to stderr and calls
// abort(). Wrapped in do { ... } while (0) so it behaves as one statement.
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)
// CL_CHECK_ERR(expr): for calls that report their status through an
// out-parameter and return a value.
// The macro declares a local `_err` preset to CL_INVALID_VALUE, evaluates expr
// once into `_ret`, and aborts with a message if `_err` is not CL_SUCCESS
// afterwards. Because nothing else assigns `_err`, the expression passed in
// must itself write to it (e.g. by passing `&_err` as the error argument),
// otherwise the macro always aborts. On success the whole macro yields `_ret`.
// NOTE: relies on the GNU extensions `({ ... })` (statement expression) and
// `typeof`, so it is not portable to compilers without them.
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})
using namespace std;
#pragma mark -
#pragma mark Utilities
/*
 * Read an entire text file into memory.
 *
 * filename: path of the file to read.
 *
 * Returns a malloc()ed, NUL-terminated buffer holding the file contents, or
 * NULL if the file cannot be opened, its size cannot be determined, memory
 * cannot be allocated, or a read error occurs. The caller owns the buffer and
 * must release it with free().
 */
char * load_program_source(const char *filename)
{
    struct stat statbuf;
    FILE *fh;
    char *source;
    size_t size;
    size_t got;

    fh = fopen(filename, "r");
    if (fh == NULL)
        return NULL;

    if (stat(filename, &statbuf) != 0 || statbuf.st_size < 0) {
        fclose(fh);
        return NULL;
    }
    size = (size_t)statbuf.st_size;

    source = (char *) malloc(size + 1);
    if (source == NULL) {
        fclose(fh);
        return NULL;
    }

    /* Read byte-wise so the return value is the number of bytes actually
     * stored; it can be smaller than st_size (e.g. text-mode translation). */
    got = fread(source, 1, size, fh);
    if (ferror(fh)) {
        free(source);
        fclose(fh);
        return NULL;
    }
    fclose(fh);

    /* Terminate after the last byte read, not at st_size, so the string never
     * includes uninitialised bytes. */
    source[got] = '\0';
    return source;
}
#pragma mark -
#pragma mark Main OpenCL Routine
int runCL(float * a, float * b, float * results, int n)
{
cl_program program[1];
cl_kernel kernel[2];
cl_command_queue cmd_queue;
cl_context context;
cl_device_id cpu = NULL, device = NULL;
cl_int err = 0;
size_t returned_size = 0;
size_t buffer_size;
cl_mem a_mem, b_mem, ans_mem;
// //Mark - going to get platform info first
// cl_platform_id platforms[100];
// cl_uint platforms_n = 0;
// CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
//
//
// printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
// for (int i=0; i<platforms_n; i++)
// {
// char buffer[10240];
// printf(" -- %d --\n", i);
// //CL_CHECK(clGetPlatformInfo(platforms, CL_PLATFORM_PROFILE, 10240, buffer, NULL));
// err=clGetPlatformInfo(platforms, CL_PLATFORM_PROFILE, 10240, buffer, NULL);
// printf(" PROFILE = %s\n", buffer);
// err=clGetPlatformInfo(platforms, CL_PLATFORM_VERSION, 10240, buffer, NULL);
// printf(" VERSION = %s\n", buffer);
// err=clGetPlatformInfo(platforms, CL_PLATFORM_NAME, 10240, buffer, NULL);
// printf(" NAME = %s\n", buffer);
// err=clGetPlatformInfo(platforms, CL_PLATFORM_VENDOR, 10240, buffer, NULL);
// printf(" VENDOR = %s\n", buffer);
// err=clGetPlatformInfo(platforms, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
// printf(" EXTENSIONS = %s\n", buffer);
// }
//
// if (platforms_n == 0)
// return 1;
//
// cl_device_id devices[100];
// cl_uint devices_n = 0;
// //CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
// CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
// cout << "here I found platforms_n=" <<platforms_n<< endl;
#pragma mark Device Information
{
// Find the CPU CL device, as a fallback
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
//err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &cpu, NULL);
assert(err == CL_SUCCESS);
// Find the GPU CL device, this is what we really want
// If there is no GPU device is CL capable, fall back to CPU
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err != CL_SUCCESS) device = cpu;
assert(device);
cout << "found GPU" << endl;
// Get some information about the returned device
cl_char vendor_name[1024] = {0};
cl_char device_name[1024] = {0};
err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),
vendor_name, &returned_size);
err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
device_name, &returned_size);
assert(err == CL_SUCCESS);
printf("Connecting to %s %s...\n", vendor_name, device_name);
}
#pragma mark Context and Command Queue
{
// Now create a context to perform our calculation with the
// specified device
context = clCreateContext(0, 1, &device, NULL, NULL, &err);
assert(err == CL_SUCCESS);
// And also a command queue for the context
cmd_queue = clCreateCommandQueue(context, device, 0, NULL);
}
#pragma mark Program and Kernel Creation
{
// Load the program source from disk
// The kernel/program is the project directory and in Xcode the executable
// is set to launch from that directory hence we use a relative path
const char * filename = "example.cl";
char *program_source = load_program_source(filename);
program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
NULL, &err);
assert(err == CL_SUCCESS);
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
assert(err == CL_SUCCESS);
// Now create the kernel "objects" that we want to use in the example file
kernel[0] = clCreateKernel(program[0], "add", &err);
}
#pragma mark Memory Allocation
{
// Allocate memory on the device to hold our data and store the results into
buffer_size = sizeof(float) * n;
// Input array a
a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, buffer_size,
(void*)a, 0, NULL, NULL);
// Input array b
b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_size, NULL, NULL);
err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, buffer_size,
(void*)b, 0, NULL, NULL);
assert(err == CL_SUCCESS);
// Results array
ans_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, NULL);
// Get all of the stuff written and allocated
clFinish(cmd_queue);
}
#pragma mark Kernel Arguments
{
// Now setup the arguments to our kernel
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &a_mem);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &b_mem);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &ans_mem);
assert(err == CL_SUCCESS);
}
#pragma mark Execution and Read
{
// Run the calculation by enqueuing it and forcing the
// command queue to complete the task
size_t global_work_size = n;
err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,
&global_work_size, NULL, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
// Once finished read back the results from the answer
// array into the results array
err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, buffer_size,
results, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
}
#pragma mark Teardown
{
clReleaseMemObject(a_mem);
clReleaseMemObject(b_mem);
clReleaseMemObject(ans_mem);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
}
return CL_SUCCESS;
}
int main (int argc, const char * argv[]) {
// Problem size
int n = 32;
// Allocate some memory and a place for the results
float * a = (float *)malloc(n*sizeof(float));
float * b = (float *)malloc(n*sizeof(float));
float * results = (float *)malloc(n*sizeof(float));
// Fill in the values
for(int i=0;i<n;i++){
a = (float)i;
b = (float)n-i;
results = 0.f;
}
// Do the OpenCL calculation
runCL(a, b, results, n);
// Print out some results. For this example the values of all elements
// should be the same as the value of n
for(int i=0;i<n;i++) printf("%f\n",results);
// Free up memory
free(a);
free(b);
free(results);
return 0;
}
I am running Ubuntu 12.04 with an Intel i7 CPU and NVIDIA 560Ti GPU, with the latest drivers. For some reason the code returns an error at the very beginning, line 117, when it tries to get the CPU info. If I comment this out and try to find the GPU only, there is also an error (line 124). Here is the output:
FATAL: Module fglrx not found.
Error! Fail to load fglrx kernel module! Maybe you can switch to root user to load kernel module directly
test_opencl3: /home/mark/codes/test_opencl3/main.cpp:117: int runCL(float*, float*, float*, int): Assertion `err == 0' failed.
The module is not the problem as I have other examples working fine which also warn me about the module. How can I find out what the problem is?
Thanks
Mark
I believe it's because I'm not passing the platform information to clGetDeviceIDs. That's strange: since this example comes from MacResearch's tutorials, I would have expected it to run as-is.
This changed when the ICD (Installable Client Driver) model was introduced. Before that, it was legal not to pass a platform ID (i.e. to pass NULL instead).
I see, thanks.