1 Reply Latest reply on Nov 22, 2011 10:41 AM by Anon5710

    OpenCL error: clEnqueueNDRangeKernel(-54)

    Anon5710
      no idea where to start

      hello,

       

      I'm learning OpenCL and I thought I'd try to write a simple vector-add program. It fails with: OpenCL error: clEnqueueNDRangeKernel(-54).

      I have no idea what I am doing wrong. Anyone who can fix my error and explain why it happens will have my gratitude :)

       

      Regards Anon5710

       

       

      #include <iostream> #include <vector> #include <cstdlib> #include <stdio.h> #define __CL_ENABLE_EXCEPTIONS #include <CL/cl.hpp> const std::string kernelSource = "__kernel void addparallel( __global int* A,\n" " const __global int* B,\n" " const __global int* C,\n" " const unsigned int N)\n" "{\n" " int i = get_global_id(0);\n" "\n" " if( i < N)\n" " A[i] = B[i] + C[i];\n" "}\n"; int main(int argc, char *argv[]) { try { // get platforms std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); std::cout << "Platform Profile : " << platforms.front().getInfo<CL_PLATFORM_PROFILE>() << std::endl; std::cout << "Platform version : " << platforms.front().getInfo<CL_PLATFORM_VERSION>() << std::endl; std::cout << "Platform name : " << platforms.front().getInfo<CL_PLATFORM_NAME>() << std::endl; std::cout << "Platform vendor : " << platforms.front().getInfo<CL_PLATFORM_VENDOR>() << std::endl; std::cout << "Platform extensions : " << platforms.front().getInfo<CL_PLATFORM_EXTENSIONS>() << "\n" << std::endl; //get gpu devices std::vector<cl::Device> devices; platforms.front().getDevices(CL_DEVICE_TYPE_GPU,&devices); std::cout << "Device name : " << devices.front().getInfo<CL_DEVICE_NAME>() << std::endl; std::cout << "Device type : " << devices.front().getInfo<CL_DEVICE_TYPE>() << std::endl; std::cout << "Device vendor ID : " << devices.front().getInfo<CL_DEVICE_VENDOR_ID>() << std::endl; std::cout << "Maximum compute units on device : " << devices.front().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << std::endl; std::cout << "Maximum work items demensions : " << devices.front().getInfo<CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS>() << std::endl; std::cout << "Maximum workgroup size : " << devices.front().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << std::endl; std::cout << "Maximum clock frequency : " << devices.front().getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>() << std::endl; std::cout << "Device vendor: " << devices.front().getInfo<CL_DEVICE_VENDOR>() << std::endl; std::cout << 
"Suported extensions: " << devices.front().getInfo<CL_DEVICE_EXTENSIONS>() << "\n" << std::endl; //create a context on the first device (GPU) cl::Context context(devices); //create a command cue associated with a context and device cl::Device& device = devices.front(); cl::CommandQueue cmdqueue(context , device ) ; //JIT compiling of kernel source code cl::Program::Sources sourceCode; sourceCode.push_back(std::make_pair( kernelSource.c_str(), kernelSource.size())); cl::Program program = cl::Program( context, sourceCode ); try { program.build(devices); } catch(cl::Error& err ) { std::cerr << "Buildingfailed, " << err.what() << "(" << err.err() << ")" << "\nRetrieving build log" << "\nBuild Log Follows\n" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG >(devices.front()); } //variabels int foo[1000]; int bar[1000]; int result[1000]; int i; //setting up for(i=0;i<100;i++) { foo[i] = i; bar[i] = i; } //setup opencl buffers for foo and bar //inputbuffer will be equivalent to foo : CL_MEM_COPY_HOST_PTR = flag to fill in buffer with data pointed to by &foo[0] //outputbuffer is just an empty buffer cl::Buffer inputBuffer1 ( context , CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR ,sizeof(cl_int)*1000 , &foo[0]); cl::Buffer inputBuffer2( context , CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof (cl_int )*1000, &bar[0]); cl::Buffer resultBuffer( context , CL_MEM_READ_WRITE , sizeof (cl_int )*1000); //starting kernel (parallel part ? ) cl::Kernel kernel(program , "addparallel" ) ; cl::KernelFunctor kernel_func = kernel.bind( cmdqueue ,cl::NDRange(1000), cl::NDRange(128)); kernel_func(resultBuffer,inputBuffer1,inputBuffer2,1000 ); //data retrieval ? cmdqueue.enqueueReadBuffer( resultBuffer , true , 0 , sizeof(cl_int)*1024 , &result[0]); for(i=0;i<1000;i++) { printf("%d + %d = %d\n",foo[i],bar[i],result[i]); } } catch(cl::Error& err) { std::cerr << "OpenCL error: " << err.what() << "(" << err.err() << ")" << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }