2 Replies Latest reply on May 18, 2012 2:20 AM by kahlan

    binary cl.hpp program.build fails in windows

    CaptGreg
      binary cl.hpp program.build fails in windows, works in linux

      Using the c++ cl.hpp wrapper, saving a compiled kernel binary and loading it works in linux.

      Under windows, we get:

      Internal error: Input OpenCL binary is not for the target!
      ERROR: clBuildProgram -11

      Running the attached code as-is demonstrates the problem.

      The kernel executes in Windows if one changes the if(1) to if(0) to disable the binary load.

       

      // g++ -o hpp -c -g -Wall -I/opt/AMDAPP/include -L/opt/AMDAPP/lib/x86_64 -lOpenCL hpp.cpp // cl /Fohpp.obj /c hpp.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp.cpp // link /nologo /OUT:hpp.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp.obj #include <stdlib.h> // prototype: int main(int argc, char **argv); // #include <cstdio> // #include <cstdlib> #include <iostream> using namespace std; #define __CL_ENABLE_EXCEPTIONS // #define __NO_STD_STRING #define __NO_STD_VECTOR #include <CL/cl.hpp> const char * helloStr = "__kernel void hello(void) { } "; int main(int argc, char **argv) { cl_int err = CL_SUCCESS; try { cl::vector<cl::Platform> platforms; cl::Platform::get(&platforms); cout << "CL_PLATFORM_VENDOR = " << platforms[0].getInfo<CL_PLATFORM_VENDOR>() << "\n"; // choose 1st platform cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[ 0 ])(), 0 }; cl::Context context(CL_DEVICE_TYPE_GPU, properties); // or CL_DEVICE_TYPE_CPU; cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); cout << "CL_DEVICE_NAME = " << devices[0].getInfo<CL_DEVICE_NAME>() << "\n"; cl::Program::Sources source(1, make_pair(helloStr,strlen(helloStr))); cl::Program program = cl::Program(context, source); program.build(devices); if(1) { VECTOR_CLASS<size_t> programBinarySizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>(); VECTOR_CLASS<char*> programBinaries = program.getInfo<CL_PROGRAM_BINARIES>(); cl::Program::Binaries binaries(1, make_pair( (const void*) programBinaries[0], (size_t) programBinarySizes[0] ) ); program = cl::Program( context, devices, binaries); try { program.build(devices); cout << "program.build with binary successful\n"; } catch (cl::Error err) // print build log { cerr << "program.build with binary failed!\n"; cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]); throw err; } } 
cl::Kernel kernel(program, "hello", &err); cl::CommandQueue cmdQ(context, devices[0], 0, &err); cl::Event event; cmdQ.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event); event.wait(); cout << "KERNEL \"" << helloStr << "\" EXECUTION SUCCESSFUL\n"; } catch (cl::Error err) { cerr << "ERROR: " << err.what() << " " << err.err() << "\n"; } return EXIT_SUCCESS; }

        • Re: binary cl.hpp program.build fails in windows
          CaptGreg

          I revised the program so that it

          • prints some driver information
          • executes the compiled source program
          • saves the binary to an array
          • attempts to compile and execute the binary 

          Again, it works under linux, but fails under windows.

           

          The modified source code, file hpp.cpp, is

           

          ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

           

          // g++ -o hpp -c -g -Wall -I/opt/AMDAPP/include -L/opt/AMDAPP/lib/x86_64 -lOpenCL hpp.cpp

          // cl /Fohpp.obj /c hpp.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp.cpp

          // link /nologo /OUT:hpp.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp.obj

           

           

          // #include <stdlib.h> // prototype: int main(int argc, char **argv);

          #include <cstdlib>

          #include <cstring>

          #include <iostream>

          using namespace std;

           

          #define __CL_ENABLE_EXCEPTIONS

          // #define __NO_STD_STRING

          #define __NO_STD_VECTOR

          #include <CL/cl.hpp>

           

          const char * helloStr  = "__kernel void hello(void) {   } ";

           

          int main(int argc, char **argv)

          {

              cout << "\n";

               cl_int err = CL_SUCCESS;

               try {

                  cl::vector<cl::Platform>   platforms;

                  cl::Platform::get(&platforms);

                  cout << argv[0] << " CL_PLATFORM_VENDOR             = " << platforms[0].getInfo<CL_PLATFORM_VENDOR>() << "\n";

                  // choose 1st platform

                  cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[ 0 ])(), 0 };

                  cl::Context context(CL_DEVICE_TYPE_GPU, properties); // or CL_DEVICE_TYPE_CPU;

           

                  cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

                  cout << argv[0] << " CL_DEVICE_VENDOR               = " << devices[0].getInfo <CL_DEVICE_VENDOR>            () << "\n";

                  cout << argv[0] << " CL_DEVICE_NAME                 = " << devices[0].getInfo <CL_DEVICE_NAME>              () << "\n";

                  cout << argv[0] << " CL_DEVICE_PROFILE              = " << devices[0].getInfo <CL_DEVICE_PROFILE>           () << "\n";

                  cout << argv[0] << " CL_DEVICE_VERSION              = " << devices[0].getInfo <CL_DEVICE_VERSION>           () << "\n";

                  cout << argv[0] << " CL_DEVICE_MAX_WORK_GROUP_SIZE  = " << devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << "\n";

           

           

                  cl::Program::Sources source(1, make_pair(helloStr,strlen(helloStr)));

                  cl::Program program = cl::Program(context, source);

                  program.build(devices);

                  cl::Kernel kernelFromSource(program, "hello", &err);

                  cl::CommandQueue cmdQ(context, devices[0], 0, &err);

                  cl::Event event;

                  cmdQ.enqueueNDRangeKernel(kernelFromSource, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event);

                  event.wait();

                  cout << argv[0] << " KERNEL source compile \"" << helloStr << "\" EXECUTION SUCCESSFUL\n";

           

                  cout << argv[0] << " source compile program works - now grab binary and compile binary\n";

                  VECTOR_CLASS<size_t> programBinarySizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();

                  cout << argv[0] << " binary size = " << programBinarySizes[0] << " bytes\n";

                  VECTOR_CLASS<char*>  programBinaries    = program.getInfo<CL_PROGRAM_BINARIES>();

           

                  cl::Program::Binaries binaries(1, make_pair( (const void*) programBinaries[0], (size_t) programBinarySizes[0] ) );

                  program = cl::Program( context, devices, binaries);

                  try

                  {

                      program.build(devices);

                      cout << argv[0] << " program.build with binary successful\n";

                  }

                  catch (cl::Error err) // print build log

                  {

                      cerr << argv[0] << " program.build with binary failed!\n";

                      cerr << argv[0] << " " <<  program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);

                      throw err;

                  }

           

                  cl::Kernel kernelFromBinary(program, "hello", &err);

           

                  cmdQ.enqueueNDRangeKernel(kernelFromBinary, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event);

                  event.wait();

                  cout << argv[0] << " KERNEL binary compile \"" << helloStr << "\" EXECUTION SUCCESSFUL\n";

              }

              catch (cl::Error err) { cerr << argv[0] << " ERROR: " << err.what() << " " << err.err() << "\n"; }

           

              return EXIT_SUCCESS;

          }

           

          ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

          The output is

          W I N D O W S

                  C:\...>scons && hpp2.exe

                  scons: Reading SConscript files ...

                  os.name= nt

                  sys.platform= win32

                  scons: done reading SConscript files.

                  scons: Building targets ...

                  cl /Fohpp2.obj /c hpp2.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp2.cpp

                  link /nologo /OUT:hpp2.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp2.obj

                  scons: done building targets.

           

                  hpp2.exe CL_PLATFORM_VENDOR             = Advanced Micro Devices, Inc.

                  hpp2.exe CL_DEVICE_VENDOR              = Advanced Micro Devices, Inc.

                  hpp2.exe CL_DEVICE_NAME                 = Cayman

                  hpp2.exe CL_DEVICE_PROFILE              = FULL_PROFILE

                  hpp2.exe CL_DEVICE_VERSION              = OpenCL 1.1 AMD-APP (831.4)

                  hpp2.exe CL_DEVICE_MAX_WORK_GROUP_SIZE  = 256

                  hpp2.exe KERNEL source compile "__kernel void hello(void) {   } " EXECUTION SUCCESSFUL

                  hpp2.exe source compile program works - now grab binary and compile binary

                  hpp2.exe binary size = 15708 bytes

                  hpp2.exe program.build with binary failed!

                  hpp2.exe Internal error: Input OpenCL binary is not for the target!

                  hpp2.exe ERROR: clBuildProgram -11

           

                  C:\Users\greg\Documents\programming\attic>

           

           

          L I N U X

                  $ scons hpp2 && ./hpp2

                  scons: Reading SConscript files ...

                  os.name= posix

                  os.uname= ('Linux', 'greg-home', '2.6.32-37-generic', '#81-Ubuntu SMP Fri Dec 2 20:32:42 UTC 2011', 'x86_64')

                  platform.dist= ('Ubuntu', '10.04', 'lucid')

                  sys.platform= linux2

                  scons: done reading SConscript files.

                  scons: Building targets ...

                  g++ -o hpp2.o -c -DAMD -g -Wall -I/opt/AMDAPP/include -I/ntfs/Users/greg/Documents/programming/boost hpp2.cpp

                  g++ -o hpp2 hpp2.o -L/opt/AMDAPP/lib/x86_64 -lOpenCL

                  scons: done building targets.

           

                  ./hpp2 CL_PLATFORM_VENDOR             = Advanced Micro Devices, Inc.

                  ./hpp2 CL_DEVICE_VENDOR               = Advanced Micro Devices, Inc.

                  ./hpp2 CL_DEVICE_NAME                 = Cayman

                  ./hpp2 CL_DEVICE_PROFILE              = FULL_PROFILE

                  ./hpp2 CL_DEVICE_VERSION              = OpenCL 1.1 AMD-APP-SDK-v2.5 (684.213)

                  ./hpp2 CL_DEVICE_MAX_WORK_GROUP_SIZE  = 256

                  ./hpp2 KERNEL source compile "__kernel void hello(void) {   } " EXECUTION SUCCESSFUL

                  ./hpp2 source compile program works - now grab binary and compile binary

                  ./hpp2 binary size = 15392 bytes

                  ./hpp2 program.build with binary successful

                  KERNEL binary compile "__kernel void hello(void) {   } " EXECUTION SUCCESSFUL

                  $