Using the C++ cl.hpp wrapper, saving a compiled kernel binary and loading it back works on Linux.
Under windows, we get:
Internal error: Input OpenCL binary is not for the target!
ERROR: clBuildProgram -11
Running the attached code as-is demonstrates the problem.
The kernel executes in Windows if one changes the if(1) to if(0) to disable the binary load.
// g++ -o hpp -c -g -Wall -I/opt/AMDAPP/include -L/opt/AMDAPP/lib/x86_64 -lOpenCL hpp.cpp // cl /Fohpp.obj /c hpp.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp.cpp // link /nologo /OUT:hpp.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp.obj #include <stdlib.h> // prototype: int main(int argc, char **argv); // #include <cstdio> // #include <cstdlib> #include <iostream> using namespace std; #define __CL_ENABLE_EXCEPTIONS // #define __NO_STD_STRING #define __NO_STD_VECTOR #include <CL/cl.hpp> const char * helloStr = "__kernel void hello(void) { } "; int main(int argc, char **argv) { cl_int err = CL_SUCCESS; try { cl::vector<cl::Platform> platforms; cl::Platform::get(&platforms); cout << "CL_PLATFORM_VENDOR = " << platforms[0].getInfo<CL_PLATFORM_VENDOR>() << "\n"; // choose 1st platform cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[ 0 ])(), 0 }; cl::Context context(CL_DEVICE_TYPE_GPU, properties); // or CL_DEVICE_TYPE_CPU; cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); cout << "CL_DEVICE_NAME = " << devices[0].getInfo<CL_DEVICE_NAME>() << "\n"; cl::Program::Sources source(1, make_pair(helloStr,strlen(helloStr))); cl::Program program = cl::Program(context, source); program.build(devices); if(1) { VECTOR_CLASS<size_t> programBinarySizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>(); VECTOR_CLASS<char*> programBinaries = program.getInfo<CL_PROGRAM_BINARIES>(); cl::Program::Binaries binaries(1, make_pair( (const void*) programBinaries[0], (size_t) programBinarySizes[0] ) ); program = cl::Program( context, devices, binaries); try { program.build(devices); cout << "program.build with binary successful\n"; } catch (cl::Error err) // print build log { cerr << "program.build with binary failed!\n"; cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]); throw err; } } 
cl::Kernel kernel(program, "hello", &err); cl::CommandQueue cmdQ(context, devices[0], 0, &err); cl::Event event; cmdQ.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event); event.wait(); cout << "KERNEL \"" << helloStr << "\" EXECUTION SUCCESSFUL\n"; } catch (cl::Error err) { cerr << "ERROR: " << err.what() << " " << err.err() << "\n"; } return EXIT_SUCCESS; }
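I revised the program so that it first builds and runs the kernel from source, then retrieves the program binary, rebuilds the program from that binary, and runs the kernel again.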
Again, it works under Linux, but fails under Windows.
The modified source code, file hpp.cpp, is
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// g++ -o hpp -c -g -Wall -I/opt/AMDAPP/include -L/opt/AMDAPP/lib/x86_64 -lOpenCL hpp.cpp
// cl /Fohpp.obj /c hpp.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp.cpp
// link /nologo /OUT:hpp.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp.obj
// #include <stdlib.h> // prototype: int main(int argc, char **argv);
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <utility>
#include <vector>
using namespace std;
#define __CL_ENABLE_EXCEPTIONS
// #define __NO_STD_STRING
#define __NO_STD_VECTOR
#include <CL/cl.hpp>
// OpenCL C source of the empty test kernel "hello"; main() compiles it and echoes it in its status messages.
const char * helloStr = "__kernel void hello(void) { } ";
int main(int argc, char **argv)
{
cout << "\n";
cl_int err = CL_SUCCESS;
try {
cl::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cout << argv[0] << " CL_PLATFORM_VENDOR = " << platforms[0].getInfo<CL_PLATFORM_VENDOR>() << "\n";
// choose 1st platform
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[ 0 ])(), 0 };
cl::Context context(CL_DEVICE_TYPE_GPU, properties); // or CL_DEVICE_TYPE_CPU;
cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
cout << argv[0] << " CL_DEVICE_VENDOR = " << devices[0].getInfo <CL_DEVICE_VENDOR> () << "\n";
cout << argv[0] << " CL_DEVICE_NAME = " << devices[0].getInfo <CL_DEVICE_NAME> () << "\n";
cout << argv[0] << " CL_DEVICE_PROFILE = " << devices[0].getInfo <CL_DEVICE_PROFILE> () << "\n";
cout << argv[0] << " CL_DEVICE_VERSION = " << devices[0].getInfo <CL_DEVICE_VERSION> () << "\n";
cout << argv[0] << " CL_DEVICE_MAX_WORK_GROUP_SIZE = " << devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << "\n";
cl::Program::Sources source(1, make_pair(helloStr,strlen(helloStr)));
cl::Program program = cl::Program(context, source);
program.build(devices);
cl::Kernel kernelFromSource(program, "hello", &err);
cl::CommandQueue cmdQ(context, devices[0], 0, &err);
cl::Event event;
cmdQ.enqueueNDRangeKernel(kernelFromSource, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event);
event.wait();
cout << argv[0] << " KERNEL source compile \"" << helloStr << "\" EXECUTION SUCCESSFUL\n";
cout << argv[0] << " source compile program works - now grab binary and compile binary\n";
VECTOR_CLASS<size_t> programBinarySizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
cout << argv[0] << " binary size = " << programBinarySizes[0] << " bytes\n";
VECTOR_CLASS<char*> programBinaries = program.getInfo<CL_PROGRAM_BINARIES>();
cl::Program::Binaries binaries(1, make_pair( (const void*) programBinaries[0], (size_t) programBinarySizes[0] ) );
program = cl::Program( context, devices, binaries);
try
{
program.build(devices);
cout << argv[0] << " program.build with binary successful\n";
}
catch (cl::Error err) // print build log
{
cerr << argv[0] << " program.build with binary failed!\n";
cerr << argv[0] << " " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
throw err;
}
cl::Kernel kernelFromBinary(program, "hello", &err);
cmdQ.enqueueNDRangeKernel(kernelFromBinary, cl::NullRange, cl::NDRange(4,4), cl::NDRange(2,2), NULL, &event);
event.wait();
cout << argv[0] << " KERNEL binary compile \"" << helloStr << "\" EXECUTION SUCCESSFUL\n";
}
catch (cl::Error err) { cerr << argv[0] << " ERROR: " << err.what() << " " << err.err() << "\n"; }
return EXIT_SUCCESS;
}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
The output is
W I N D O W S
C:\...>scons && hpp2.exe
scons: Reading SConscript files ...
os.name= nt
sys.platform= win32
scons: done reading SConscript files.
scons: Building targets ...
cl /Fohpp2.obj /c hpp2.cpp /TP /nologo /EHsc /Zi /RTCs /Fd -DAMD "/IC:\Program Files (x86)\AMD APP\include" "/I C:\Program Files\Windows SDKs\Windows\v6.0AInclude" hpp2.cpp
link /nologo /OUT:hpp2.exe "/LIBPATH:C:\Program Files (x86)\AMD APP\lib\x86_64" OpenCL.lib hpp2.obj
scons: done building targets.
hpp2.exe CL_PLATFORM_VENDOR = Advanced Micro Devices, Inc.
hpp2.exe CL_DEVICE_VENDOR = Advanced Micro Devices, Inc.
hpp2.exe CL_DEVICE_NAME = Cayman
hpp2.exe CL_DEVICE_PROFILE = FULL_PROFILE
hpp2.exe CL_DEVICE_VERSION = OpenCL 1.1 AMD-APP (831.4)
hpp2.exe CL_DEVICE_MAX_WORK_GROUP_SIZE = 256
hpp2.exe KERNEL source compile "__kernel void hello(void) { } " EXECUTION SUCCESSFUL
hpp2.exe source compile program works - now grab binary and compile binary
hpp2.exe binary size = 15708 bytes
hpp2.exe program.build with binary failed!
hpp2.exe Internal error: Input OpenCL binary is not for the target!
hpp2.exe ERROR: clBuildProgram -11
C:\Users\greg\Documents\programming\attic>
L I N U X
$ scons hpp2 && ./hpp2
scons: Reading SConscript files ...
os.name= posix
os.uname= ('Linux', 'greg-home', '2.6.32-37-generic', '#81-Ubuntu SMP Fri Dec 2 20:32:42 UTC 2011', 'x86_64')
platform.dist= ('Ubuntu', '10.04', 'lucid')
sys.platform= linux2
scons: done reading SConscript files.
scons: Building targets ...
g++ -o hpp2.o -c -DAMD -g -Wall -I/opt/AMDAPP/include -I/ntfs/Users/greg/Documents/programming/boost hpp2.cpp
g++ -o hpp2 hpp2.o -L/opt/AMDAPP/lib/x86_64 -lOpenCL
scons: done building targets.
./hpp2 CL_PLATFORM_VENDOR = Advanced Micro Devices, Inc.
./hpp2 CL_DEVICE_VENDOR = Advanced Micro Devices, Inc.
./hpp2 CL_DEVICE_NAME = Cayman
./hpp2 CL_DEVICE_PROFILE = FULL_PROFILE
./hpp2 CL_DEVICE_VERSION = OpenCL 1.1 AMD-APP-SDK-v2.5 (684.213)
./hpp2 CL_DEVICE_MAX_WORK_GROUP_SIZE = 256
./hpp2 KERNEL source compile "__kernel void hello(void) { } " EXECUTION SUCCESSFUL
./hpp2 source compile program works - now grab binary and compile binary
./hpp2 binary size = 15392 bytes
./hpp2 program.build with binary successful
KERNEL binary compile "__kernel void hello(void) { } " EXECUTION SUCCESSFUL
$
Hi,
I have the same problem. I used the example code from "AMD APP SDK v2.3 Support for Binary OpenCL kernels". The binary kernel is generated and saved, but it cannot be loaded. The output is "Internal error: Input OpenCL binary is not for the target!"
Any suggestions?