On Catalyst 14.12, enqueueNDRangeKernel crashes with a segmentation fault if all three of the following conditions are met:
1. The kernel is run on a GPU (Radeon R9 290X);
2. The kernel uses the printf function;
3. The kernel is built from a binary.
The test source is attached and displayed here:
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
using namespace cl;
int main()
{
std::vector<Platform> platformList;
Platform::get(&platformList);
cl_context_properties cprops[] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platformList[0])(),
0};
Context context(CL_DEVICE_TYPE_GPU, cprops);
std::vector<Device> devices;
devices = context.getInfo<CL_CONTEXT_DEVICES>();
if (devices.size() == 0) {
std::cerr << "No GPU devices found\n";
exit(-1);
}
devices.erase(devices.begin()+1, devices.end()); // pick the first device
// std::string source = "kernel void test() { }\n"; // this is OK
std::string source = "kernel void test() { printf(\"Hello world %d\\n\",0);}\n";
Program program(context, source);
program.build(devices, "");
std::vector<::size_t> sizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
std::vector<char *> binaries = program.getInfo<CL_PROGRAM_BINARIES>();
Program::Binaries binaries1;
binaries1.push_back(std::pair<void*,::size_t>(binaries[0], sizes[0]));
Program program1(context, devices, binaries1);
program1.build(devices,"");
Kernel kernel(program1, "test"); // OK when using 'program' instead of 'program1'
CommandQueue queue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);
Event event;
queue.enqueueNDRangeKernel(kernel, NullRange, NDRange(1,1,1), NDRange(1,1,1), NULL, &event);
event.wait();
}
Environment: 64-bit Linux. The same test worked fine on Catalyst 14.9.