Hello, everyone!
I was just working on opengl opencl interop stuff when I came across a weird bug I just could not understand, so I made a short (at least as short as I could make it ) testprogram to track the bug down and it still appeared. This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include "Cl/cl.h"
typedef struct {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue commandQueue;
} clInterface;
cl_kernel kernel;
void checkError(cl_int err) {
if (err != CL_SUCCESS)
printf("Error with errorcode: %d\n", err);
}
clInterface initOpenCL() {
clInterface out;
cl_int err;
// Speichere 1 Plattform in platform
err = clGetPlatformIDs(1, &out.platform, NULL);
checkError(err);
printf("platform selected\n");
// Speichere 1 Device beliebigen Typs in device
err = clGetDeviceIDs(out.platform, CL_DEVICE_TYPE_ALL, 1, &out.device, NULL);
checkError(err);
printf("device selected\n");
// erzeuge Context fuer das Device device
out.context = clCreateContext(NULL, 1, &out.device, NULL, NULL, &err);
checkError(err);
printf("context created\n");
// erzeuge Command Queue zur Verwaltung von device
out.commandQueue = clCreateCommandQueue(out.context, out.device, 0, &err);
checkError(err);
printf("commandQueue created\n");
return out;
}
void printBuildLog(cl_program program, cl_device_id device) {
cl_int err;
char* build_log;
size_t build_log_size;
// Speichere den Build Log fuer program und device in build_log
err = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_size);
checkError(err);
build_log = (char*) malloc(build_log_size);
err = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, build_log_size, build_log, NULL);
checkError(err);
printf("Log:\n%s\n", build_log);
free(build_log);
}
cl_kernel makeKernel(clInterface interface, const char* kernelSource, const char* kernelName) {
cl_kernel out;
cl_int err;
// Laenge des Kernel Quellcodes
size_t sourceLength = strlen(kernelSource);
cl_program program;
// Ein Programm aus dem Kernel Quellcode wird erzeugt
program = clCreateProgramWithSource(interface.context, 1, &kernelSource, &sourceLength, &err);
checkError(err);
printf("program created\n");
// Das Programm wird fuer alle Devices des Contextes gebaut
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
printBuildLog(program, interface.device);
else
printf("program build successfully\n");
out = clCreateKernel(program, kernelName, &err);
checkError(err);
printf("kernel created\n");
return out;
}
int main(int argc, char** argv) {
clInterface interface = initOpenCL();
cl_int err = CL_SUCCESS;
const cl_image_format format = {CL_RGBA, CL_FLOAT};
const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D,
4,
4,
0,
1,
0,
0,
0,
0,
NULL };
cl_mem img = clCreateImage(interface.context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
cl_kernel kernel = makeKernel(interface, "\
__kernel void testImg(__write_only image2d_t img) { \
int x = get_global_id(0); \
int y = get_global_id(1); \
int2 pos = (x, y); \
float4 color = (float4)(x*0.1, y*0.1, 0, 1); \
write_imagef(img, pos, color); \
printf(\"x=%i, y=%i, rgba=%2.2v4f\\n\", x, y, color);\
}\
", "testImg");
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
checkError(err);
printf("kernel arguments set\n");
size_t globalSize[] = {5, 5};
err = clEnqueueNDRangeKernel( interface.commandQueue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL);
checkError(err);
printf("kernel enqueued\n");
kernel = makeKernel(interface, "\
__kernel void testImg(__read_only image2d_t img) { \
int x = get_global_id(0); \
int y = get_global_id(1); \
int2 pos = (x, y); \
float4 color = read_imagef(img,pos); \
printf(\"x=%i, y=%i, rgba=%2.2v4f\\n\", x, y, color);\
}\
", "testImg");
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
checkError(err);
printf("kernel arguments set\n");
err = clEnqueueNDRangeKernel( interface.commandQueue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL);
checkError(err); printf("kernel enqueued\n");
clFinish(interface.commandQueue);
printf("Finish!");
}
So basically I write some Colours in a 2dimage and read it afterwards. This is the output:
$ test.exe
platform selected
device selected
context created
commandQueue created
program created
program build successfully
kernel created
kernel arguments set
kernel enqueued
program created
x=0, y=0, rgba=0.00,0.00,0.00,1.00
x=1, y=0, rgba=0.10,0.00,0.00,1.00
x=2, y=0, rgba=0.20,0.00,0.00,1.00
x=3, y=0, rgba=0.30,0.00,0.00,1.00
x=4, y=0, rgba=0.40,0.00,0.00,1.00
x=0, y=1, rgba=0.00,0.10,0.00,1.00
x=1, y=1, rgba=0.10,0.10,0.00,1.00
x=2, y=1, rgba=0.20,0.10,0.00,1.00
x=3, y=1, rgba=0.30,0.10,0.00,1.00
x=4, y=1, rgba=0.40,0.10,0.00,1.00
x=0, y=2, rgba=0.00,0.20,0.00,1.00
x=1, y=2, rgba=0.10,0.20,0.00,1.00
x=2, y=2, rgba=0.20,0.20,0.00,1.00
x=3, y=2, rgba=0.30,0.20,0.00,1.00
x=4, y=2, rgba=0.40,0.20,0.00,1.00
x=0, y=3, rgba=0.00,0.30,0.00,1.00
program build successfully
kernel created
kernel arguments set
kernel enqueued
x=0, y=0, rgba=0.40,0.00,0.00,1.00
x=1, y=0, rgba=0.40,0.00,0.00,1.00
x=2, y=0, rgba=0.40,0.00,0.00,1.00
x=3, y=0, rgba=0.40,0.00,0.00,1.00
x=4, y=0, rgba=0.40,0.00,0.00,1.00
x=0, y=1, rgba=0.40,0.10,0.00,1.00
x=1, y=1, rgba=0.40,0.10,0.00,1.00
x=2, y=1, rgba=0.40,0.10,0.00,1.00
x=3, y=1, rgba=0.40,0.10,0.00,1.00
x=4, y=1, rgba=0.40,0.10,0.00,1.00
Finish!
I just don't understand whats going on there. First it seems to only start 16 workitems [edited here, oops, globalsize is 5x5...]. The second time only 10 workitems are started? Moreover, almost every Pixel, that is printed by the second kernel seems to have a wrong red-value.
I just updated my driver to version Catalyst Version 13.4 for my HD Radeon 7870, but that did not change anything.
Also, I am using minGW on Windows 7 to compile.
I would really appreciate some helpful advice
Greetings sylze
Are you running this on CPU Device (or) GPU device?
Can you run CodeXL and check how the launch configuration looks like?
Since 5 is a prime number, I believe the run-time will use a workgroup-size of (1x1) so that local size divides the global size dimension wise..
Apart from that, I don't understand why you are getting wrong values.
If you confirm the info above, I can download n check your code..
-
Bruha...
Oh, well... Turns out I forgot to put "(int2)" when declaring pos, oops. Now everything works .
Thanks for pointing out CodeXL. I did not know about it before and the KernelAnalyzer showed me that mistake.
However, it seems i get a bluescreen everytime I try to run above program and i call printf from the GPU inside a kernel. When I select the CPU as the device everything is fine. That seems strange...
Thank you very much for your help!
sylze