Using clinfo, I discovered that the RX5700XT supports OpenCL 2.1. So I decided to try out support for SPIR-V IL loading. However, I have run into some issues. Can anyone help point out my error.
simple_add.cl
__kernel void add(__global float *a, __global float *b, __global float *output) {
int id = get_global_id(0);
output[id] = a[id] + b[id];
}
program.c
#define CL_TARGET_OPENCL_VERSION 210
#include <CL/cl.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
int read_source(const char *file_name, char **source_code, size_t *length) {
FILE *file = fopen(file_name, "rb");
fseek(file, -1, SEEK_END);
long int offset = ftell(file);
rewind(file);
*source_code = malloc((offset+1)*sizeof(char));
fread(*source_code, sizeof(char), offset, file);
(*source_code)[offset] = '\0';
*length = offset;
fclose(file);
return 0;
}
typedef struct {
cl_context context;
cl_command_queue command_queue;
cl_program program;
cl_kernel kernel;
cl_int err;
cl_uint num_of_platforms;
cl_platform_id platform_id;
cl_platform_id *platform_ids;
cl_device_id device_id;
cl_uint num_of_devices;
} OCL_Meta;
int main() {
OCL_Meta meta = {0};
clGetPlatformIDs(0, NULL, &meta.num_of_platforms);
meta.platform_ids = malloc(meta.num_of_platforms * sizeof(cl_platform_id));
if (clGetPlatformIDs(meta.num_of_platforms, meta.platform_ids, NULL) != CL_SUCCESS) {
printf("Unable to get any platform_ids\n");
return 1;
}
bool found_desired_platform = false;
const char* desired_platform = "AMD Accelerated Parallel Processing";
//const char* desired_platform = "Portable Computing Language";
for (cl_uint i = 0; i < meta.num_of_platforms; i++) {
cl_platform_id platform = meta.platform_ids[i];
size_t string_len = 0;
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &string_len);
char *string = malloc(string_len + 1);
string[string_len] = '\0';
clGetPlatformInfo(platform, CL_PLATFORM_NAME, string_len+1, string, NULL);
if (strcmp(desired_platform, string) == 0) {
found_desired_platform = true;
meta.platform_id = platform;
free(string);
break;
} else {
free(string);
}
}
if (!found_desired_platform) {
printf("%s device platform not found, exiting\n", desired_platform);
return 1;
} else {
printf("%s device platform found and selected\n", desired_platform);
}
// NOTE assumes only one device per platform
if (clGetDeviceIDs(meta.platform_id, CL_DEVICE_TYPE_ALL, 1, &meta.device_id, &meta.num_of_devices)
!= CL_SUCCESS) {
printf("Unable to get device_id\n");
return 1;
}
cl_context_properties properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) meta.platform_id, 0 };
meta.context = clCreateContext(properties, 1, &meta.device_id, NULL, NULL, &meta.err);
meta.command_queue = clCreateCommandQueueWithProperties(meta.context, meta.device_id, NULL, &meta.err);
char *kernel_il = NULL;
size_t kernel_il_len = 0;
read_source("simple_add.spv", &kernel_il, &kernel_il_len);
meta.program = clCreateProgramWithIL(meta.context, kernel_il, kernel_il_len, &meta.err);
if ((meta.err = clBuildProgram(meta.program, 0, NULL, NULL, NULL, NULL)) != CL_SUCCESS) {
printf("Err: %i\n", meta.err);
return 1;
}
meta.kernel = clCreateKernel(meta.program, "simple_add", &meta.err);
const size_t work_size = 16;
float a[work_size];
float b[work_size];
for(int i = 0; i < work_size; i++) {
a[i] = i+1;
b[i] = work_size - i;
}
cl_mem a_buf = clCreateBuffer(meta.context, CL_MEM_READ_ONLY,
sizeof(float)*work_size, NULL, NULL);
cl_mem b_buf = clCreateBuffer(meta.context, CL_MEM_READ_ONLY,
sizeof(float)*work_size, NULL, NULL);
cl_mem out_buf = clCreateBuffer(meta.context, CL_MEM_READ_ONLY,
sizeof(float)*work_size, NULL, NULL);
clEnqueueWriteBuffer(meta.command_queue, a_buf, CL_TRUE, 0,
sizeof(float)*work_size, a, 0, NULL, NULL);
clEnqueueWriteBuffer(meta.command_queue, b_buf, CL_TRUE, 0,
sizeof(float)*work_size, b, 0, NULL, NULL);
clSetKernelArg(meta.kernel, 0, sizeof(cl_mem), &a_buf);
clSetKernelArg(meta.kernel, 1, sizeof(cl_mem), &b_buf);
clSetKernelArg(meta.kernel, 2, sizeof(cl_mem), &out_buf);
meta.err = clEnqueueNDRangeKernel(meta.command_queue, meta.kernel, 1, NULL,
&work_size, NULL, 0, NULL, NULL);
printf("err: %i\n", meta.err);
clFlush(meta.command_queue);
clFinish(meta.command_queue);
float c[work_size];
clEnqueueReadBuffer(meta.command_queue, out_buf, CL_TRUE, 0,
sizeof(float)*work_size, c, 0, NULL, NULL);
printf("a: ");
for (int i = 0; i < work_size; i++) {
printf("%.2f ", a[i]);
}
printf("\n");
printf("b: ");
for (int i = 0; i < work_size; i++) {
printf("%.2f ", b[i]);
}
printf("\n");
printf("a+b: ");
for (int i = 0; i < work_size; i++) {
printf("%.2f ", c[i]);
}
printf("\n");
return 0;
}
Build Commands
clang -cc1 -triple spir64-unknown-unknown -finclude-default-header -x cl -std=cl2.0 simple_add.cl -O0 -emit-llvm-bc -o simple_add.bc
llvm-spirv simple_add.bc -o simple_add.spv
gcc program.c -lOpenC
I keep gettting error code -48 when running the kernel with clEnqueqeNDRangeKernel which according to https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ means that the kernel is invalid.
Is there any compiling options or program functions that I need to call or call differently to make this work. I tried searching the internet but found no solutions.