Archives Discussions

mosix0 · ‎01-08-2010

According to the OpenCL specification, when running asynchronous kernels, a call to clFlush() is only required when using several command queues:

Chapter 5.10 - "Flush and Finish" says:

To use event objects that refer to commands enqueued in a command-queue as event objects to wait on by commands enqueued in a different command-queue, the application must call a clFlush or any blocking commands that perform an implicit flush of the command-queue where the commands that refer to these event objects are enqueued.

However, the program below uses only one command-queue, and the kernel never actually runs unless clFlush() is issued. Run the program with no arguments and it will enter an infinite loop, waiting for a kernel that never even starts. Run it with any argument and it will complete.

Common-sense is that if a kernel is queued, it will eventually run!

Here is the program:


#include <cl.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

/*
 * OpenCL C source for the "add" kernel: each work-item i computes
 * C[i] = A[i] + B[i], guarded by the element count n.
 * (The element subscripts are required; adding the bare pointers
 * "C = A + B" is not valid OpenCL C and the program would not build.)
 */
char *src =
    "__kernel void add(__global int *A, __global int *B, __global int *C, int n)"
    "{int i = get_global_id(0); if (i < n) C[i] = A[i] + B[i];}";

/* Number of int elements in each buffer and the 1-D global work size. */
#define SIZE 60000

/*
 * Demonstration program: builds the "add" kernel, enqueues it once on a
 * single out-of-order command queue, and then polls the kernel's event with
 * clGetEventInfo(), printing the execution status on every iteration.
 *
 * With no command-line arguments the queue is never flushed by this program.
 * With any argument, clFlush() is called on each polling iteration.
 *
 * Returns 0 once the event reports CL_COMPLETE; exits with status 1 on any
 * setup failure and returns 1 if the event reports an error (negative) status.
 */
int
main(int na, char *argv[])
{
    /*
     * Static storage: the buffers are zero-initialized (the kernel reads A
     * and B through CL_MEM_USE_HOST_PTR) and ~720 KB is kept off the stack.
     */
    static int A[SIZE], B[SIZE], C[SIZE];

    cl_platform_id platform;
    cl_context_properties cps[3];
    cl_context context;
    cl_command_queue queue;
    cl_device_id device;
    cl_program program;
    cl_mem ma, mb, mc;
    cl_kernel kernel;
    cl_event event;
    int n = SIZE;
    size_t sizes[3] = {SIZE, 0, 0};
    cl_int res;
    cl_int status;
    int do_flush = (na > 1);    /* any argument enables the explicit flush */

    (void)argv;

    if (clGetPlatformIDs(1, &platform, NULL) != CL_SUCCESS) {
        fprintf(stderr, "Could not get a platform\n");
        exit(1);
    }

    /* Zero-terminated property list selecting the platform for the context. */
    cps[0] = CL_CONTEXT_PLATFORM;
    cps[1] = (cl_context_properties)platform;
    cps[2] = 0;

    if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL)
        != CL_SUCCESS) {
        fprintf(stderr, "No device\n");
        exit(1);
    }

    context = clCreateContext(cps, 1, &device, NULL, NULL, NULL);
    if (!context) {
        fprintf(stderr, "No context\n");
        exit(1);
    }

    queue = clCreateCommandQueue(context, device,
                                 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, NULL);
    if (!queue) {
        fprintf(stderr, "No queue\n");
        exit(1);
    }

    program = clCreateProgramWithSource(context, 1, (const char **)&src,
                                        NULL, NULL);
    if (!program) {
        fprintf(stderr, "No program\n");
        exit(1);
    }

    if (clBuildProgram(program, 0, NULL, "", NULL, NULL) != CL_SUCCESS) {
        fprintf(stderr, "Program not built\n");
        exit(1);
    }

    kernel = clCreateKernel(program, "add", NULL);
    if (!kernel) {
        fprintf(stderr, "No kernel\n");
        exit(1);
    }

    /* All three buffers wrap the host arrays directly (no device copy requested). */
    if (!(ma = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                              sizeof(A), A, NULL)) ||
        !(mb = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                              sizeof(B), B, NULL)) ||
        !(mc = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
                              sizeof(C), C, NULL))) {
        fprintf(stderr, "Failed creating buffers\n");
        exit(1);
    }

    /* A kernel with unset arguments must not be enqueued, so fail hard here. */
    if (clSetKernelArg(kernel, 0, sizeof(ma), &ma) != CL_SUCCESS ||
        clSetKernelArg(kernel, 1, sizeof(mb), &mb) != CL_SUCCESS ||
        clSetKernelArg(kernel, 2, sizeof(mc), &mc) != CL_SUCCESS ||
        clSetKernelArg(kernel, 3, sizeof(n), &n) != CL_SUCCESS) {
        fprintf(stderr, "Failed setting arg n\n");
        exit(1);
    }

    res = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, sizes,
                                 NULL, 0, NULL, &event);
    if (res != CL_SUCCESS) {
        fprintf(stderr, "Kernel not running, res=%d\n", res);
        exit(1);
    }

    /*
     * Non-blocking poll of the kernel's event. The loop ends when the status
     * is CL_COMPLETE or negative (an error code).
     */
    do {
        res = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int), &status, NULL);
        if (res != CL_SUCCESS) {
            fprintf(stderr, "clGetEventInfo failed (%d)\n", (int)res);
            exit(1);
        }

        switch (status) {
        case CL_QUEUED:
            printf("Execution Status: Queued\n");
            break;
        case CL_SUBMITTED:
            printf("Execution Status: Submitted\n");
            break;
        case CL_RUNNING:
            printf("Execution Status: Running\n");
            break;
        case CL_COMPLETE:
            printf("Execution Status: Complete\n");
            break;
        default:
            printf("Execution Status: Error (%d)\n", status);
            break;
        }

        /* Only flush when requested on the command line (the point of the demo). */
        if (do_flush) {
            clFlush(queue);
        }
    } while (status != CL_COMPLETE && status >= 0);

    /* Release OpenCL objects before the host arrays backing the buffers go away. */
    clReleaseEvent(event);
    clReleaseMemObject(ma);
    clReleaseMemObject(mb);
    clReleaseMemObject(mc);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return status == CL_COMPLETE ? 0 : 1;
}

genaganna · ‎01-08-2010

You have to use clFinish or clWaitForEvents to complete commands, or you should use blocking calls.

cstolarik · ‎02-02-2010

Can you please provide more detail? Why is it necessary to use clWaitForEvents() or clFinish()? Why can't a query on the event with clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, ...) be used to determine when the kernel has completed?

All of the suggested methods are blocking in nature, is there any method of determining kernel completion in a non-blocking manner?

nou · ‎02-02-2010

Well, you should call clFlush() and then query the status of the kernel execution. Then it should work.

genaganna · ‎02-04-2010

Originally posted by: cstolarik Can you please provide more detail?

Steps to create, issue and complete OpenCL commands

1. Creating OpenCL commands(clEnqueue* calls)

2. Issuing OpenCL commands(clFlush)

3. Issuing and waiting to complete OpenCL commands.(blocking calls, clWaitForEvents, clFinish)

Please see OpenCL spec for more details of these API's

Why is it necessary to use a clWaitForEvents() or clFinish(),

After creating any OpenCL command you have to issue that command to the associated device. Use clFlush to issue all commands. After that you can use clGetEventInfo to learn the status of the issued command.

Why can't a query on the event with clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, ...) be used to determine when the kernel has completed?

clGetEventInfo does not say anything about issuing and completing of an event.

All of the suggested methods are blocking in nature, is there any method of determining kernel completion in a non-blocking manner?

clEnqueueNDRangeKernel is a non-blocking function.

Archives Discussions

Is clFlush() required?