AnsweredAssumed Answered

Enqueuing to device side queue in a loop issue

Question asked by doqtor on Oct 13, 2015
Latest reply on Oct 14, 2015 by dipak

In my code I have kernelA and kernelB. kernelB depends on kernelA results. I am iterating over this kernels thousand of times and each iteration depends on the results from the previous iteration.

 

 

Below is the minimal version reproducing the issue - enqueue_kernel is either returning -1 or hanging when size parameter from kernelLauncher is set to >= 513. Everything seems to be OK when size is set to <= 512. Is this code OK? Am I hitting some hardware limit here? (1025th kernel enqueue is failing).

 

__kernel  __attribute__((reqd_work_group_size(256, 1, 1)))
void kernelA(int index)
{}

__kernel  __attribute__((reqd_work_group_size(256, 1, 1)))
void kernelB(int index)
{}

__kernel  __attribute__((reqd_work_group_size(1, 1, 1)))
__kernel void kernelLauncher(int size,  __global int *err)
{
    queue_t default_queue = get_default_queue();
    clk_event_t ev1, ev2;
    int ret;

    for (int index = 0; index < size; ++index)
    {
        void(^fnKernelA)(void) = ^{ kernelA(index); };

        if (index == 0)
        {
            ret = enqueue_kernel(default_queue,
                CLK_ENQUEUE_FLAGS_NO_WAIT,
                ndrange_1D(3*256, 256),
                0, NULL, &ev1,
                fnKernelA);
        }
        else
        {
            ret = enqueue_kernel(default_queue,
                CLK_ENQUEUE_FLAGS_NO_WAIT,
                ndrange_1D(3 * 256, 256),
                1, &ev2, &ev1,
                fnKernelA);
        }

        if (ret != CLK_SUCCESS)
        {
            *err = ret;
            return;
        }

        void(^fnKernelB)(void) = ^{ kernelB(index); };

        ret = enqueue_kernel(default_queue,
            CLK_ENQUEUE_FLAGS_NO_WAIT,
            ndrange_1D(256, 256),
            1, &ev1, &ev2,
            fnKernelB);

        if (ret != CLK_SUCCESS)
        {
            *err = ret;
            return;
        }
    }
}

 

My set up:

Ubuntu 14.04, R9 295, fglrx 15.20.3, AMD APP SDK 3.0

Outcomes