izncu

Delay when using the OpenCL API for asynchronous writes

Discussion created by izncu on Dec 4, 2019

Dear experts:

Sorry, I do not know where to file an issue, so I am posting it here.

My platform: Intel i3 CPU + AMD Radeon RX560 GPU

 

I want to use the overlap feature (overlapping transfers across command queues) in my code, but I see a heavy delay, so I cannot continue. I will paste an example to explain my issue.

In the code, several command queues are created, and three of them are used for three clEnqueueWriteBuffer calls that write the data blocks (the Y, U and V data blocks) separately and asynchronously. But the behavior is abnormal:

 

--------------------------

The writes succeed, but why is there such a heavy delay (~60 ms)? It is far too long.

--------------------------

 

the code is below:

int main() //main_v2

{   

    cl_platform_id *platformIds;

    cl_device_id device; //TODO: extend for multi devices on platform

    //should release below after use

    cl_context ocl_ctx;

    cl_command_queue mvQueue;

    cl_command_queue yuvQueue[4];

    cl_command_queue queue;     //according to CUDA's default stream ?  

    int err = 0;

   

    platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id));

    err = clGetPlatformIDs(1, platformIds, NULL);

 

    err = clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    if (err != CL_SUCCESS) {

        printf("can't get gpu device, try cpu...\n");

        err = clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_CPU, 1, &device, NULL);     

    }

    //创建OpenCL Context

    ocl_ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);

    if (ocl_ctx == NULL) {

        printf("create OpenCL context fail\n");

        exit(EXIT_FAILURE);

    }

 

    //create command queue

     cl_queue_properties props[] = {

      CL_QUEUE_PROPERTIES,

      CL_QUEUE_PROFILING_ENABLE,

      0

    };  

    for(int i=0; i<4; i++)

    {

        yuvQueue[i] = clCreateCommandQueueWithProperties(ocl_ctx, device, NULL/*props*/, &err);

        if (yuvQueue[i] == NULL) {

            printf("create command queue fail %d\n",err);

            exit(EXIT_FAILURE);

        }  

    }

   

    //memory object

    cl_mem pCurFrameObj;

    char *orig_buffer = (char *)malloc(1920 * 1080 * 3);

    pCurFrameObj = clCreateBuffer(ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,

       1920 * 1080 * 3, orig_buffer, &err);

    //pCurFrameObj = clCreateBuffer(ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,

    //    1920 * 1080 * 3, NULL, &err);

 

    int i = 0;

    while(i++<3)   //test  3 Rounds.===================

    {

        unsigned char *y = (unsigned char *)malloc(HEIGHT * WIDTH);

        unsigned char *u = (unsigned char *)malloc(HEIGHT * WIDTH);

        unsigned char *v = (unsigned char *)malloc(HEIGHT * WIDTH);

 

        readFrameFromYUVFile("z:\\test_files\\test_mv.yuv", y, u, v, 0, WIDTH, HEIGHT);

       //call API to write data. each call write about 2MB.======================

        err = clEnqueueWriteBuffer(yuvQueue[0], pCurFrameObj, CL_FALSE, 0, 1080*1920, (void*)y, 0, NULL, NULL);

        err = clEnqueueWriteBuffer(yuvQueue[1], pCurFrameObj, CL_FALSE, 1080*1920, 1080*1920, (void*)u, 0, NULL, NULL);

        err = clEnqueueWriteBuffer(yuvQueue[2], pCurFrameObj, CL_FALSE, 1080*1920 * 2, 1080*1920, (void*)v, 0, NULL, NULL);

        //Sleep(10);

        //free(v);

        //free(u);

        //free(y);

    }

    Sleep(60);

 

    //free all opencl resources, like queues, memory objs, etc.

    clReleaseMemObject(pCurFrameObj);

    for(int i=0;i<4;i++)

        clReleaseCommandQueue(yuvQueue[0]);

    clReleaseContext(ocl_ctx);

   

    printf("Press anykey to quit...");

    getchar();

    return 0;

}

Attachments

Outcomes