Dear experts:
Sorry I do not know where to create issue, So make it here.
my platform: Intel i3 CPU + AMD Radeon RX560 GPU
I want to use the overlap feature to design my code. But I find there is heavy delay so I cannot continue. I would paste a example to explain my issue.
In code 3 queues were created, and called 3 clEnqueueWriteBuffer API to write data blocks(Y,U,V data block) separately, Asychronously. But it’s abnormal:
--------------------------
it can write, but why so heavy delay (~60ms)? it’s too long.
--------------------------
the code is below:
int main() //main_v2
{
cl_platform_id *platformIds;
cl_device_id device; //TODO: extend for multi devices on platform
//should release below after use
cl_context ocl_ctx;
cl_command_queue mvQueue;
cl_command_queue yuvQueue[4];
cl_command_queue queue; //according to CUDA's default stream ?
int err = 0;
platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id));
err = clGetPlatformIDs(1, platformIds, NULL);
err = clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err != CL_SUCCESS) {
printf("can't get gpu device, try cpu...\n");
err = clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
}
//创建OpenCL Context
ocl_ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (ocl_ctx == NULL) {
printf("create OpenCL context fail\n");
exit(EXIT_FAILURE);
}
//create command queue
cl_queue_properties props[] = {
CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE,
0
};
for(int i=0; i<4; i++)
{
yuvQueue = clCreateCommandQueueWithProperties(ocl_ctx, device, NULL/*props*/, &err);
if (yuvQueue == NULL) {
printf("create command queue fail %d\n",err);
exit(EXIT_FAILURE);
}
}
//memory object
cl_mem pCurFrameObj;
char *orig_buffer = (char *)malloc(1920 * 1080 * 3);
pCurFrameObj = clCreateBuffer(ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
1920 * 1080 * 3, orig_buffer, &err);
//pCurFrameObj = clCreateBuffer(ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
// 1920 * 1080 * 3, NULL, &err);
int i = 0;
while(i++<3) //test 3 Rounds.===================
{
unsigned char *y = (unsigned char *)malloc(HEIGHT * WIDTH);
unsigned char *u = (unsigned char *)malloc(HEIGHT * WIDTH);
unsigned char *v = (unsigned char *)malloc(HEIGHT * WIDTH);
readFrameFromYUVFile("z:\\test_files\\test_mv.yuv", y, u, v, 0, WIDTH, HEIGHT);
//call API to write data. each call write about 2MB.======================
err = clEnqueueWriteBuffer(yuvQueue[0], pCurFrameObj, CL_FALSE, 0, 1080*1920, (void*)y, 0, NULL, NULL);
err = clEnqueueWriteBuffer(yuvQueue[1], pCurFrameObj, CL_FALSE, 1080*1920, 1080*1920, (void*)u, 0, NULL, NULL);
err = clEnqueueWriteBuffer(yuvQueue[2], pCurFrameObj, CL_FALSE, 1080*1920 * 2, 1080*1920, (void*)v, 0, NULL, NULL);
//Sleep(10);
//free(v);
//free(u);
//free(y);
}
Sleep(60);
//free all opencl resources, like queues, memory objs, etc.
clReleaseMemObject(pCurFrameObj);
for(int i=0;i<4;i++)
clReleaseCommandQueue(yuvQueue[0]);
clReleaseContext(ocl_ctx);
printf("Press anykey to quit...");
getchar();
return 0;
}