Archives Discussions

wwolff · ‎06-12-2016

Hi!

I´m spending a lot of time to solve a interoperation problem between OpenCL and OpenGL.

Basically i setup in my project Both OpenCL and OpenGL , and everything is running lovelly, including the interoperation using the OpenCL examples.

But...my problem is all this work fine if i use single buffers...(Like a float4 * buffer of vertices to be updated in OpenGL) , but when i use my interleaved buffers , i dont know why...only the first item of the buffer are updated and all other vertices are ignored.

Can someone there help me with this issue?

This is my Kernel Code:

typedef struct ICLVertexBuffer

    float4  position;

    float2  texCoord;

    float4  normal;

    float4  smooth;

    float4  tangente;

    float4  bitangente;

} ICLVertexBuffer;

inline float4 MultiplyMatrixVector(float16 m , float x , float y , float z)

   float   rhw, _x, _y, _z;

   float4  resultado;

   rhw = (x * m.s3 + y * m.s7 + z * m.sb + m.sf);

   if (fabs(rhw) < 0.00001)

      resultado.s0 = 0;

      resultado.s1 = 0;

      resultado.s2 = 0;

      return resultado;

   rhw = 1.0f / rhw;

   _x = rhw * (x * m.s0 + y * m.s4 + z * m.s8 + m.sc);

   _y = rhw * (x * m.s1 + y * m.s5 + z * m.s9 + m.sd);

   _z = rhw * (x * m.s2 + y * m.s6 + z * m.sa + m.se);

   resultado.s0 = _x;

   resultado.s1 = _y;

   resultado.s2 = _z;

   return resultado;

__kernel void TransformGLBuffer( __global  ICLVertexBuffer* VertexBuffer,

                                 unsigned int              BufferSize  ,

                                 float16                   TransfMatrix)

   local float4 vert;

   local float4 vert1;

   size_t id    = get_global_id(0);

   size_t index = id*sizeof(ICLVertexBuffer);

   vert1.x = VertexBuffer[index].position.x;

   vert1.y = VertexBuffer[index].position.y;

   vert1.z = VertexBuffer[index].position.z;

   vert =   MultiplyMatrixVector( TransfMatrix , vert1.x,

                                                 vert1.y,

                                                 vert1.z);

   VertexBuffer[index].position.x=vert.x;

   VertexBuffer[index].position.y=vert.y;

   VertexBuffer[index].position.z=vert.z;

The buffer inside of Vertex Buffer are:

//Vertex Buffer Data

attribute vec3 a_position;

attribute vec2 a_texcoord;

attribute vec3 a_normal;

attribute vec3 a_smooth;

attribute vec3 a_tangente;

attribute vec3 a_bitangente;

C++ Kernel Code to execute the interoperation:
void CICLKernel::PreparaParametro(cl_context pICL_DeviceContext,uint pBufferID, uint pGLBufferSize, CIMTMatriz pTransfMatrix) 
{
    cl_int  ICL_Error;
    float * matriz = new float[16];
    ICL_BufferVertex = clCreateFromGLBuffer(pICL_DeviceContext, CL_MEM_WRITE_ONLY, pBufferID , &ICL_Error);
    if(ICL_Error != CL_SUCCESS)
    {
        qWarning() << "Não foi possivel Mapear o Buffer OpenGL.";
        exit(1);
    }
    //Seta o tamanho do buffer
    //ICL_GLBufferSize = pGLBufferSize;
    ICL_GLBufferSize = sizeof(ICLVertexBuffer);
    //Seta a matriz de transformação
    matriz           = pTransfMatrix;
    ICL_TransfMatrix.s0 = matriz[0];
    ICL_TransfMatrix.s1 = matriz[1];
    ICL_TransfMatrix.s2 = matriz[2];
    ICL_TransfMatrix.s3 = matriz[3];
    ICL_TransfMatrix.s4 = matriz[4];
    ICL_TransfMatrix.s5 = matriz[5];
    ICL_TransfMatrix.s6 = matriz[6];
    ICL_TransfMatrix.s7 = matriz[7];
    ICL_TransfMatrix.s8 = matriz[8];
    ICL_TransfMatrix.s9 = matriz[9];
    ICL_TransfMatrix.sa = matriz[10];
    ICL_TransfMatrix.sb = matriz[11];
    ICL_TransfMatrix.sc = matriz[12];
    ICL_TransfMatrix.sd = matriz[13];
    ICL_TransfMatrix.se = matriz[14];
    ICL_TransfMatrix.sf = matriz[15];
    //Seta o Parametro do Buffer
    ICL_Error = clSetKernelArg(ICL_KernelID,0,sizeof(cl_mem),(void*)&ICL_BufferVertex);
    if(ICL_Error != CL_SUCCESS)
    {
        qWarning() << "Não foi possivel atribuir buffer a Kernel.";
        exit(1);
    }
    //Seta parametro do tamanho do buffer
    ICL_Error = clSetKernelArg(ICL_KernelID,1,sizeof(cl_uint),(void *)&ICL_GLBufferSize);
    if(ICL_Error != CL_SUCCESS)
    {
        qWarning() << "Não foi possivel atribuir buffer a Kernel.";
        exit(1);
    }
    ICL_Error = clSetKernelArg(ICL_KernelID,2,sizeof(cl_float16),(void *)&ICL_TransfMatrix);
    if(ICL_Error != CL_SUCCESS)
    {
        qWarning() << "Não foi possivel atribuir buffer a Kernel.";
        exit(1);
    }
}
void CICLKernel::ExecutaKernel(cl_command_queue pICL_DeviceCommandQueue)
{
   cl_int  ICL_Error;
   size_t globalWorkSize[1] = {ICL_GLBufferSize};
   size_t localWorkSize[2]  = {256, 1};
   // Adquire o Buffer GL
   ICL_Error = clEnqueueAcquireGLObjects(pICL_DeviceCommandQueue,1,&ICL_BufferVertex, 0, 0, NULL);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   cl_event  eventND[1];
   /*
   ICL_Error = clEnqueueNDRangeKernel(pICL_DeviceCommandQueue, ICL_KernelID, 1 , NULL, globalWorkSize ,NULL, 0, NULL , eventND );
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   */
   ICL_Error = clEnqueueTask(pICL_DeviceCommandQueue, ICL_KernelID, 0 , NULL, NULL );
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   
   ICL_Error = clWaitForEvents(1, &eventND[0]);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   ICL_Error = clFlush(pICL_DeviceCommandQueue);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   ICL_Error = clReleaseEvent(*eventND);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   
   ICL_Error = clEnqueueReleaseGLObjects(pICL_DeviceCommandQueue, 1, &ICL_BufferVertex, 0, 0, 0);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
   ICL_Error = clFinish(pICL_DeviceCommandQueue);
   if(ICL_Error != CL_SUCCESS)
   {
       qWarning() << "Não foi possivel requisitar buffer a Kernel.";
       exit(1);
   }
}

I need help to identify why only one vertice are being updated, since the buffer have in this case more than 1 vertice to be processed?
The transformation are correct and this single vertice are being properly transformed and rendered.
I test not using the Interoperation and the update using glMapBuffer runs ok too.
My only problem is identify why OpenCL are not processing ALL the vertices in the Buffer.

Any help will be much appreciated.
Kind Regards.

nou · ‎06-14-2016

Of course it process only one item when you use clEnqueueTask(). It is equal to clEnqueueNDRange with global work size set to 1.

Why do you have this line in kernel code?

size_t index = id*sizeof(ICLVertexBuffer);

wwolff · ‎06-14-2016

Hi!

Thanks so much for the response.

Well...i try use clEnqueueNDRange too using different values at the

Global work size , and the results still the same.

Can you have an example of a code using interleaved OpenGL buffers?

I only find examples updating images and float* buffers.

Any help will be much appreciated.

Kind Regards.

nou · ‎06-15-2016

My second question still stand. Why do you multiply id by sizeof() ICLVertexBuffer?

Archives Discussions

OpenCL Interoperation with Interleaved OpenGL Buffers