3 Replies Latest reply on Jun 15, 2016 1:22 PM by nou

    OpenCL Interoperation with Interleaved OpenGL Buffers

    wwolff

      Hi!

      I've spent a lot of time trying to solve an interoperation problem between OpenCL and OpenGL.

      Basically, I set up both OpenCL and OpenGL in my project, and everything runs nicely, including the interoperation from the OpenCL examples.

      My problem is that all of this works fine only when I use single-attribute buffers (such as a float4* buffer of vertices to be updated in OpenGL). When I use my interleaved buffers, only the first item of the buffer is updated and all the other vertices are ignored, and I don't know why.

      Can someone help me with this issue?

       

      This is my kernel code:

      /*
       * One interleaved vertex record, as the kernel views the shared vertex buffer.
       *
       * NOTE(review): in OpenCL C a float4 is 16 bytes with 16-byte alignment and a
       * float2 is 8 bytes, so padding is expected after texCoord and one record
       * should occupy 96 bytes. The host/OpenGL side must write the buffer with
       * exactly the same stride and member offsets. If the host packs
       * three-component vectors (as the vec3 shader attributes suggest), the two
       * layouts will not match -- confirm against the host-side vertex struct.
       */
      typedef struct ICLVertexBuffer

      {
          float4  position;    /* x, y, z are read and rewritten by TransformGLBuffer */
          float2  texCoord;    /* not accessed by TransformGLBuffer */
          float4  normal;      /* not accessed by TransformGLBuffer */
          float4  smooth;      /* not accessed by TransformGLBuffer */
          float4  tangente;    /* "tangent"; not accessed by TransformGLBuffer */
          float4  bitangente;  /* "bitangent"; not accessed by TransformGLBuffer */
      } ICLVertexBuffer;
      /*
       * Transforms the point (x, y, z, 1) by the 4x4 matrix m and divides the
       * result by its homogeneous w component.
       *
       * Matrix components are combined as
       *    x' = x*m.s0 + y*m.s4 + z*m.s8 + m.sc
       *    y' = x*m.s1 + y*m.s5 + z*m.s9 + m.sd
       *    z' = x*m.s2 + y*m.s6 + z*m.sa + m.se
       *    w' = x*m.s3 + y*m.s7 + z*m.sb + m.sf
       *
       * Returns (x'/w', y'/w', z'/w', 1). When |w'| is below 0.00001 the divide is
       * skipped and (0, 0, 0, 0) is returned instead.
       */
      inline float4 MultiplyMatrixVector(float16 m , float x , float y , float z)
      {
         float4 resultado;
         float  rhw = x * m.s3 + y * m.s7 + z * m.sb + m.sf;

         /* Assign every component up front so that no path returns an
            indeterminate value (the fourth component used to be left unset). */
         resultado.s0 = 0.0f;
         resultado.s1 = 0.0f;
         resultado.s2 = 0.0f;
         resultado.s3 = 0.0f;

         /* The 'f' suffix keeps the comparison in single precision; an
            unsuffixed literal is a double. */
         if (fabs(rhw) < 0.00001f)
         {
            return resultado;
         }

         rhw = 1.0f / rhw;
         resultado.s0 = rhw * (x * m.s0 + y * m.s4 + z * m.s8 + m.sc);
         resultado.s1 = rhw * (x * m.s1 + y * m.s5 + z * m.s9 + m.sd);
         resultado.s2 = rhw * (x * m.s2 + y * m.s6 + z * m.sa + m.se);
         resultado.s3 = 1.0f;
         return resultado;
      }
      /*
       * Transforms, in place, the position of one vertex record per work-item.
       *
       * VertexBuffer  interleaved vertex records; only position.x/y/z are touched.
       * BufferSize    number of ICLVertexBuffer records in VertexBuffer. Work-items
       *               whose global id is not below this value return without
       *               accessing memory, so the global size may be rounded up.
       * TransfMatrix  4x4 matrix passed on to MultiplyMatrixVector.
       *
       * Launch with a one-dimensional range holding one work-item per vertex.
       */
      __kernel void TransformGLBuffer( __global  ICLVertexBuffer* VertexBuffer,
                                       unsigned int              BufferSize  ,
                                       float16                   TransfMatrix)
      {
         /* VertexBuffer is a typed pointer, so the work-item id already is the
            record index. Multiplying it by sizeof(ICLVertexBuffer) would apply the
            record stride twice and send every id except 0 far out of bounds. */
         const size_t index = get_global_id(0);

         if (index >= BufferSize)
         {
            return;
         }

         /* Private (per-work-item) temporaries: 'local' storage is shared by the
            whole work-group and would be raced on by its work-items. */
         const float px = VertexBuffer[index].position.x;
         const float py = VertexBuffer[index].position.y;
         const float pz = VertexBuffer[index].position.z;

         const float4 vert = MultiplyMatrixVector(TransfMatrix, px, py, pz);

         VertexBuffer[index].position.x = vert.x;
         VertexBuffer[index].position.y = vert.y;
         VertexBuffer[index].position.z = vert.z;
      }

      The attributes stored in the vertex buffer are:

      // Vertex buffer data: the per-vertex inputs declared by the shader.
      // NOTE(review): the vector attributes below are vec3 (vec2 for the texture
      // coordinate), while the OpenCL ICLVertexBuffer struct declares float4 /
      // float2 members -- confirm that the buffer's stride and member offsets
      // agree with both declarations.

      attribute vec3 a_position;
      attribute vec2 a_texcoord;
      attribute vec3 a_normal;
      attribute vec3 a_smooth;
      attribute vec3 a_tangente;
      attribute vec3 a_bitangente;

      C++ host code (kernel wrapper class) that performs the interoperation:

      void CICLKernel::PreparaParametro(cl_context pICL_DeviceContext,uint pBufferID, uint pGLBufferSize, CIMTMatriz pTransfMatrix)

      {
          cl_int  ICL_Error;
          float * matriz = new float[16];
          ICL_BufferVertex = clCreateFromGLBuffer(pICL_DeviceContext, CL_MEM_WRITE_ONLY, pBufferID , &ICL_Error);
          if(ICL_Error != CL_SUCCESS)
          {
              qWarning() << "Não foi possivel Mapear o Buffer OpenGL.";
              exit(1);
          }
          //Seta o tamanho do buffer
          //ICL_GLBufferSize = pGLBufferSize;
          ICL_GLBufferSize = sizeof(ICLVertexBuffer);
          //Seta a matriz de transformação
          matriz           = pTransfMatrix;
          ICL_TransfMatrix.s0 = matriz[0];
          ICL_TransfMatrix.s1 = matriz[1];
          ICL_TransfMatrix.s2 = matriz[2];
          ICL_TransfMatrix.s3 = matriz[3];
          ICL_TransfMatrix.s4 = matriz[4];
          ICL_TransfMatrix.s5 = matriz[5];
          ICL_TransfMatrix.s6 = matriz[6];
          ICL_TransfMatrix.s7 = matriz[7];
          ICL_TransfMatrix.s8 = matriz[8];
          ICL_TransfMatrix.s9 = matriz[9];
          ICL_TransfMatrix.sa = matriz[10];
          ICL_TransfMatrix.sb = matriz[11];
          ICL_TransfMatrix.sc = matriz[12];
          ICL_TransfMatrix.sd = matriz[13];
          ICL_TransfMatrix.se = matriz[14];
          ICL_TransfMatrix.sf = matriz[15];
          //Seta o Parametro do Buffer
          ICL_Error = clSetKernelArg(ICL_KernelID,0,sizeof(cl_mem),(void*)&ICL_BufferVertex);
          if(ICL_Error != CL_SUCCESS)
          {
              qWarning() << "Não foi possivel atribuir buffer a Kernel.";
              exit(1);
          }
          //Seta parametro do tamanho do buffer
          ICL_Error = clSetKernelArg(ICL_KernelID,1,sizeof(cl_uint),(void *)&ICL_GLBufferSize);
          if(ICL_Error != CL_SUCCESS)
          {
              qWarning() << "Não foi possivel atribuir buffer a Kernel.";
              exit(1);
          }
          ICL_Error = clSetKernelArg(ICL_KernelID,2,sizeof(cl_float16),(void *)&ICL_TransfMatrix);
          if(ICL_Error != CL_SUCCESS)
          {
              qWarning() << "Não foi possivel atribuir buffer a Kernel.";
              exit(1);
          }
      }
      void CICLKernel::ExecutaKernel(cl_command_queue pICL_DeviceCommandQueue)
      {
         cl_int  ICL_Error;
         size_t globalWorkSize[1] = {ICL_GLBufferSize};
         size_t localWorkSize[2]  = {256, 1};
         // Adquire o Buffer GL
         ICL_Error = clEnqueueAcquireGLObjects(pICL_DeviceCommandQueue,1,&ICL_BufferVertex, 0, 0, NULL);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         cl_event  eventND[1];
         /*
         ICL_Error = clEnqueueNDRangeKernel(pICL_DeviceCommandQueue, ICL_KernelID, 1 , NULL, globalWorkSize ,NULL, 0, NULL , eventND );
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         */
         ICL_Error = clEnqueueTask(pICL_DeviceCommandQueue, ICL_KernelID, 0 , NULL, NULL );
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         
         ICL_Error = clWaitForEvents(1, &eventND[0]);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         ICL_Error = clFlush(pICL_DeviceCommandQueue);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         ICL_Error = clReleaseEvent(*eventND);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         
         ICL_Error = clEnqueueReleaseGLObjects(pICL_DeviceCommandQueue, 1, &ICL_BufferVertex, 0, 0, 0);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
         ICL_Error = clFinish(pICL_DeviceCommandQueue);
         if(ICL_Error != CL_SUCCESS)
         {
             qWarning() << "Não foi possivel requisitar buffer a Kernel.";
             exit(1);
         }
      }

      I need help identifying why only one vertex is being updated, even though the buffer in this case has more than one vertex to process.
      The transformation is correct: that single vertex is properly transformed and rendered.
      I also tested without the interoperation, and updating through glMapBuffer works fine too.
      My only problem is finding out why OpenCL is not processing ALL the vertices in the buffer.

      Any help will be much appreciated.
      Kind Regards.