6 Replies Latest reply on Apr 21, 2010 10:18 AM by omkaranathan

    openCL cl_mem object value not consistent. PLEASE HELP

    zoalord12

      i have a global cl_mem object called htt.

       

      htt is a single dimensional array .. but its used to represent a 3D array

      of size [2][LM+2][M_BLOCK]

      thus its size is 2 * (LM+2) * M_BLOCK * sizeod(cl_float)

      after i get it back .. i use a 3d offset inside the braces [  ]

       

      I use it in multiple kernels . the problem is that the value is not passed consistently to the other kernels.

       

      There is a weird issue, when i copy the buffer back right after the execution of the kernel, i can get the right value of htt anywhere in my code, if i dont, then i cannot get the right value of htt anywhere in my code.

       

      The code is attached. please help me out. thanks in advance .

      // -------------------------- CODE execute_sectionD(); int cdim[3]; cdim[0] = M_BLOCK; cdim[1] = LM+2; cdim[2] = 2; float *ptr;//[2][LM+2][M_BLOCK]; ptr = malloc (2 * (LM+2) * M_BLOCK * sizeof(float)); myerror = .0000001f; get_device_array("htt ",ptr,3,cdim); // gets the device array htt .. into ptr check_arrays("htt",myerror,ptr,tt,3,cdim); //checks 1 to 1 with the correct value present in tt for ptr on the outside // ------------------------------------------- calling the kernel void execute_sectionD(){ int MAKE_BINARY = 1; int USE_BINARY = 2; //****************** SECTION D cl_kernel hKernel = getkernel("sectionD.cl","sectionD","sectionD.binary",USE_BINARY); // setup parameter values CHECK_ERROR(clSetKernelArg(hKernel, 0, sizeof(cl_mem), (void *)&hdsm)); CHECK_ERROR(clSetKernelArg(hKernel, 1, sizeof(cl_mem), (void *)&hfdiruv)); CHECK_ERROR(clSetKernelArg(hKernel, 2, sizeof(cl_mem), (void *)&hfdifuv)); CHECK_ERROR(clSetKernelArg(hKernel, 3, sizeof(cl_mem), (void *)&hrr)); CHECK_ERROR(clSetKernelArg(hKernel, 4, sizeof(cl_mem), (void *)&hrsuvbm)); CHECK_ERROR(clSetKernelArg(hKernel, 5, sizeof(cl_mem), (void *)&hrs)); CHECK_ERROR(clSetKernelArg(hKernel, 6, sizeof(cl_mem), (void *)&hrsuvdf)); CHECK_ERROR(clSetKernelArg(hKernel, 7, sizeof(cl_mem), (void *)&htd)); CHECK_ERROR(clSetKernelArg(hKernel, 8, sizeof(cl_mem), (void *)&htt)); CHECK_ERROR(clSetKernelArg(hKernel, 9, sizeof(cl_mem), (void *)&hts)); CHECK_ERROR(clSetKernelArg(hKernel, 10, sizeof(cl_mem), (void *)&hcc)); CHECK_ERROR(clSetKernelArg(hKernel, 11, sizeof(cl_mem), (void *)&hLM)); CHECK_ERROR(clSetKernelArg(hKernel, 12, sizeof(cl_mem), (void *)&hM_BLOCK)); // execute kernel int dim = 1; //x , y size_t globalWorkSizeData[1] = {M_BLOCK}; CHECK_ERROR(clEnqueueNDRangeKernel(hCmdQueue, hKernel, dim,NULL, globalWorkSizeData, NULL,0, NULL, NULL)); clEnqueueBarrier (hCmdQueue); /* IF THIS PART OF THE CODE IS COMMENTED I GET THE WRONG VALUE OF HTT WHEN I USE CHECK_ARRAY() //OTHERWISE , WHEN IT IS NOT COMMENTED I GET THE RIGHT VALUE WHEN I CHECK THE VALUE OUTSIDE. float *ptr;//[2][LM+2][M_BLOCK]; ptr = malloc (2 * (LM+2) * M_BLOCK * sizeof(float)); CHECK_ERROR( clEnqueueReadBuffer(hCmdQueue, htt, CL_TRUE, 0,2*(LM+2)*M_BLOCK*sizeof(cl_float),ptr, 0, 0, 0)); int ii=0,jj=0,kk=0; for(kk=0;kk<2;kk++) {for(jj=0;jj<(LM+2);jj++) {for(ii=0;ii<M_BLOCK;ii++) { if(ptr[ kk*(LM+2)*M_BLOCK + jj*M_BLOCK + ii ] ==1.0f) printf( "INSIDE[%i,%i,%i]=%f ---" ,kk,jj,ii, ptr[kk*(LM+2)*M_BLOCK + jj*M_BLOCK + ii]); }}} free(ptr); */ clReleaseKernel(hKernel); } // ------------------- htt initialization htt = clCreateBuffer(hContext,CL_MEM_READ_WRITE, 2*(LM+2)*M_BLOCK*sizeof(cl_float),0, 0); //-------------------------------- KERNEL CODE _kernel void sectionD( __global float * dsm, __global float * fdiruv, __global float * fdifuv, __global float * rr, __global float * rsuvbm, __global float * rs , __global float * rsuvdf, __global float * td, __global float * tt, __global float * ts, __global float * cc, __global const int *hLM, __global const int *hM_BLOCK ) { // Vector element index int xIndex = get_global_id(0); int mb = xIndex; int M_BLOCK = hM_BLOCK[0]; int LM = hLM[0]; tt[0*(LM+2)*M_BLOCK + 0 *M_BLOCK + mb]=1.0f; tt[1*(LM+2)*M_BLOCK + 0 *M_BLOCK + mb]=1.0f; }

        • openCL cl_mem object value not consistent. PLEASE HELP
          Raistmer
          what version of Catalyst driver do you use?
            • openCL cl_mem object value not consistent. PLEASE HELP
              zoalord12

              hi ,

               

              all i know is that the device i am running opencl on is a

              JS22 (Power6 4GHz):  with linux 2.6.18-164.2.1.el5

               

              i am using an out of order queure, but i make sure i have barriers enqueued for all results to pass ... this is really annoying :'(

               

              I tried doing a row major offset in the ptr array  and a column major, thinking that maybe the device moves things around after the kernel has run successfully .

              The worst part is there is not a single array element that equal 1 ( ..which  the kernel execution is supposed to do  )...

               

              but when i copy a buffer right after the kernel executes, it all works fine. :S

               

              i have like a 100+ arrays in the code .. 2D and 3D... i cant bring em back right after execution. that would kill the performance.. i am trying to do all the computation in the device by passing cl_mem object addresses to different kernels ..