0 Replies Latest reply on Feb 25, 2009 7:02 PM by rick.weber

    Weird performance issue caused by heavy workload on host

    rick.weber

      So, I have a kernel running in CAL which runs via the following code:

       

       sleep(1);

       

          clock_gettime(CLOCK_REALTIME, &time2);

       

          cacheHitRateCounter.begin();

          idlePercentCounter.begin();

       

          radiiKernel.load(device);

          expKernel.load(device);

          //Run kernels, reassigning x,y,z,basis symbols

          for(i = 0; i < kNumSets; i+=8)

          {

            CALdomain expDomain = {0, 0, kNbas/4, kBlockSize};

            CALevent computeRadiiDone;

       

            CALbuffer* radiiParameters[] = {coordGPU[i/8], r2GPU[0]};

            const char* radiiSymbols[] = {"i0", "o0"};

            CALdomain radiiDomain = {0, 0, kBlockSize, 8};

            radiiKernel.assignSymbolAndRun(

              radiiParameters,

              radiiSymbols,

              2,

              device,

              "main",

              &computeRadiiDone,

              &radiiDomain

            ;

       

            //while(calCtxIsEventDone(device.getContext(),computeRadiiDone) ==

            //  CAL_RESULT_PENDING){}

       

            //Assign symbol names and run kernel

            CALbuffer* expParameters[] = {

              r2GPU[0],

              alphaGPU,

              basisGPU[i+0],

              basisGPU[i+1],

              basisGPU[i+2],

              basisGPU[i+3],

              basisGPU[i+4],

              basisGPU[i+5],

              basisGPU[i+6],

              basisGPU[i+7],

              };

            const char* expSymbols[] = {
              "i0",
              "i1",
              "o0",
              "o1",
              "o2",
              "o3",
              "o4",
              "o5",
              "o6",
              "o7"};
          expKernel.assignSymbolAndRun(
              expParameters,
              expSymbols,
              10,
              device,
              "main",
              &event[i/8],
              &expDomain);
          }
          for(i = 0; i < kNumSets/8; i++)
          {
        RETRY_WAIT:
            //printf("%d %d\n",event,i);
            if(calCtxIsEventDone(device.getContext(), event)
              == CAL_RESULT_PENDING)
            {
              goto RETRY_WAIT;
            }
          }
          clock_gettime(CLOCK_REALTIME, &time3);
      I compute the execution time as the difference between time3 and time2. assignSymbolAndRun is a high-level wrapper that I wrote to make my life easier. The issue is this: when sleep(1) is commented out, the execution takes ~16 ms to complete. When sleep(1) is not commented out, the execution takes over 40 ms. sleep(1) can be substituted with any heavy computational load running on the host and the same result occurs. What could cause this? The sleep occurs before I take the starting time measurement (time2).