1 Reply Latest reply on Jul 2, 2013 10:43 AM by liwoog

    clAmdFft bug

    liwoog

      Hi,

       

      I have isolated what appears to be a bug in clAmdFft. Test code is below.

      I run a batch of two 1D real vectors through the FFT: the first is all zeros and the second contains a short pulse. After the transform, the all-zero vector ends up with non-zero values.

       

      Input:

      0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00
      0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+005.00E-011.00E+005.00E-010.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00

       

      Output:

      0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+001.49E-080.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00-2.98E-08-1.49E-08-2.98E-080.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00
      2.00E+000.00E+00-1.98E+001.49E-081.92E+000.00E+00-1.83E+001.49E-081.71E+000.00E+00-1.56E+000.00E+001.38E+000.00E+00-1.20E+001.00E+000.00E+00-8.05E-010.00E+006.17E-010.00E+00-4.44E-010.00E+002.93E-010.00E+00-1.69E-011.49E-087.61E-020.00E+00-1.92E-021.49E-080.00E+000.00E+000.00E+00

       

      #include <stdio.h>

      #include <math.h>

      #include <string.h>

      #ifdef __APPLE__

      #include <OpenCL/opencl.h>

      #else

      #include <CL/opencl.h>

      #endif

      #include "clAmdFft.h"

       

      int main(int argc, char *argv[])

      {

          cl_platform_id      platform_id;

          cl_device_id        device_id;

          cl_context          context;

          cl_mem              mem;

          cl_command_queue    queue;

          clAmdFftPlanHandle  plan;

          clAmdFftSetupData   setupData;

       

       

          float           data[2][48];

          size_t          distance = &data[1][0] - &data[0][0];

          size_t          lengths[3]  = {32, 1, 1};

          size_t          strideIn[4] = {1, distance, distance, distance};

          size_t          strideOut[4]    = {1, distance / 2, distance / 2, distance / 2};

       

       

          memset(data, 0, sizeof(data));

          data[1][15] = 0.5f;

          data[1][16] = 1.0f;

          data[1][17] = 0.5f;

       

       

          clGetPlatformIDs(1, &platform_id, NULL);

          clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

          context = clCreateContext(NULL, 1, &device_id, NULL, NULL, NULL);

          queue = clCreateCommandQueue(context, device_id, 0, NULL);

          mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * 2 * distance, NULL, NULL);

          clEnqueueWriteBuffer(queue, mem, CL_TRUE, 0L, sizeof(float) * 2 * distance, data[0], 0, NULL, NULL);

       

       

          setupData.major         = clAmdFftVersionMajor;

          setupData.minor         = clAmdFftVersionMinor;

          setupData.patch         = clAmdFftVersionPatch;

          setupData.debugFlags    = 0;

       

       

          clAmdFftSetup(&setupData);

          clAmdFftCreateDefaultPlan(&plan, context, CLFFT_1D, lengths);

          clAmdFftSetPlanPrecision(plan, CLFFT_SINGLE);

          clAmdFftSetLayout(plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);

          clAmdFftSetPlanDistance(plan, distance, distance / 2);

          clAmdFftSetPlanInStride(plan, CLFFT_1D, strideIn);

          clAmdFftSetPlanOutStride(plan, CLFFT_1D, strideOut);

          clAmdFftSetResultLocation(plan, CLFFT_INPLACE);

          clAmdFftSetPlanBatchSize(plan, 2);

          clAmdFftBakePlan(plan, 1, &queue, NULL, NULL);

          clAmdFftEnqueueTransform(plan, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &mem, &mem, NULL);

       

         for (int j = 0; j < 2; j++) {

              for (int i = 0; i < 34; i++)

                  printf("%e\t", data[j][i]);

              printf("\n");

          }

          printf("\n");

       

          clEnqueueReadBuffer(queue, mem, CL_TRUE, 0L, sizeof(float) * 2 * distance, data[0], 0, NULL, NULL);

       

          for (int j = 0; j < 2; j++) {

              for (int i = 0; i < 34; i++)

                  printf("%e\t", data[j][i]);

              printf("\n");

          }

          printf("\n");

       

          clAmdFftDestroyPlan(&plan);

          clAmdFftTeardown();

       

          clReleaseMemObject(mem);

          clReleaseCommandQueue(queue);

          clReleaseContext(context);

          clReleaseDevice(device_id);

      }