AnsweredAssumed Answered

clAmdFft bug

Question asked by liwoog on Jun 14, 2013
Latest reply on Jul 2, 2013 by liwoog

Hi,

 

I have isolated what seems to be a bug in clAmdFft. The test code is below.

I run a batched transform of two 1D vectors: one contains a short pulse and the other contains only zeros. After the FFT, however, the all-zero vector ends up with non-zero values.

 

Input:

0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00
0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+005.00E-011.00E+005.00E-010.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00

 

Output:

0.00E+000.00E+000.00E+000.00E+000.00E+000.00E+001.49E-080.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00-2.98E-08-1.49E-08-2.98E-080.00E+000.00E+000.00E+000.00E+000.00E+000.00E+00
2.00E+000.00E+00-1.98E+001.49E-081.92E+000.00E+00-1.83E+001.49E-081.71E+000.00E+00-1.56E+000.00E+001.38E+000.00E+00-1.20E+001.00E+000.00E+00-8.05E-010.00E+006.17E-010.00E+00-4.44E-010.00E+002.93E-010.00E+00-1.69E-011.49E-087.61E-020.00E+00-1.92E-021.49E-080.00E+000.00E+000.00E+00

 

#include <stdio.h>

#include <math.h>

#include <string.h>

#ifdef __APPLE__

#include <OpenCL/opencl.h>

#else

#include <CL/opencl.h>

#endif

#include "clAmdFft.h"

 

int main(int argc, char *argv[])

{

    cl_platform_id      platform_id;

    cl_device_id        device_id;

    cl_context          context;

    cl_mem              mem;

    cl_command_queue    queue;

    clAmdFftPlanHandle  plan;

    clAmdFftSetupData   setupData;

 

 

    float           data[2][48];

    size_t          distance = &data[1][0] - &data[0][0];

    size_t          lengths[3]  = {32, 1, 1};

    size_t          strideIn[4] = {1, distance, distance, distance};

    size_t          strideOut[4]    = {1, distance / 2, distance / 2, distance / 2};

 

 

    memset(data, 0, sizeof(data));

    data[1][15] = 0.5f;

    data[1][16] = 1.0f;

    data[1][17] = 0.5f;

 

 

    clGetPlatformIDs(1, &platform_id, NULL);

    clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, NULL);

    queue = clCreateCommandQueue(context, device_id, 0, NULL);

    mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * 2 * distance, NULL, NULL);

    clEnqueueWriteBuffer(queue, mem, CL_TRUE, 0L, sizeof(float) * 2 * distance, data[0], 0, NULL, NULL);

 

 

    setupData.major         = clAmdFftVersionMajor;

    setupData.minor         = clAmdFftVersionMinor;

    setupData.patch         = clAmdFftVersionPatch;

    setupData.debugFlags    = 0;

 

 

    clAmdFftSetup(&setupData);

    clAmdFftCreateDefaultPlan(&plan, context, CLFFT_1D, lengths);

    clAmdFftSetPlanPrecision(plan, CLFFT_SINGLE);

    clAmdFftSetLayout(plan, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);

    clAmdFftSetPlanDistance(plan, distance, distance / 2);

    clAmdFftSetPlanInStride(plan, CLFFT_1D, strideIn);

    clAmdFftSetPlanOutStride(plan, CLFFT_1D, strideOut);

    clAmdFftSetResultLocation(plan, CLFFT_INPLACE);

    clAmdFftSetPlanBatchSize(plan, 2);

    clAmdFftBakePlan(plan, 1, &queue, NULL, NULL);

    clAmdFftEnqueueTransform(plan, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &mem, &mem, NULL);

 

   for (int j = 0; j < 2; j++) {

        for (int i = 0; i < 34; i++)

            printf("%e\t", data[j][i]);

        printf("\n");

    }

    printf("\n");

 

    clEnqueueReadBuffer(queue, mem, CL_TRUE, 0L, sizeof(float) * 2 * distance, data[0], 0, NULL, NULL);

 

    for (int j = 0; j < 2; j++) {

        for (int i = 0; i < 34; i++)

            printf("%e\t", data[j][i]);

        printf("\n");

    }

    printf("\n");

 

    clAmdFftDestroyPlan(&plan);

    clAmdFftTeardown();

 

    clReleaseMemObject(mem);

    clReleaseCommandQueue(queue);

    clReleaseContext(context);

    clReleaseDevice(device_id);

}

Outcomes