cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

chris10
Journeyman III

need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

Hello,

I am working on a code which performs 3D FFT on 2 arrays (of dimension Ng*Ng*Ng) with fftw3 libray. The part of this code that interests me is this one :


fftw_plan fft_green, fft_potential;



    // Allocate grid storage variables


    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for Green's function.\n");


      return -1;


      }


    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for potential buffer.\n");


      return -1;


      }



      //Allocate the fftw complex output value and the fftw dft plan.


      fft_potential = fftw_plan_r2r_3d(Ng,Ng,Ng,potential,potential,FFTW_REDFT00,


      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);


      fft_green = fftw_plan_r2r_3d(Ng,Ng,Ng,green,green,FFTW_REDFT00,


      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);



      // Perform the fourier transforms. Density first, Green's function second.


      fftw_execute(fft_potential);


      fftw_execute(fft_green);



      // do stuff on potential array ...



       // Inversion


      fftw_execute(fft_potential);



I try to do the equivalent thing with OpenCL 3D FFT. For this, I use the clAmdFft library.

Firstly, I don't understand why the second "fftw_execute(fft_potential)" call performs an inverse FFT. There is no specified "backward" option above in fftw_plan_r2r_3d.

Secondly, to make the equivalent of this part of code, I have done the following routine :


  /////////////////////////////////////


  // OpenCL FFT 3D function ///////////


  /////////////////////////////////////



  int fft_opencl(double *tab)


  {


    // OpenCL variables


    cl_int err;


    cl_platform_id platform = 0;


    cl_device_id device = 0;


    cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};


    cl_context ctx = 0;


    cl_command_queue queue = 0;


    cl_mem buffer;


    cl_event event = NULL;


    int ret = 0;


    // size of vectors


    size_t N = Ng/2;


    // Output array


    float *dest_tab;



    /* FFT library realted declarations */


    clAmdFftPlanHandle planHandle;


    clAmdFftDim dim = CLFFT_3D;


    size_t clLengths[3] = {N, N, N};



    /* Setup OpenCL environment. */


    err = clGetPlatformIDs( 1, &platform, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clGetPlatformIDs\n");


    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clGetDeviceIDs\n");



    props[1] = (cl_context_properties)platform;


    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err);


    queue = clCreateCommandQueue( ctx, device, 0, &err);



    /* Setup clFFT. */


    clAmdFftSetupData fftSetup;


    err = clAmdFftInitSetupData(&fftSetup);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftInitSetupData\n");


    err = clAmdFftSetup(&fftSetup);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetup\n");



    // Output array


    dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));



    /* Prepare OpenCL memory objects and place data inside them. */


    buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);



    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0,


        Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clEnqueueWriteBuffer\n");



    /* Create a default plan for a complex FFT. */


    err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftCreateDefaultPlan\n");



    /* Set plan parameters. */


    err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetPlanPrecision\n");


    //err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);


    err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetLayout\n");


    err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetResultLocation\n");



    /* Bake the plan. */


    err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftBakePlan\n");



    /* Execute the plan. */


    err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &buffer, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftEnqueueTransform\n");



    /* Wait for calculations to be finished. */


    err = clFinish(queue);


    if (err != CL_SUCCESS)


      printf("Error with clFinish\n");



    /* Fetch results of calculations. */


    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clEnqueueReadBuffer\n");


   


    // Assign tab with dest_tab


    tab = (double*) dest_tab;



    /* Release OpenCL memory objects. */


    clReleaseMemObject(buffer);



    /* Release the plan. */


    err = clAmdFftDestroyPlan(&planHandle);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftDestroyPlan\n");



    /* Release clFFT library. */


    clAmdFftTeardown();



    /* Release OpenCL working objects. */


    clReleaseCommandQueue(queue);


    clReleaseContext(ctx);



    return ret;


  }


and I call it from the main source (like at the beginning) this way :


// Allocate grid storage variables


    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for Green's function.\n");


      return -1;


      }


    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for potential buffer.\n");


      return -1;


      }




      // Perform the fourier transforms. Density first, Green's function second.


      fft_opencl(potential);


      fft_opencl(green);



      // do stuff on potential array ...



       // Inversion


      fft_opencl(potential);


But the output "potential" array contains null values.

As fft_plan_r2_r_3d manipulates arrays of dimension Ng*Ng*Ng, does the real to real FFT need to have "size_t clLengths[3] = {N, N, N};" with N=Ng/2 ? (I did this because of the example with 1D FFT : clMathLibraries/clFFT · GitHub )

I think this type of FFT only haves real values, so I should have half of the points.

Finally, I am not sure that my second call to "fft_opencl" makes the inverse FFT. How to specify it into my routine ?

Any help would be great !

0 Likes
2 Replies
chris10
Journeyman III

I have done progress. Actually, I didn't know that I had to do two different plans with clAmdFftSetLayout and then specify backward and forward transforms with CLFFT_BACKWARD and CLFFT_FORWARD into clAmdFftEnqueueTransform.

I try to use temporary buffer, you can see my last routine :

      /////////////////////////////////////

      // OpenCL FFT 3D function ///////////

      /////////////////////////////////////

   

      int FftOpenCL(double *tab, const char* direction)

      {

        // OpenCL variables

        cl_int err;

        cl_platform_id platform = 0;

        cl_device_id device = 0;

        cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};

        cl_context context = 0;

        cl_command_queue queue = 0;

        cl_mem bufferIn = 0;

        cl_mem bufferOut = 0;

        cl_mem tmpBuffer = 0;

        int ret = 0;

        // size of vectors

        size_t N = Ng/2;

        // size of temp buffer

        size_t tmpBufferSize = 0;

        // Output array

        float *dest_tab;

   

        /* FFT library realted declarations */

        clAmdFftPlanHandle planHandle;

        clAmdFftDim dim = CLFFT_3D;

        size_t clLengths[3] = {N, N, N};

   

        /* Setup OpenCL environment. */

        err = clGetPlatformIDs( 1, &platform, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clGetPlatformIDs\n");

        err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clGetDeviceIDs\n");

   

        props[1] = (cl_context_properties)platform;

        context = clCreateContext( props, 1, &device, NULL, NULL, &err);

        queue = clCreateCommandQueue( context, device, 0, &err);

   

        /* Setup clFFT. */

        clAmdFftSetupData fftSetup;

        err = clAmdFftInitSetupData(&fftSetup);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftInitSetupData\n");

        err = clAmdFftSetup(&fftSetup);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetup\n");

   

        // Output array

        dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));

   

        /* Create a default plan for a complex FFT. */

        err = clAmdFftCreateDefaultPlan(&planHandle, context, dim, clLengths);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftCreateDefaultPlan\n");

   

        /* Set plan parameters. */

        err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetPlanPrecision\n");

   

        err = clAmdFftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetResultLocation\n");

   

        if (!strcmp(direction,"forward")) {

          err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_COMPLEX_INTERLEAVED);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftSetLayout Forward\n");

        }

        else if (!strcmp(direction,"backward")) {

          err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftSetLayout Backward\n");

        }

    

        /* Bake the plan. */

        err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftBakePlan\n");

   

        /* Prepare OpenCL memory objects : create buffer for input. */

        bufferIn = clCreateBuffer( context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

        if (err != CL_SUCCESS)

          printf("Error with bufferIn clCreateBuffer\n");

   

        /* Enqueue write tab array into bufferIn. */

        err = clEnqueueWriteBuffer(queue, bufferIn, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with bufferIn clEnqueueWriteBuffer\n");

   

        /* Prepare OpenCL memory objects : create buffer for output. */

        bufferOut = clCreateBuffer(context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

        if (err != CL_SUCCESS)

          printf("Error with bufferOut clCreateBuffer\n");

    

        /* Create temporary buffer. */

        err = clAmdFftGetTmpBufSize (planHandle, &tmpBufferSize);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftGetTmpBufSize\n");

        if (tmpBufferSize == 0)

          printf("Error with tmpBufferSize clAmdFftGetTmpBufSize\n");

       

          tmpBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE, tmpBufferSize, 0, &err);

        if (err != CL_SUCCESS)

          printf("Error with tmpBuffer clCreateBuffer\n");

     

        /* Execute the plan. */

        if (!strcmp(direction,"forward")) {         

          err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftEnqueueTransform Forward\n");

        }

        else if (!strcmp(direction,"backward")) {

          err = clAmdFftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftEnqueueTransform Backward\n");

        }

        /* Wait for calculations to be finished. */

        err = clFinish(queue);

        if (err != CL_SUCCESS)

          printf("Error with clFinish\n");

   

        /* Fetch results of calculation. */

        err = clEnqueueReadBuffer( queue, bufferOut, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clEnqueueReadBuffer\n");

   

        // Assign tab with dest_tab

        tab = (double*) dest_tab;

   

        /* Release OpenCL memory objects. */

        clReleaseMemObject(bufferIn);

        clReleaseMemObject(bufferOut);

        clReleaseMemObject(tmpBuffer);

   

        /* Release the plan. */

        err = clAmdFftDestroyPlan(&planHandle);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftDestroyPlan\n");

   

        /* Release clFFT library. */

        clAmdFftTeardown();

   

        /* Release OpenCL working objects. */

        clReleaseCommandQueue(queue);

        clReleaseContext(context);

   

        return ret;

      }

But the creation of temporary buffer fails, I get the following error :

Error with tmpBufferSize clAmdFftGetTmpBufSize

Error with tmpBuffer clCreateBuffer

I saw that plan sould be baked before the call of temporary buffer creation but despite of this, it doesn't work ...

Anyone could see what's wrong ?

0 Likes

Hi,

You are getting this error because clFFT does not require any temporary buffer.

The documentation of clAmdFftGetTmpBufSize() says that the temporary buffer size will be returned as 0 if no temp buffer is required. Since in the above code the temp buffer size is being returned as 0, subsequent clCreateBuffer is trying to create a buffer of size zero, and so both the errors you have listed are being generated.

Please try without explicitly allocating temporary buffer. The run-time automatically creates temporary buffer if it is required.

Thanks,

AMD Support.

0 Likes