cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

chris10
Journeyman III

need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

Hello,

I am working on a code which performs 3D FFT on 2 arrays (of dimension Ng*Ng*Ng) with fftw3 libray. The part of this code that interests me is this one :


fftw_plan fft_green, fft_potential;



    // Allocate grid storage variables


    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for Green's function.\n");


      return -1;


      }


    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for potential buffer.\n");


      return -1;


      }



      //Allocate the fftw complex output value and the fftw dft plan.


      fft_potential = fftw_plan_r2r_3d(Ng,Ng,Ng,potential,potential,FFTW_REDFT00,


      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);


      fft_green = fftw_plan_r2r_3d(Ng,Ng,Ng,green,green,FFTW_REDFT00,


      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);



      // Perform the fourier transforms. Density first, Green's function second.


      fftw_execute(fft_potential);


      fftw_execute(fft_green);



      // do stuff on potential array ...



       // Inversion


      fftw_execute(fft_potential);



I try to do the equivalent thing with OpenCL 3D FFT. For this, I use the clAmdFft library.

Firstly, I don't understand why the second "fftw_execute(fft_potential)" call performs an inverse FFT. There is no specified "backward" option above in fftw_plan_r2r_3d.

Secondly, to make the equivalent of this part of code, I have done the following routine :


  /////////////////////////////////////


  // OpenCL FFT 3D function ///////////


  /////////////////////////////////////



  int fft_opencl(double *tab)


  {


    // OpenCL variables


    cl_int err;


    cl_platform_id platform = 0;


    cl_device_id device = 0;


    cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};


    cl_context ctx = 0;


    cl_command_queue queue = 0;


    cl_mem buffer;


    cl_event event = NULL;


    int ret = 0;


    // size of vectors


    size_t N = Ng/2;


    // Output array


    float *dest_tab;



    /* FFT library realted declarations */


    clAmdFftPlanHandle planHandle;


    clAmdFftDim dim = CLFFT_3D;


    size_t clLengths[3] = {N, N, N};



    /* Setup OpenCL environment. */


    err = clGetPlatformIDs( 1, &platform, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clGetPlatformIDs\n");


    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clGetDeviceIDs\n");



    props[1] = (cl_context_properties)platform;


    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err);


    queue = clCreateCommandQueue( ctx, device, 0, &err);



    /* Setup clFFT. */


    clAmdFftSetupData fftSetup;


    err = clAmdFftInitSetupData(&fftSetup);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftInitSetupData\n");


    err = clAmdFftSetup(&fftSetup);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetup\n");



    // Output array


    dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));



    /* Prepare OpenCL memory objects and place data inside them. */


    buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);



    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0,


        Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clEnqueueWriteBuffer\n");



    /* Create a default plan for a complex FFT. */


    err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftCreateDefaultPlan\n");



    /* Set plan parameters. */


    err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetPlanPrecision\n");


    //err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);


    err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetLayout\n");


    err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftSetResultLocation\n");



    /* Bake the plan. */


    err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftBakePlan\n");



    /* Execute the plan. */


    err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &buffer, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftEnqueueTransform\n");



    /* Wait for calculations to be finished. */


    err = clFinish(queue);


    if (err != CL_SUCCESS)


      printf("Error with clFinish\n");



    /* Fetch results of calculations. */


    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);


    if (err != CL_SUCCESS)


      printf("Error with clEnqueueReadBuffer\n");


   


    // Assign tab with dest_tab


    tab = (double*) dest_tab;



    /* Release OpenCL memory objects. */


    clReleaseMemObject(buffer);



    /* Release the plan. */


    err = clAmdFftDestroyPlan(&planHandle);


    if (err != CL_SUCCESS)


      printf("Error with clAmdFftDestroyPlan\n");



    /* Release clFFT library. */


    clAmdFftTeardown();



    /* Release OpenCL working objects. */


    clReleaseCommandQueue(queue);


    clReleaseContext(ctx);



    return ret;


  }


and I call it from the main source (like at the beginning) this way :


// Allocate grid storage variables


    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for Green's function.\n");


      return -1;


      }


    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {


      fprintf(stderr,"Unable to allocate space for potential buffer.\n");


      return -1;


      }




      // Perform the fourier transforms. Density first, Green's function second.


      fft_opencl(potential);


      fft_opencl(green);



      // do stuff on potential array ...



       // Inversion


      fft_opencl(potential);


But the output "potential" array contains null values.

As fft_plan_r2_r_3d manipulates arrays of dimension Ng*Ng*Ng, does the real to real FFT need to have "size_t clLengths[3] = {N, N, N};" with N=Ng/2 ? (I did this because of the example with 1D FFT : clMathLibraries/clFFT · GitHub )

I think this type of FFT only haves real values, so I should have half of the points.

Finally, I am not sure that my second call to "fft_opencl" makes the inverse FFT. How to specify it into my routine ?

Any help would be great !

0 Likes
2 Replies
chris10
Journeyman III

Re: need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

I have done progress. Actually, I didn't know that I had to do two different plans with clAmdFftSetLayout and then specify backward and forward transforms with CLFFT_BACKWARD and CLFFT_FORWARD into clAmdFftEnqueueTransform.

I try to use temporary buffer, you can see my last routine :

      /////////////////////////////////////

      // OpenCL FFT 3D function ///////////

      /////////////////////////////////////

   

      int FftOpenCL(double *tab, const char* direction)

      {

        // OpenCL variables

        cl_int err;

        cl_platform_id platform = 0;

        cl_device_id device = 0;

        cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};

        cl_context context = 0;

        cl_command_queue queue = 0;

        cl_mem bufferIn = 0;

        cl_mem bufferOut = 0;

        cl_mem tmpBuffer = 0;

        int ret = 0;

        // size of vectors

        size_t N = Ng/2;

        // size of temp buffer

        size_t tmpBufferSize = 0;

        // Output array

        float *dest_tab;

   

        /* FFT library realted declarations */

        clAmdFftPlanHandle planHandle;

        clAmdFftDim dim = CLFFT_3D;

        size_t clLengths[3] = {N, N, N};

   

        /* Setup OpenCL environment. */

        err = clGetPlatformIDs( 1, &platform, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clGetPlatformIDs\n");

        err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clGetDeviceIDs\n");

   

        props[1] = (cl_context_properties)platform;

        context = clCreateContext( props, 1, &device, NULL, NULL, &err);

        queue = clCreateCommandQueue( context, device, 0, &err);

   

        /* Setup clFFT. */

        clAmdFftSetupData fftSetup;

        err = clAmdFftInitSetupData(&fftSetup);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftInitSetupData\n");

        err = clAmdFftSetup(&fftSetup);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetup\n");

   

        // Output array

        dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));

   

        /* Create a default plan for a complex FFT. */

        err = clAmdFftCreateDefaultPlan(&planHandle, context, dim, clLengths);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftCreateDefaultPlan\n");

   

        /* Set plan parameters. */

        err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetPlanPrecision\n");

   

        err = clAmdFftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftSetResultLocation\n");

   

        if (!strcmp(direction,"forward")) {

          err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_COMPLEX_INTERLEAVED);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftSetLayout Forward\n");

        }

        else if (!strcmp(direction,"backward")) {

          err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftSetLayout Backward\n");

        }

    

        /* Bake the plan. */

        err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftBakePlan\n");

   

        /* Prepare OpenCL memory objects : create buffer for input. */

        bufferIn = clCreateBuffer( context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

        if (err != CL_SUCCESS)

          printf("Error with bufferIn clCreateBuffer\n");

   

        /* Enqueue write tab array into bufferIn. */

        err = clEnqueueWriteBuffer(queue, bufferIn, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with bufferIn clEnqueueWriteBuffer\n");

   

        /* Prepare OpenCL memory objects : create buffer for output. */

        bufferOut = clCreateBuffer(context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

        if (err != CL_SUCCESS)

          printf("Error with bufferOut clCreateBuffer\n");

    

        /* Create temporary buffer. */

        err = clAmdFftGetTmpBufSize (planHandle, &tmpBufferSize);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftGetTmpBufSize\n");

        if (tmpBufferSize == 0)

          printf("Error with tmpBufferSize clAmdFftGetTmpBufSize\n");

       

          tmpBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE, tmpBufferSize, 0, &err);

        if (err != CL_SUCCESS)

          printf("Error with tmpBuffer clCreateBuffer\n");

     

        /* Execute the plan. */

        if (!strcmp(direction,"forward")) {         

          err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftEnqueueTransform Forward\n");

        }

        else if (!strcmp(direction,"backward")) {

          err = clAmdFftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

          if (err != CL_SUCCESS)

            printf("Error with clAmdFftEnqueueTransform Backward\n");

        }

        /* Wait for calculations to be finished. */

        err = clFinish(queue);

        if (err != CL_SUCCESS)

          printf("Error with clFinish\n");

   

        /* Fetch results of calculation. */

        err = clEnqueueReadBuffer( queue, bufferOut, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);

        if (err != CL_SUCCESS)

          printf("Error with clEnqueueReadBuffer\n");

   

        // Assign tab with dest_tab

        tab = (double*) dest_tab;

   

        /* Release OpenCL memory objects. */

        clReleaseMemObject(bufferIn);

        clReleaseMemObject(bufferOut);

        clReleaseMemObject(tmpBuffer);

   

        /* Release the plan. */

        err = clAmdFftDestroyPlan(&planHandle);

        if (err != CL_SUCCESS)

          printf("Error with clAmdFftDestroyPlan\n");

   

        /* Release clFFT library. */

        clAmdFftTeardown();

   

        /* Release OpenCL working objects. */

        clReleaseCommandQueue(queue);

        clReleaseContext(context);

   

        return ret;

      }

But the creation of temporary buffer fails, I get the following error :

Error with tmpBufferSize clAmdFftGetTmpBufSize

Error with tmpBuffer clCreateBuffer

I saw that plan sould be baked before the call of temporary buffer creation but despite of this, it doesn't work ...

Anyone could see what's wrong ?

0 Likes
amd_support
Staff
Staff

Re: need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

Hi,

You are getting this error because clFFT does not require any temporary buffer.

The documentation of clAmdFftGetTmpBufSize() says that the temporary buffer size will be returned as 0 if no temp buffer is required. Since in the above code the temp buffer size is being returned as 0, subsequent clCreateBuffer is trying to create a buffer of size zero, and so both the errors you have listed are being generated.

Please try without explicitly allocating temporary buffer. The run-time automatically creates temporary buffer if it is required.

Thanks,

AMD Support.

0 Likes