AnsweredAssumed Answered

need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

Question asked by chris10 on Nov 22, 2013
Latest reply on Mar 17, 2014 by amd_support

Hello,

 

I am working on a code which performs 3D FFT on 2 arrays (of dimension Ng*Ng*Ng) with fftw3 libray. The part of this code that interests me is this one :

 

 

fftw_plan fft_green, fft_potential;

    // Allocate grid storage variables
    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
      fprintf(stderr,"Unable to allocate space for Green's function.\n");
      return -1;
      }
    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
      fprintf(stderr,"Unable to allocate space for potential buffer.\n");
      return -1;
      }

      //Allocate the fftw complex output value and the fftw dft plan.
      fft_potential = fftw_plan_r2r_3d(Ng,Ng,Ng,potential,potential,FFTW_REDFT00,
      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);
      fft_green = fftw_plan_r2r_3d(Ng,Ng,Ng,green,green,FFTW_REDFT00,
      FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);

      // Perform the fourier transforms. Density first, Green's function second.
      fftw_execute(fft_potential);
      fftw_execute(fft_green);

      // do stuff on potential array ...

       // Inversion
      fftw_execute(fft_potential);

 

I try to do the equivalent thing with OpenCL 3D FFT. For this, I use the clAmdFft library.

 

Firstly, I don't understand why the second "fftw_execute(fft_potential)" call performs an inverse FFT. There is no specified "backward" option above in fftw_plan_r2r_3d.

 

Secondly, to make the equivalent of this part of code, I have done the following routine :

 

  /////////////////////////////////////
  // OpenCL FFT 3D function ///////////
  /////////////////////////////////////

  int fft_opencl(double *tab)
  {
    // OpenCL variables
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem buffer;
    cl_event event = NULL;
    int ret = 0;
    // size of vectors
    size_t N = Ng/2;
    // Output array
    float *dest_tab;

    /* FFT library realted declarations */
    clAmdFftPlanHandle planHandle;
    clAmdFftDim dim = CLFFT_3D;
    size_t clLengths[3] = {N, N, N};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clGetPlatformIDs\n");
    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clGetDeviceIDs\n");

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err);
    queue = clCreateCommandQueue( ctx, device, 0, &err);

    /* Setup clFFT. */
    clAmdFftSetupData fftSetup;
    err = clAmdFftInitSetupData(&fftSetup);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftInitSetupData\n");
    err = clAmdFftSetup(&fftSetup);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftSetup\n");

    // Output array
    dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));

    /* Prepare OpenCL memory objects and place data inside them. */
    buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0,
        Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clEnqueueWriteBuffer\n");

    /* Create a default plan for a complex FFT. */
    err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftCreateDefaultPlan\n");

    /* Set plan parameters. */
    err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftSetPlanPrecision\n");
    //err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
    err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftSetLayout\n");
    err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftSetResultLocation\n");

    /* Bake the plan. */
    err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftBakePlan\n");

    /* Execute the plan. */
    err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &buffer, NULL, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftEnqueueTransform\n");

    /* Wait for calculations to be finished. */
    err = clFinish(queue);
    if (err != CL_SUCCESS)
      printf("Error with clFinish\n");

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);
    if (err != CL_SUCCESS)
      printf("Error with clEnqueueReadBuffer\n");
    
    // Assign tab with dest_tab
    tab = (double*) dest_tab;

    /* Release OpenCL memory objects. */
    clReleaseMemObject(buffer);

    /* Release the plan. */
    err = clAmdFftDestroyPlan(&planHandle);
    if (err != CL_SUCCESS)
      printf("Error with clAmdFftDestroyPlan\n");

    /* Release clFFT library. */
    clAmdFftTeardown();

    /* Release OpenCL working objects. */
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);

    return ret;
  }

 

and I call it from the main source (like at the beginning) this way :

 

// Allocate grid storage variables
    if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
      fprintf(stderr,"Unable to allocate space for Green's function.\n");
      return -1;
      }
    if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
      fprintf(stderr,"Unable to allocate space for potential buffer.\n");
      return -1;
      }

      // Perform the fourier transforms. Density first, Green's function second.
      fft_opencl(potential);
      fft_opencl(green);

      // do stuff on potential array ...

       // Inversion
      fft_opencl(potential);

 

 

But the output "potential" array contains null values.

 

As fft_plan_r2_r_3d manipulates arrays of dimension Ng*Ng*Ng, does the real to real FFT need to have "size_t clLengths[3] = {N, N, N};" with N=Ng/2 ? (I did this because of the example with 1D FFT : clMathLibraries/clFFT · GitHub )

 

I think this type of FFT only haves real values, so I should have half of the points.

 

Finally, I am not sure that my second call to "fft_opencl" makes the inverse FFT. How to specify it into my routine ?

 

Any help would be great !

Outcomes