2 Replies Latest reply on Mar 17, 2014 2:01 AM by amd_support

    need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library

    chris10

      Hello,

       

      I am working on a code which performs 3D FFT on 2 arrays (of dimension Ng*Ng*Ng) with fftw3 libray. The part of this code that interests me is this one :

       

       

      fftw_plan fft_green, fft_potential;
      
          // Allocate grid storage variables
          if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
            fprintf(stderr,"Unable to allocate space for Green's function.\n");
            return -1;
            }
          if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
            fprintf(stderr,"Unable to allocate space for potential buffer.\n");
            return -1;
            }
      
            //Allocate the fftw complex output value and the fftw dft plan.
            fft_potential = fftw_plan_r2r_3d(Ng,Ng,Ng,potential,potential,FFTW_REDFT00,
            FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);
            fft_green = fftw_plan_r2r_3d(Ng,Ng,Ng,green,green,FFTW_REDFT00,
            FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);
      
            // Perform the fourier transforms. Density first, Green's function second.
            fftw_execute(fft_potential);
            fftw_execute(fft_green);
      
            // do stuff on potential array ...
      
             // Inversion
            fftw_execute(fft_potential);
      
      

       

      I try to do the equivalent thing with OpenCL 3D FFT. For this, I use the clAmdFft library.

       

      Firstly, I don't understand why the second "fftw_execute(fft_potential)" call performs an inverse FFT. There is no specified "backward" option above in fftw_plan_r2r_3d.

       

      Secondly, to make the equivalent of this part of code, I have done the following routine :

       

        /////////////////////////////////////
        // OpenCL FFT 3D function ///////////
        /////////////////////////////////////
      
        int fft_opencl(double *tab)
        {
          // OpenCL variables
          cl_int err;
          cl_platform_id platform = 0;
          cl_device_id device = 0;
          cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
          cl_context ctx = 0;
          cl_command_queue queue = 0;
          cl_mem buffer;
          cl_event event = NULL;
          int ret = 0;
          // size of vectors
          size_t N = Ng/2;
          // Output array
          float *dest_tab;
      
          /* FFT library realted declarations */
          clAmdFftPlanHandle planHandle;
          clAmdFftDim dim = CLFFT_3D;
          size_t clLengths[3] = {N, N, N};
      
          /* Setup OpenCL environment. */
          err = clGetPlatformIDs( 1, &platform, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clGetPlatformIDs\n");
          err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clGetDeviceIDs\n");
      
          props[1] = (cl_context_properties)platform;
          ctx = clCreateContext( props, 1, &device, NULL, NULL, &err);
          queue = clCreateCommandQueue( ctx, device, 0, &err);
      
          /* Setup clFFT. */
          clAmdFftSetupData fftSetup;
          err = clAmdFftInitSetupData(&fftSetup);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftInitSetupData\n");
          err = clAmdFftSetup(&fftSetup);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftSetup\n");
      
          // Output array
          dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));
      
          /* Prepare OpenCL memory objects and place data inside them. */
          buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);
      
          err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0,
              Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clEnqueueWriteBuffer\n");
      
          /* Create a default plan for a complex FFT. */
          err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftCreateDefaultPlan\n");
      
          /* Set plan parameters. */
          err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftSetPlanPrecision\n");
          //err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
          err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftSetLayout\n");
          err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftSetResultLocation\n");
      
          /* Bake the plan. */
          err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftBakePlan\n");
      
          /* Execute the plan. */
          err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &buffer, NULL, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftEnqueueTransform\n");
      
          /* Wait for calculations to be finished. */
          err = clFinish(queue);
          if (err != CL_SUCCESS)
            printf("Error with clFinish\n");
      
          /* Fetch results of calculations. */
          err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);
          if (err != CL_SUCCESS)
            printf("Error with clEnqueueReadBuffer\n");
          
          // Assign tab with dest_tab
          tab = (double*) dest_tab;
      
          /* Release OpenCL memory objects. */
          clReleaseMemObject(buffer);
      
          /* Release the plan. */
          err = clAmdFftDestroyPlan(&planHandle);
          if (err != CL_SUCCESS)
            printf("Error with clAmdFftDestroyPlan\n");
      
          /* Release clFFT library. */
          clAmdFftTeardown();
      
          /* Release OpenCL working objects. */
          clReleaseCommandQueue(queue);
          clReleaseContext(ctx);
      
          return ret;
        }
      

       

      and I call it from the main source (like at the beginning) this way :

       

      // Allocate grid storage variables
          if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
            fprintf(stderr,"Unable to allocate space for Green's function.\n");
            return -1;
            }
          if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
            fprintf(stderr,"Unable to allocate space for potential buffer.\n");
            return -1;
            }
      
            // Perform the fourier transforms. Density first, Green's function second.
            fft_opencl(potential);
            fft_opencl(green);
      
            // do stuff on potential array ...
      
             // Inversion
            fft_opencl(potential);
      

       

       

      But the output "potential" array contains null values.

       

      As fft_plan_r2_r_3d manipulates arrays of dimension Ng*Ng*Ng, does the real to real FFT need to have "size_t clLengths[3] = {N, N, N};" with N=Ng/2 ? (I did this because of the example with 1D FFT : clMathLibraries/clFFT · GitHub )

       

      I think this type of FFT only haves real values, so I should have half of the points.

       

      Finally, I am not sure that my second call to "fft_opencl" makes the inverse FFT. How to specify it into my routine ?

       

      Any help would be great !

        • Re: need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library
          chris10

          I have done progress. Actually, I didn't know that I had to do two different plans with clAmdFftSetLayout and then specify backward and forward transforms with CLFFT_BACKWARD and CLFFT_FORWARD into clAmdFftEnqueueTransform.

           

          I try to use temporary buffer, you can see my last routine :

           

                /////////////////////////////////////

                // OpenCL FFT 3D function ///////////

                /////////////////////////////////////

             

                int FftOpenCL(double *tab, const char* direction)

                {

                  // OpenCL variables

                  cl_int err;

                  cl_platform_id platform = 0;

                  cl_device_id device = 0;

                  cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};

                  cl_context context = 0;

                  cl_command_queue queue = 0;

                  cl_mem bufferIn = 0;

                  cl_mem bufferOut = 0;

                  cl_mem tmpBuffer = 0;

                  int ret = 0;

                  // size of vectors

                  size_t N = Ng/2;

                  // size of temp buffer

                  size_t tmpBufferSize = 0;

                  // Output array

                  float *dest_tab;

             

                  /* FFT library realted declarations */

                  clAmdFftPlanHandle planHandle;

                  clAmdFftDim dim = CLFFT_3D;

                  size_t clLengths[3] = {N, N, N};

             

                  /* Setup OpenCL environment. */

                  err = clGetPlatformIDs( 1, &platform, NULL);

                  if (err != CL_SUCCESS)

                    printf("Error with clGetPlatformIDs\n");

                  err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

                  if (err != CL_SUCCESS)

                    printf("Error with clGetDeviceIDs\n");

             

                  props[1] = (cl_context_properties)platform;

                  context = clCreateContext( props, 1, &device, NULL, NULL, &err);

                  queue = clCreateCommandQueue( context, device, 0, &err);

             

                  /* Setup clFFT. */

                  clAmdFftSetupData fftSetup;

                  err = clAmdFftInitSetupData(&fftSetup);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftInitSetupData\n");

                  err = clAmdFftSetup(&fftSetup);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftSetup\n");

             

                  // Output array

                  dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));

             

                  /* Create a default plan for a complex FFT. */

                  err = clAmdFftCreateDefaultPlan(&planHandle, context, dim, clLengths);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftCreateDefaultPlan\n");

             

                  /* Set plan parameters. */

                  err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftSetPlanPrecision\n");

             

                  err = clAmdFftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftSetResultLocation\n");

             

                  if (!strcmp(direction,"forward")) {

                    err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_COMPLEX_INTERLEAVED);

                    if (err != CL_SUCCESS)

                      printf("Error with clAmdFftSetLayout Forward\n");

                  }

                  else if (!strcmp(direction,"backward")) {

                    err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL);

                    if (err != CL_SUCCESS)

                      printf("Error with clAmdFftSetLayout Backward\n");

                  }

              

                  /* Bake the plan. */

                  err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftBakePlan\n");

             

                  /* Prepare OpenCL memory objects : create buffer for input. */

                  bufferIn = clCreateBuffer( context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

                  if (err != CL_SUCCESS)

                    printf("Error with bufferIn clCreateBuffer\n");

             

                  /* Enqueue write tab array into bufferIn. */

                  err = clEnqueueWriteBuffer(queue, bufferIn, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);

                  if (err != CL_SUCCESS)

                    printf("Error with bufferIn clEnqueueWriteBuffer\n");

             

                  /* Prepare OpenCL memory objects : create buffer for output. */

                  bufferOut = clCreateBuffer(context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);

                  if (err != CL_SUCCESS)

                    printf("Error with bufferOut clCreateBuffer\n");

              

                  /* Create temporary buffer. */

                  err = clAmdFftGetTmpBufSize (planHandle, &tmpBufferSize);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftGetTmpBufSize\n");

                  if (tmpBufferSize == 0)

                    printf("Error with tmpBufferSize clAmdFftGetTmpBufSize\n");

                 

                    tmpBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE, tmpBufferSize, 0, &err);

                  if (err != CL_SUCCESS)

                    printf("Error with tmpBuffer clCreateBuffer\n");

               

                  /* Execute the plan. */

                  if (!strcmp(direction,"forward")) {         

                    err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

                    if (err != CL_SUCCESS)

                      printf("Error with clAmdFftEnqueueTransform Forward\n");

                  }

                  else if (!strcmp(direction,"backward")) {

                    err = clAmdFftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);

                    if (err != CL_SUCCESS)

                      printf("Error with clAmdFftEnqueueTransform Backward\n");

                  }

                  /* Wait for calculations to be finished. */

                  err = clFinish(queue);

                  if (err != CL_SUCCESS)

                    printf("Error with clFinish\n");

             

                  /* Fetch results of calculation. */

                  err = clEnqueueReadBuffer( queue, bufferOut, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);

                  if (err != CL_SUCCESS)

                    printf("Error with clEnqueueReadBuffer\n");

             

                  // Assign tab with dest_tab

                  tab = (double*) dest_tab;

             

                  /* Release OpenCL memory objects. */

                  clReleaseMemObject(bufferIn);

                  clReleaseMemObject(bufferOut);

                  clReleaseMemObject(tmpBuffer);

             

                  /* Release the plan. */

                  err = clAmdFftDestroyPlan(&planHandle);

                  if (err != CL_SUCCESS)

                    printf("Error with clAmdFftDestroyPlan\n");

             

                  /* Release clFFT library. */

                  clAmdFftTeardown();

             

                  /* Release OpenCL working objects. */

                  clReleaseCommandQueue(queue);

                  clReleaseContext(context);

             

                  return ret;

                }

           

          But the creation of temporary buffer fails, I get the following error :

           

          Error with tmpBufferSize clAmdFftGetTmpBufSize

          Error with tmpBuffer clCreateBuffer

           

          I saw that plan sould be baked before the call of temporary buffer creation but despite of this, it doesn't work ...

           

          Anyone could see what's wrong ?

            • Re: need help to make equivalent of fftw_plan_r2r_3d with clAmdFft library
              amd_support

              Hi,

              You are getting this error because clFFT does not require any temporary buffer.

               

              The documentation of clAmdFftGetTmpBufSize() says that the temporary buffer size will be returned as 0 if no temp buffer is required. Since in the above code the temp buffer size is being returned as 0, subsequent clCreateBuffer is trying to create a buffer of size zero, and so both the errors you have listed are being generated.

               

              Please try without explicitly allocating temporary buffer. The run-time automatically creates temporary buffer if it is required.

               

              Thanks,

              AMD Support.