Hello,
I am working on a code which performs 3D FFT on 2 arrays (of dimension Ng*Ng*Ng) with fftw3 libray. The part of this code that interests me is this one :
fftw_plan fft_green, fft_potential;
// Allocate grid storage variables
if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
fprintf(stderr,"Unable to allocate space for Green's function.\n");
return -1;
}
if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
fprintf(stderr,"Unable to allocate space for potential buffer.\n");
return -1;
}
//Allocate the fftw complex output value and the fftw dft plan.
fft_potential = fftw_plan_r2r_3d(Ng,Ng,Ng,potential,potential,FFTW_REDFT00,
FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);
fft_green = fftw_plan_r2r_3d(Ng,Ng,Ng,green,green,FFTW_REDFT00,
FFTW_REDFT00,FFTW_REDFT00,FFTW_ESTIMATE);
// Perform the fourier transforms. Density first, Green's function second.
fftw_execute(fft_potential);
fftw_execute(fft_green);
// do stuff on potential array ...
// Inversion
fftw_execute(fft_potential);
I try to do the equivalent thing with OpenCL 3D FFT. For this, I use the clAmdFft library.
Firstly, I don't understand why the second "fftw_execute(fft_potential)" call performs an inverse FFT. There is no specified "backward" option above in fftw_plan_r2r_3d.
Secondly, to make the equivalent of this part of code, I have done the following routine :
/////////////////////////////////////
// OpenCL FFT 3D function ///////////
/////////////////////////////////////
int fft_opencl(double *tab)
{
// OpenCL variables
cl_int err;
cl_platform_id platform = 0;
cl_device_id device = 0;
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
cl_context ctx = 0;
cl_command_queue queue = 0;
cl_mem buffer;
cl_event event = NULL;
int ret = 0;
// size of vectors
size_t N = Ng/2;
// Output array
float *dest_tab;
/* FFT library realted declarations */
clAmdFftPlanHandle planHandle;
clAmdFftDim dim = CLFFT_3D;
size_t clLengths[3] = {N, N, N};
/* Setup OpenCL environment. */
err = clGetPlatformIDs( 1, &platform, NULL);
if (err != CL_SUCCESS)
printf("Error with clGetPlatformIDs\n");
err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err != CL_SUCCESS)
printf("Error with clGetDeviceIDs\n");
props[1] = (cl_context_properties)platform;
ctx = clCreateContext( props, 1, &device, NULL, NULL, &err);
queue = clCreateCommandQueue( ctx, device, 0, &err);
/* Setup clFFT. */
clAmdFftSetupData fftSetup;
err = clAmdFftInitSetupData(&fftSetup);
if (err != CL_SUCCESS)
printf("Error with clAmdFftInitSetupData\n");
err = clAmdFftSetup(&fftSetup);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetup\n");
// Output array
dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));
/* Prepare OpenCL memory objects and place data inside them. */
buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0,
Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clEnqueueWriteBuffer\n");
/* Create a default plan for a complex FFT. */
err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
if (err != CL_SUCCESS)
printf("Error with clAmdFftCreateDefaultPlan\n");
/* Set plan parameters. */
err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetPlanPrecision\n");
//err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetLayout\n");
err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetResultLocation\n");
/* Bake the plan. */
err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clAmdFftBakePlan\n");
/* Execute the plan. */
err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &buffer, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clAmdFftEnqueueTransform\n");
/* Wait for calculations to be finished. */
err = clFinish(queue);
if (err != CL_SUCCESS)
printf("Error with clFinish\n");
/* Fetch results of calculations. */
err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clEnqueueReadBuffer\n");
// Assign tab with dest_tab
tab = (double*) dest_tab;
/* Release OpenCL memory objects. */
clReleaseMemObject(buffer);
/* Release the plan. */
err = clAmdFftDestroyPlan(&planHandle);
if (err != CL_SUCCESS)
printf("Error with clAmdFftDestroyPlan\n");
/* Release clFFT library. */
clAmdFftTeardown();
/* Release OpenCL working objects. */
clReleaseCommandQueue(queue);
clReleaseContext(ctx);
return ret;
}
and I call it from the main source (like at the beginning) this way :
// Allocate grid storage variables
if (!(green = calloc(pow(Ng,3),sizeof(double)))) {
fprintf(stderr,"Unable to allocate space for Green's function.\n");
return -1;
}
if (!(potential = calloc(pow(Ng,3),sizeof(double)))) {
fprintf(stderr,"Unable to allocate space for potential buffer.\n");
return -1;
}
// Perform the fourier transforms. Density first, Green's function second.
fft_opencl(potential);
fft_opencl(green);
// do stuff on potential array ...
// Inversion
fft_opencl(potential);
But the output "potential" array contains null values.
As fft_plan_r2_r_3d manipulates arrays of dimension Ng*Ng*Ng, does the real to real FFT need to have "size_t clLengths[3] = {N, N, N};" with N=Ng/2 ? (I did this because of the example with 1D FFT : clMathLibraries/clFFT · GitHub )
I think this type of FFT only haves real values, so I should have half of the points.
Finally, I am not sure that my second call to "fft_opencl" makes the inverse FFT. How to specify it into my routine ?
Any help would be great !
I have done progress. Actually, I didn't know that I had to do two different plans with clAmdFftSetLayout and then specify backward and forward transforms with CLFFT_BACKWARD and CLFFT_FORWARD into clAmdFftEnqueueTransform.
I try to use temporary buffer, you can see my last routine :
/////////////////////////////////////
// OpenCL FFT 3D function ///////////
/////////////////////////////////////
int FftOpenCL(double *tab, const char* direction)
{
// OpenCL variables
cl_int err;
cl_platform_id platform = 0;
cl_device_id device = 0;
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
cl_context context = 0;
cl_command_queue queue = 0;
cl_mem bufferIn = 0;
cl_mem bufferOut = 0;
cl_mem tmpBuffer = 0;
int ret = 0;
// size of vectors
size_t N = Ng/2;
// size of temp buffer
size_t tmpBufferSize = 0;
// Output array
float *dest_tab;
/* FFT library realted declarations */
clAmdFftPlanHandle planHandle;
clAmdFftDim dim = CLFFT_3D;
size_t clLengths[3] = {N, N, N};
/* Setup OpenCL environment. */
err = clGetPlatformIDs( 1, &platform, NULL);
if (err != CL_SUCCESS)
printf("Error with clGetPlatformIDs\n");
err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err != CL_SUCCESS)
printf("Error with clGetDeviceIDs\n");
props[1] = (cl_context_properties)platform;
context = clCreateContext( props, 1, &device, NULL, NULL, &err);
queue = clCreateCommandQueue( context, device, 0, &err);
/* Setup clFFT. */
clAmdFftSetupData fftSetup;
err = clAmdFftInitSetupData(&fftSetup);
if (err != CL_SUCCESS)
printf("Error with clAmdFftInitSetupData\n");
err = clAmdFftSetup(&fftSetup);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetup\n");
// Output array
dest_tab = (float*) malloc(Ng*Ng*Ng * sizeof(float));
/* Create a default plan for a complex FFT. */
err = clAmdFftCreateDefaultPlan(&planHandle, context, dim, clLengths);
if (err != CL_SUCCESS)
printf("Error with clAmdFftCreateDefaultPlan\n");
/* Set plan parameters. */
err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetPlanPrecision\n");
err = clAmdFftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetResultLocation\n");
if (!strcmp(direction,"forward")) {
err = clAmdFftSetLayout(planHandle, CLFFT_REAL, CLFFT_COMPLEX_INTERLEAVED);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetLayout Forward\n");
}
else if (!strcmp(direction,"backward")) {
err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_REAL);
if (err != CL_SUCCESS)
printf("Error with clAmdFftSetLayout Backward\n");
}
/* Bake the plan. */
err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clAmdFftBakePlan\n");
/* Prepare OpenCL memory objects : create buffer for input. */
bufferIn = clCreateBuffer( context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);
if (err != CL_SUCCESS)
printf("Error with bufferIn clCreateBuffer\n");
/* Enqueue write tab array into bufferIn. */
err = clEnqueueWriteBuffer(queue, bufferIn, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), tab, 0, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with bufferIn clEnqueueWriteBuffer\n");
/* Prepare OpenCL memory objects : create buffer for output. */
bufferOut = clCreateBuffer(context, CL_MEM_READ_WRITE, Ng*Ng*Ng * sizeof(float), NULL, &err);
if (err != CL_SUCCESS)
printf("Error with bufferOut clCreateBuffer\n");
/* Create temporary buffer. */
err = clAmdFftGetTmpBufSize (planHandle, &tmpBufferSize);
if (err != CL_SUCCESS)
printf("Error with clAmdFftGetTmpBufSize\n");
if (tmpBufferSize == 0)
printf("Error with tmpBufferSize clAmdFftGetTmpBufSize\n");
tmpBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE, tmpBufferSize, 0, &err);
if (err != CL_SUCCESS)
printf("Error with tmpBuffer clCreateBuffer\n");
/* Execute the plan. */
if (!strcmp(direction,"forward")) {
err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);
if (err != CL_SUCCESS)
printf("Error with clAmdFftEnqueueTransform Forward\n");
}
else if (!strcmp(direction,"backward")) {
err = clAmdFftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &queue, 0, 0, NULL, &bufferIn, &bufferOut, tmpBuffer);
if (err != CL_SUCCESS)
printf("Error with clAmdFftEnqueueTransform Backward\n");
}
/* Wait for calculations to be finished. */
err = clFinish(queue);
if (err != CL_SUCCESS)
printf("Error with clFinish\n");
/* Fetch results of calculation. */
err = clEnqueueReadBuffer( queue, bufferOut, CL_TRUE, 0, Ng*Ng*Ng * sizeof(float), dest_tab, 0, NULL, NULL);
if (err != CL_SUCCESS)
printf("Error with clEnqueueReadBuffer\n");
// Assign tab with dest_tab
tab = (double*) dest_tab;
/* Release OpenCL memory objects. */
clReleaseMemObject(bufferIn);
clReleaseMemObject(bufferOut);
clReleaseMemObject(tmpBuffer);
/* Release the plan. */
err = clAmdFftDestroyPlan(&planHandle);
if (err != CL_SUCCESS)
printf("Error with clAmdFftDestroyPlan\n");
/* Release clFFT library. */
clAmdFftTeardown();
/* Release OpenCL working objects. */
clReleaseCommandQueue(queue);
clReleaseContext(context);
return ret;
}
But the creation of temporary buffer fails, I get the following error :
Error with tmpBufferSize clAmdFftGetTmpBufSize
Error with tmpBuffer clCreateBuffer
I saw that plan sould be baked before the call of temporary buffer creation but despite of this, it doesn't work ...
Anyone could see what's wrong ?
Hi,
You are getting this error because clFFT does not require any temporary buffer.
The documentation of clAmdFftGetTmpBufSize() says that the temporary buffer size will be returned as 0 if no temp buffer is required. Since in the above code the temp buffer size is being returned as 0, subsequent clCreateBuffer is trying to create a buffer of size zero, and so both the errors you have listed are being generated.
Please try without explicitly allocating temporary buffer. The run-time automatically creates temporary buffer if it is required.
Thanks,
AMD Support.