thamerhatem

Type double problem

Discussion created by thamerhatem on Aug 9, 2011
Latest reply on Aug 10, 2011 by genaganna

Hi, I'm trying to use the double type in OpenCL, but it doesn't work. I want to use double for more precision; if there is any other type that provides this, please tell me.

If you don't have time to read my code, here is a summary: I use double inside the kernel source and in the main code. I also tried other things such as double_t, float_t, double2 and half, but nothing works.

#pragma OPENCL EXTENSION cl_amd_fp64 : enable #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <OpenCL/opencl.h> //////////////////////////////////////////////////////////////////////////////// // Use a static data size for simplicity // #define DATA_SIZE (1000000) //////////////////////////////////////////////////////////////////////////////// #define TIPO cl_double // Simple compute kernel that computes the calcpi of an input array. [1] // const char *KernelSource = "\n" \ "#pragma OPENCL EXTENSION cl_amd_fp64 : enable \n" \ "__kernel void calcpi( \n" \ " __global double* input, \n" \ " __global double* output, \n" \ " const unsigned int count) \n" \ "{ \n" \ " int i = get_global_id(0); \n" \ " double z = get_global_id(0)*2+1; \n" \ " if(i < count) \n" \ " output[i] = 4.0/z; \n" \ "} \n" \ "\n"; //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int err; // error code returned from api calls //printf("%d",sizeof(TIPO)); //scanf("%d",&err); TIPO data[2]; // original data set given to device TIPO *results = malloc(sizeof(TIPO)*DATA_SIZE); // results returned from device //unsigned int correct; // number of correct results returned //printf("TESTE"); size_t global; // global domain size for our calculation size_t local; // local domain size for our calculation cl_device_id device_id; // device ID cl_context context; // context cl_command_queue queue; // command queue cl_program program; // program cl_kernel kernel; // kernel cl_mem input; // device memory used for the input array cl_mem output; // device memory used for the output array // Get data on which to operate // //int i = 0; //int n = 3; unsigned int count = DATA_SIZE; //for(i = 0; i < count; i+=2) { //data[i] = n; //n += 2; //} //printf("TESTE"); // Get an ID for the device [2] int gpu = 1; err = clGetDeviceIDs(NULL, gpu ? 
CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1,&device_id, NULL); if (err != CL_SUCCESS) printf("ERROR CLGETDEVICEIDS!\n"); // [3] // Create a context [4] // context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("ERROR CONTEXT\n"); } // Create a command queue [5] // queue = clCreateCommandQueue(context, device_id, 0, &err); if (!queue) { printf("ERROR QUEUE\n"); } // Create the compute program from the source buffer [6] // program = clCreateProgramWithSource(context, 1,(const char **) & KernelSource, NULL, &err); if ( !program) { printf("ERROR PROGRAM\n"); } // Build the program executable [7] // err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable\n"); //[8] clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,sizeof(buffer), buffer, &len); printf("%s\n", buffer); exit(1); } // Create the compute kernel in the program we wish to run [9] // kernel = clCreateKernel(program, "calcpi", &err); if (!kernel || err != CL_SUCCESS) { printf("ERROR KERNEL OR CL_SUCESS\n"); } // Create the input and output arrays in device memory for our calculation // [10] input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(TIPO) *count,NULL, NULL); output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(TIPO) *count,NULL, NULL); if (!input || !output) { printf("ERROR !INPUT OR !OUTPUT\n"); } // Write our data set into the input array in device memory [11] // err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0,sizeof(TIPO) *2, data, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("ERROR WRITE OUR DATA\n"); } // Set the arguments to our compute kernel [12] // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); if (err != CL_SUCCESS) { printf("ERROR ARGUMENTS COMPUTE KERNEL - ERROR NUMBER: %d\n",err); 
exit(1); } // Get the maximum work-group size for executing the kernel on the device // [13] err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t), &local, NULL); if (err != CL_SUCCESS) { printf("ERROR MAXIMUM WORK-GROUP - ERROR NUMBER: %d\n",err); exit(1); } // Execute the kernel over the entire range of the data set [14] // global = count; //printf("TESTE"); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,0, NULL, NULL); if (err) { printf("ERROR EXECUTE KERNEL - ERROR NUMBER: %d\n",err); printf("ERROS: CL_INVALID_PROGRAM_EXECUTABLE %d\n", CL_INVALID_PROGRAM_EXECUTABLE); printf("ERROS: CL_INVALID_COMMAND_QUEUE %d\n", CL_INVALID_COMMAND_QUEUE); printf("ERROS: CL_INVALID_KERNEL %d\n", CL_INVALID_KERNEL); printf("ERROS: CL_INVALID_CONTEXT %d\n", CL_INVALID_CONTEXT); printf("ERROS: CL_INVALID_KERNEL_ARGS %d\n", CL_INVALID_KERNEL_ARGS); printf("ERROS: CL_INVALID_WORK_DIMENSION %d\n", CL_INVALID_WORK_DIMENSION); printf("ERROS: CL_INVALID_WORK_GROUP_SIZE %d\n", CL_INVALID_WORK_GROUP_SIZE); printf("ERROS: CL_MEM_OBJECT_ALLOCATION_FAILURE %d\n", CL_MEM_OBJECT_ALLOCATION_FAILURE); printf("ERROS: CL_INVALID_WORK_ITEM_SIZE %d\n", CL_INVALID_WORK_ITEM_SIZE); printf("ERROS: CL_INVALID_GLOBAL_OFFSET %d\n", CL_INVALID_GLOBAL_OFFSET); printf("ERROS: CL_OUT_OF_RESOURCES %d\n", CL_OUT_OF_RESOURCES); printf("ERROS: CL_INVALID_EVENT_WAIT_LIST %d\n", CL_INVALID_EVENT_WAIT_LIST); printf("ERROS: CL_OUT_OF_HOST_MEMORY %d\n", CL_OUT_OF_HOST_MEMORY); exit(1); } // Wait for the command queue to get serviced before reading back results // [15] clFinish(queue); // Read the results from the device [16] // err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,sizeof(TIPO) *count, results, 0, NULL, NULL ); if (err != CL_SUCCESS) { printf("ERROR READ RESULTS - ERROR NUMBER: %d\n",err); } //printf("TESTE"); TIPO pi = 0.0; int i; for (i=0;i<count-1;i++) { //printf("%f",results[i]); pi += (pow(-1.0,i)) * (TIPO) results[i]; //pi = (TIPO) 
results[i]; //printf("casa %d deu: %1.50f\n",i,pi); //printf("%f",(pow(-1,i))); //pi += (pow(-1.0,i)); } printf("PI: %1.50lf",pi); // Shut down and clean up // clReleaseMemObject(input); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseContext(context); scanf("%d",&i); return 0; }

Outcomes