3 Replies Latest reply on Aug 10, 2011 11:06 AM by genaganna

    Type double problem

    thamerhatem

      Hi, I'm trying to use the double type in OpenCL, but it doesn't work. I want to use double for more precision; if any other type can provide this, please tell me.

      If you don't have time to read my code, in summary: I use double inside the kernel source and in the main code. I tried other things like double_t, float_t, double2, half... nothing works.

      #pragma OPENCL EXTENSION cl_amd_fp64 : enable #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <OpenCL/opencl.h> //////////////////////////////////////////////////////////////////////////////// // Use a static data size for simplicity // #define DATA_SIZE (1000000) //////////////////////////////////////////////////////////////////////////////// #define TIPO cl_double // Simple compute kernel that computes the calcpi of an input array. [1] // const char *KernelSource = "\n" \ "#pragma OPENCL EXTENSION cl_amd_fp64 : enable \n" \ "__kernel void calcpi( \n" \ " __global double* input, \n" \ " __global double* output, \n" \ " const unsigned int count) \n" \ "{ \n" \ " int i = get_global_id(0); \n" \ " double z = get_global_id(0)*2+1; \n" \ " if(i < count) \n" \ " output[i] = 4.0/z; \n" \ "} \n" \ "\n"; //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int err; // error code returned from api calls //printf("%d",sizeof(TIPO)); //scanf("%d",&err); TIPO data[2]; // original data set given to device TIPO *results = malloc(sizeof(TIPO)*DATA_SIZE); // results returned from device //unsigned int correct; // number of correct results returned //printf("TESTE"); size_t global; // global domain size for our calculation size_t local; // local domain size for our calculation cl_device_id device_id; // device ID cl_context context; // context cl_command_queue queue; // command queue cl_program program; // program cl_kernel kernel; // kernel cl_mem input; // device memory used for the input array cl_mem output; // device memory used for the output array // Get data on which to operate // //int i = 0; //int n = 3; unsigned int count = DATA_SIZE; //for(i = 0; i < count; i+=2) { //data[i] = n; //n += 2; //} //printf("TESTE"); // Get an ID for the device [2] int gpu = 1; err = clGetDeviceIDs(NULL, 
gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1,&device_id, NULL); if (err != CL_SUCCESS) printf("ERROR CLGETDEVICEIDS!\n"); // [3] // Create a context [4] // context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("ERROR CONTEXT\n"); } // Create a command queue [5] // queue = clCreateCommandQueue(context, device_id, 0, &err); if (!queue) { printf("ERROR QUEUE\n"); } // Create the compute program from the source buffer [6] // program = clCreateProgramWithSource(context, 1,(const char **) & KernelSource, NULL, &err); if ( !program) { printf("ERROR PROGRAM\n"); } // Build the program executable [7] // err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable\n"); //[8] clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,sizeof(buffer), buffer, &len); printf("%s\n", buffer); exit(1); } // Create the compute kernel in the program we wish to run [9] // kernel = clCreateKernel(program, "calcpi", &err); if (!kernel || err != CL_SUCCESS) { printf("ERROR KERNEL OR CL_SUCESS\n"); } // Create the input and output arrays in device memory for our calculation // [10] input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(TIPO) *count,NULL, NULL); output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(TIPO) *count,NULL, NULL); if (!input || !output) { printf("ERROR !INPUT OR !OUTPUT\n"); } // Write our data set into the input array in device memory [11] // err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0,sizeof(TIPO) *2, data, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("ERROR WRITE OUR DATA\n"); } // Set the arguments to our compute kernel [12] // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); if (err != CL_SUCCESS) { printf("ERROR ARGUMENTS COMPUTE KERNEL - ERROR NUMBER: 
%d\n",err); exit(1); } // Get the maximum work-group size for executing the kernel on the device // [13] err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t), &local, NULL); if (err != CL_SUCCESS) { printf("ERROR MAXIMUM WORK-GROUP - ERROR NUMBER: %d\n",err); exit(1); } // Execute the kernel over the entire range of the data set [14] // global = count; //printf("TESTE"); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,0, NULL, NULL); if (err) { printf("ERROR EXECUTE KERNEL - ERROR NUMBER: %d\n",err); printf("ERROS: CL_INVALID_PROGRAM_EXECUTABLE %d\n", CL_INVALID_PROGRAM_EXECUTABLE); printf("ERROS: CL_INVALID_COMMAND_QUEUE %d\n", CL_INVALID_COMMAND_QUEUE); printf("ERROS: CL_INVALID_KERNEL %d\n", CL_INVALID_KERNEL); printf("ERROS: CL_INVALID_CONTEXT %d\n", CL_INVALID_CONTEXT); printf("ERROS: CL_INVALID_KERNEL_ARGS %d\n", CL_INVALID_KERNEL_ARGS); printf("ERROS: CL_INVALID_WORK_DIMENSION %d\n", CL_INVALID_WORK_DIMENSION); printf("ERROS: CL_INVALID_WORK_GROUP_SIZE %d\n", CL_INVALID_WORK_GROUP_SIZE); printf("ERROS: CL_MEM_OBJECT_ALLOCATION_FAILURE %d\n", CL_MEM_OBJECT_ALLOCATION_FAILURE); printf("ERROS: CL_INVALID_WORK_ITEM_SIZE %d\n", CL_INVALID_WORK_ITEM_SIZE); printf("ERROS: CL_INVALID_GLOBAL_OFFSET %d\n", CL_INVALID_GLOBAL_OFFSET); printf("ERROS: CL_OUT_OF_RESOURCES %d\n", CL_OUT_OF_RESOURCES); printf("ERROS: CL_INVALID_EVENT_WAIT_LIST %d\n", CL_INVALID_EVENT_WAIT_LIST); printf("ERROS: CL_OUT_OF_HOST_MEMORY %d\n", CL_OUT_OF_HOST_MEMORY); exit(1); } // Wait for the command queue to get serviced before reading back results // [15] clFinish(queue); // Read the results from the device [16] // err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,sizeof(TIPO) *count, results, 0, NULL, NULL ); if (err != CL_SUCCESS) { printf("ERROR READ RESULTS - ERROR NUMBER: %d\n",err); } //printf("TESTE"); TIPO pi = 0.0; int i; for (i=0;i<count-1;i++) { //printf("%f",results[i]); pi += (pow(-1.0,i)) * (TIPO) results[i]; //pi = 
(TIPO) results[i]; //printf("casa %d deu: %1.50f\n",i,pi); //printf("%f",(pow(-1,i))); //pi += (pow(-1.0,i)); } printf("PI: %1.50lf",pi); // Shut down and clean up // clReleaseMemObject(input); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseContext(context); scanf("%d",&i); return 0; }

        • Type double problem
          genaganna

           

          Originally posted by: thamerhatem Hi, I'm trying use double type in openCL, but doesn't work anyway, i want use double for more precision, if have any other type make this, please, tell me. if you don't have time for read my code, resuming is: I use double inside of kernel source and in main code, i tried other things like double_t, float_t, double2, half... nothing work.

           



          What error are you getting?  It looks like you are not able to compile your C code.

          Please remove the following statement from the top of the host code:

          #pragma OPENCL EXTENSION cl_amd_fp64 : enable

          • Type double problem
            thamerhatem

            when i put in kernelSource:

            output = 4.0;

             

            only like this, i get 512.000123023986816406250000000000000000000000000 

             

            in results..

            or 1.0 = 0.00781250184809323400259017944335937500000000000

            i don't understand the problem

            p.p1 {margin: 0.0px 0.0px 0.0px 28.0px; text-indent: -28.0px; font: 11.0px Menlo}

            p.p1 {margin: 0.0px 0.0px 0.0px 28.0px; text-indent: -28.0px; font: 11.0px Menlo}

             

            genaganna: sorry.. this line was an attempt, but is not the problem



             

              • Type double problem
                genaganna

                 

                Originally posted by: thamerhatem when i put in kernelSource:

                 

                output = 4.0;

                 

                 

                only like this, i get 512.000123023986816406250000000000000000000000000 

                 

                in results..

                 

                or 1.0 = 0.00781250184809323400259017944335937500000000000

                 

                i don't understand the problem

                 

                p.p1 {margin: 0.0px 0.0px 0.0px 28.0px; text-indent: -28.0px; font: 11.0px Menlo}

                 

                p.p1 {margin: 0.0px 0.0px 0.0px 28.0px; text-indent: -28.0px; font: 11.0px Menlo}

                 

                 

                 

                genaganna: sorry.. this line was an attempt, but is not the problem

                 

                 



                Thamerhatem,

                You must get the platform ID before creating the context or getting device IDs.  Could you please also give me the following information?

                       OS, SDK version, Driver version, CPU and GPU.

                 

                Please look at the MatrixMulDouble sample, where matrix multiplication is done for the double type.