13 Replies Latest reply on Mar 25, 2010 6:58 AM by Mugga

    CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine

    Mugga

      Hello!

       

      I am currently trying to get my OpenCL project running on my new ATI Radeon 5770. The program terminates at clBuildProgram with the following error:

      Found 1 platform(s).
      1: Advanced Micro Devices, Inc.
      clBuildProgram failed.: CL_BUILD_PROGRAM_FAILURE

       

      The application runs perfectly with an Nvidia card. The program also runs perfectly when I choose the CPU as the device type; the error occurs only with device type GPU. When I compile manually, the compilation runs without errors.

       

      Any ideas?

      Greetings from Germany,

      Chris

        • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
          omkaranathan

          Chris,

          Use clGetProgramBuildInfo API call to get the buildlog of the kernel compilation.

          You can also use SKA to compile your kernel and check for errors, if you are in Windows.

          Here is a sample code to get the buildlog

           

          /* create a cl program executable for all the devices specified */ status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); //error checking code if(!sampleCommon->checkVal(status,CL_SUCCESS,"clBuildProgram failed.")) { //print kernel compilation error char programLog[1024]; status = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 1024, programLog, 0); std::cout<<programLog<<std::endl; return 0; }

            • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
              Mugga

              Thanks!

               

              I got a "Link Failed" error and have already found out where it comes from. Hence my next question:

              Why doesn't this example link? (I know the performance is ugly, but this is just the first try for me!)

              The problem is the "while"....

              /*
               * Evaluates a piecewise cubic at one query position per work-item.
               *
               * interpolates : in/out; element [local id] holds the query position on
               *                entry and is overwritten with the interpolated value.
               * all          : records of 5 floats each; floats 0..3 are the cubic's
               *                coefficients (constant term first) and float 4 is the
               *                value the query is compared against (presumably the
               *                segment's start position -- confirm with the host code).
               * arraySize    : number of 5-float records in `all`.
               *
               * NOTE(review): the index comes from get_local_id(0), so work-items in
               * different work-groups address the same elements; get_global_id(0) may be
               * what is intended -- confirm against the launch configuration.
               */
              __kernel void InterpolateSplineFull(
                  __global float* interpolates,
                  __global float* all,
                  const uint arraySize)
              {
                  int localId = get_local_id(0);

                  /* Nothing to evaluate without at least one record. */
                  if (arraySize == 0)
                  {
                      return;
                  }

                  int l = 0;
                  int r = (int)arraySize - 1;

                  /*
                   * Binary search for the record whose float 4 is the last one not
                   * greater than the query. The condition `r - l > 1` is equivalent to
                   * the former `l != r - 1` whenever arraySize >= 2, but also terminates
                   * for arraySize == 1 (l == r == 0), where the old loop never ended.
                   */
                  while (r - l > 1)
                  {
                      int m = l + (r - l) / 2;
                      if (all[(m * 5) + 4] > interpolates[localId])
                      {
                          r = m;
                      }
                      else
                      {
                          l = m;
                      }
                  }

                  /* Horner evaluation of the cubic relative to the record's float 4. */
                  float x = interpolates[localId] - all[(l * 5) + 4];
                  float result = all[l * 5 + 0]
                               + x * (all[l * 5 + 1]
                               + x * (all[l * 5 + 2]
                               + x * (all[l * 5 + 3])));

                  interpolates[localId] = result;
              }

                • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                  omkaranathan

                  Chris,

                  I'm not able to reproduce your issue with 2.01 SDK. Please provide more information like, OS, SDK, driver etc.

                    • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                      Mugga

                      Hi!

                       

                      I am also working with SDK 2.01 on Debian 5.

                      >uname -a
                      Linux ix1049 2.6.26-2-686 #1 SMP Tue Mar 9 17:35:51 UTC 2010 i686 GNU/Linux

                      >lspci | grep VGA
                      01:00.0 VGA compatible controller: ATI Technologies Inc Device 68b8

                      > glxinfo | grep version
                      server glx version string: 1.4
                      client glx version string: 1.4
                      GLX version: 1.4
                      OpenGL version string: 3.2.9252 Compatibility Profile Context

                       

                      Does anyone have an idea? This issue keeps me awake and I think I am going insane....

                       

                      Thanks,

                      chris

                        • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                          omkaranathan

                          Chris,

                          Please run the CAL sample 'FindNumDevice' and see if the drivers are installed correctly and the device is being detected properly. Please post the output of CLInfo sample too.

                            • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                              Mugga

                              Hi,

                               

                               ./FindNumDevices
                              Supported CAL Runtime Version: 1.3.185
                              Found CAL Runtime Version: 1.4.519
                              Use -? for help
                              CAL initialized.
                              Finding out number of devices :-
                              Device Count = 1
                              CAL shutdown successful.

                              Press enter to exit...

                                • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                                  omkaranathan

                                  Chris,

                                  Nothing wrong there. Could you post the host side code(cpp & header)?

                                    • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                                      Mugga

                                      this is the initialization part. Error occurs at clBuildProgram...

                                       

                                      void OpenClEngine::SetupContext(void) { cl_int status = 0; size_t deviceListSize; /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_uint numPlatforms; cl_platform_id platform = NULL; status=clGetPlatformIDs(0, NULL, &numPlatforms); check(status, "clGetPlatformIDs failed"); if (0 < numPlatforms) { cerr << "Found " << numPlatforms << " platform(s)." << endl; cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status=clGetPlatformIDs(numPlatforms, platforms, NULL); check(status, "clGetPlatformIDs failed"); for (unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf),pbuf, NULL); cerr << i+1 << ": " << pbuf << endl; check(status, "clGetPlatformInfo failed"); platform = platforms[i]; if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) { break; } } delete[] platforms; } /* * If we could find our platform, use it. Otherwise pass a NULL and get whatever the * implementation thinks we should be using. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; /* Use NULL for backward compatibility */ cl_context_properties* cprops = (NULL == platform) ? 
NULL : cps; context = clCreateContextFromType(cprops,CL_DEVICE_TYPE_CPU,NULL,NULL,&status); check(status, "clCreateContextFromType failed."); /* First, get the size of device list data */ status=clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); check(status, "clGetContextInfo failed."); /* Now allocate memory for device list based on the size we got earlier */ if ((devices = (cl_device_id *)malloc(deviceListSize)) == NULL) { cerr << ("Failed to allocate memory (devices).") << endl; throw OpenCL::ExecutionException(); } status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); check(status, "clGetContextInfo failed."); status = clGetDeviceInfo(devices[0],CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),(void *)&maxWorkGroupSize,NULL); check(status, "clGetDeviceInfo failed."); status = clGetDeviceInfo(devices[0],CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(cl_uint),(void *)&maxDimensions, NULL); check(status, "clGetDeviceInfo failed."); if ((maxWorkItemSizes = (size_t *)malloc(maxDimensions*sizeof(size_t))) == NULL) { cerr << "malloc: maxWorkItemSize" << endl; throw OpenCL::ExecutionException(); } status=clGetDeviceInfo(devices[0],CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*maxDimensions,(void *)maxWorkItemSizes, NULL); check(status, "clGetDeviceInfo failed."); status=clGetDeviceInfo(devices[0],CL_DEVICE_LOCAL_MEM_SIZE,sizeof(cl_ulong),(void *)&totalLocalMemory,NULL); check(status, "clGetDeviceInfo failed."); { /* The block is to move the declaration of prop closer to its use */ cl_command_queue_properties prop = 0; commandQueue = clCreateCommandQueue(context, devices[0], prop, &status); check(status, "clCreateCommandQueue failed."); } OpenCL::ClUtils kernelFile("kernels/SimilarityMatching.cl"); const char* source = kernelFile.source().c_str(); size_t sourceSize[] = { strlen(source) }; program = clCreateProgramWithSource(context, 1, &source, sourceSize, &status); check(status, "clCreateProgramWithSource failed"); /* 
create a cl program executable for all the devices specified */ status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); //check(status, "clBuildProgram failed."); { //print kernel compilation error char programLog[1024]; status = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 1024, programLog, 0); std::cout<<programLog<<std::endl; // return 0; } /* get a kernel object handle for a kernel with the given name */ kernelAngles = clCreateKernel(program, "CompareHeadingAngles" , &status); check(status, "clCreateKernel failed"); /* get a kernel object handle for a kernel with the given name */ kernelWhereabouts = clCreateKernel(program, "CompareSampledWhereabouts" , &status); check(status, "clCreateKernel failed"); /* get a kernel object handle for a kernel with the given name */ kernelEuclid = clCreateKernel(program, "CompareTrajectories" , &status); check(status, "clCreateKernel failed"); /* get a kernel object handle for a kernel with the given name */ kernelSplineInterpolationFull = clCreateKernel(program, "InterpolateSplineFull" , &status); check(status, "clCreateKernel failed"); }

                                        • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                                          omkaranathan

                                          Chris,

                                          It's better if you can post the whole code, or a simple standalone test case if the code is big, which will enable me to compile and reproduce the issue.

                                           

                                            • CL_BUILD_PROGRAM_FAILURE only on GPU - CPU runs fine
                                              Mugga

                                              Here:

                                              #include <CL/cl.h>
                                              #include <fstream>
                                              #include <stdio.h>
                                              #include <stdlib.h>
                                              #include <assert.h>
                                              #include <string.h>
                                              #include <iostream>
                                              #include <cmath>
                                              #include <exception>
                                              #include <iterator>
                                              #include <string>
                                              #include <vector>

                                              using namespace std;

                                              namespace OpenCL {

                                              /// Exception thrown when an OpenCL call, or an allocation made on its behalf, fails.
                                              class ExecutionException : public std::exception
                                              {
                                              public:
                                                  /// Creates an exception carrying the generic description.
                                                  ExecutionException() : message("OpenCL Execution Exception") {}

                                                  /// Creates an exception carrying @p str and echoes it to stderr, so the text
                                                  /// is visible even if nobody catches the exception.
                                                  ExecutionException(const char* str)
                                                      : message(str ? str : "OpenCL Execution Exception")
                                                  {
                                                      std::cerr << message << std::endl;
                                                  }

                                                  virtual ~ExecutionException() throw() {}

                                                  /// @return the text passed at construction, or the generic description.
                                                  virtual const char* what() const throw() { return message.c_str(); }

                                              private:
                                                  std::string message;  ///< owned copy, so what() never dangles
                                              };

                                              /// Loads a kernel source file into memory.
                                              class ClUtils
                                              {
                                              public:
                                                  /// Tries to load @p filename; on failure source() stays empty.
                                                  ClUtils(const char* filename) { open(filename); }

                                                  std::string source_;  ///< contents of the last file loaded successfully

                                                  /// @return the loaded text (empty if nothing has been loaded).
                                                  const std::string& source() const { return source_; }

                                                  /// Reads the whole file @p fileName into source_.
                                                  /// @return true on success; false if the file could not be opened or
                                                  ///         read, in which case source_ is left untouched.
                                                  bool open(const char* fileName)
                                                  {
                                                      std::ifstream f(fileName, std::ios::in | std::ios::binary);
                                                      if (!f.is_open())
                                                      {
                                                          return false;
                                                      }

                                                      // Read straight into a string: no manual buffer to size or free.
                                                      std::string contents((std::istreambuf_iterator<char>(f)),
                                                                           std::istreambuf_iterator<char>());
                                                      if (f.bad())
                                                      {
                                                          return false;
                                                      }

                                                      source_.swap(contents);
                                                      return true;
                                                  }
                                              };

                                              /// Maps an OpenCL status code to the name of its constant.
                                              /// @return a string literal; "unknown error code" for codes not listed.
                                              const char* getOpenCLErrorCodeString(int errorCode)
                                              {
                                                  switch (errorCode)
                                                  {
                                                      case CL_DEVICE_NOT_FOUND:                return "CL_DEVICE_NOT_FOUND";
                                                      case CL_DEVICE_NOT_AVAILABLE:            return "CL_DEVICE_NOT_AVAILABLE";
                                                      case CL_COMPILER_NOT_AVAILABLE:          return "CL_COMPILER_NOT_AVAILABLE";
                                                      case CL_MEM_OBJECT_ALLOCATION_FAILURE:   return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
                                                      case CL_OUT_OF_RESOURCES:                return "CL_OUT_OF_RESOURCES";
                                                      case CL_OUT_OF_HOST_MEMORY:              return "CL_OUT_OF_HOST_MEMORY";
                                                      case CL_PROFILING_INFO_NOT_AVAILABLE:    return "CL_PROFILING_INFO_NOT_AVAILABLE";
                                                      case CL_MEM_COPY_OVERLAP:                return "CL_MEM_COPY_OVERLAP";
                                                      case CL_IMAGE_FORMAT_MISMATCH:           return "CL_IMAGE_FORMAT_MISMATCH";
                                                      case CL_IMAGE_FORMAT_NOT_SUPPORTED:      return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
                                                      case CL_BUILD_PROGRAM_FAILURE:           return "CL_BUILD_PROGRAM_FAILURE";
                                                      case CL_MAP_FAILURE:                     return "CL_MAP_FAILURE";
                                                      case CL_INVALID_VALUE:                   return "CL_INVALID_VALUE";
                                                      case CL_INVALID_DEVICE_TYPE:             return "CL_INVALID_DEVICE_TYPE";
                                                      case CL_INVALID_PLATFORM:                return "CL_INVALID_PLATFORM";
                                                      case CL_INVALID_DEVICE:                  return "CL_INVALID_DEVICE";
                                                      case CL_INVALID_CONTEXT:                 return "CL_INVALID_CONTEXT";
                                                      case CL_INVALID_QUEUE_PROPERTIES:        return "CL_INVALID_QUEUE_PROPERTIES";
                                                      case CL_INVALID_COMMAND_QUEUE:           return "CL_INVALID_COMMAND_QUEUE";
                                                      case CL_INVALID_HOST_PTR:                return "CL_INVALID_HOST_PTR";
                                                      case CL_INVALID_MEM_OBJECT:              return "CL_INVALID_MEM_OBJECT";
                                                      case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
                                                      case CL_INVALID_IMAGE_SIZE:              return "CL_INVALID_IMAGE_SIZE";
                                                      case CL_INVALID_SAMPLER:                 return "CL_INVALID_SAMPLER";
                                                      case CL_INVALID_BINARY:                  return "CL_INVALID_BINARY";
                                                      case CL_INVALID_BUILD_OPTIONS:           return "CL_INVALID_BUILD_OPTIONS";
                                                      case CL_INVALID_PROGRAM:                 return "CL_INVALID_PROGRAM";
                                                      case CL_INVALID_PROGRAM_EXECUTABLE:      return "CL_INVALID_PROGRAM_EXECUTABLE";
                                                      case CL_INVALID_KERNEL_NAME:             return "CL_INVALID_KERNEL_NAME";
                                                      case CL_INVALID_KERNEL_DEFINITION:       return "CL_INVALID_KERNEL_DEFINITION";
                                                      case CL_INVALID_KERNEL:                  return "CL_INVALID_KERNEL";
                                                      case CL_INVALID_ARG_INDEX:               return "CL_INVALID_ARG_INDEX";
                                                      case CL_INVALID_ARG_VALUE:               return "CL_INVALID_ARG_VALUE";
                                                      case CL_INVALID_ARG_SIZE:                return "CL_INVALID_ARG_SIZE";
                                                      case CL_INVALID_KERNEL_ARGS:             return "CL_INVALID_KERNEL_ARGS";
                                                      case CL_INVALID_WORK_DIMENSION:          return "CL_INVALID_WORK_DIMENSION";
                                                      case CL_INVALID_WORK_GROUP_SIZE:         return "CL_INVALID_WORK_GROUP_SIZE";
                                                      case CL_INVALID_WORK_ITEM_SIZE:          return "CL_INVALID_WORK_ITEM_SIZE";
                                                      case CL_INVALID_GLOBAL_OFFSET:           return "CL_INVALID_GLOBAL_OFFSET";
                                                      case CL_INVALID_EVENT_WAIT_LIST:         return "CL_INVALID_EVENT_WAIT_LIST";
                                                      case CL_INVALID_EVENT:                   return "CL_INVALID_EVENT";
                                                      case CL_INVALID_OPERATION:               return "CL_INVALID_OPERATION";
                                                      case CL_INVALID_GL_OBJECT:               return "CL_INVALID_GL_OBJECT";
                                                      case CL_INVALID_BUFFER_SIZE:             return "CL_INVALID_BUFFER_SIZE";
                                                      case CL_INVALID_MIP_LEVEL:               return "CL_INVALID_MIP_LEVEL";
                                                      default:                                 break;
                                                  }
                                                  return "unknown error code";
                                              }

                                              }  // namespace OpenCL

                                              /// Owns the OpenCL context, queue, program and kernel of this test program.
                                              ///
                                              /// Everything created by SetupContext() is released in the destructor, including
                                              /// the public `context` and `commandQueue` handles; callers must not release
                                              /// those themselves.
                                              class OpenClEngine
                                              {
                                                  cl_device_id *devices;                    ///< CL device list (malloc'd)
                                                  cl_program program;                       ///< CL program
                                                  cl_kernel kernelSplineInterpolationFull;  ///< CL kernel handle for the Full-Spline Interpolation Kernel
                                                  size_t maxWorkGroupSize;                  ///< CL_DEVICE_MAX_WORK_GROUP_SIZE of the first device
                                                  cl_uint maxDimensions;                    ///< CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS of the first device
                                                  size_t *maxWorkItemSizes;                 ///< CL_DEVICE_MAX_WORK_ITEM_SIZES, one entry per dimension (malloc'd)
                                                  cl_ulong totalLocalMemory;                ///< CL_DEVICE_LOCAL_MEM_SIZE of the first device
                                                  cl_ulong usedLocalMemory;                 ///< the amount of used local memory of the device
                                                  cl_ulong availableLocalMemory;            ///< the available amount of local memory of the device
                                                  cl_ulong neededLocalMemory;               ///< the needed local memory of a specific kernel
                                                  size_t kernelWorkGroupSize;               ///< Group Size returned by kernel

                                                  // Not copyable: a copy would release the same handles twice.
                                                  OpenClEngine(const OpenClEngine&);
                                                  OpenClEngine& operator=(const OpenClEngine&);

                                                  /// Prints the build log of `program` for @p device to stdout, if there is one.
                                                  /// The log length is queried first so that long logs are not truncated.
                                                  void printBuildLog(cl_device_id device)
                                                  {
                                                      size_t logSize = 0;
                                                      cl_int logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                                                                               0, NULL, &logSize);
                                                      if (logStatus != CL_SUCCESS || logSize == 0)
                                                      {
                                                          return;
                                                      }

                                                      // One extra zeroed byte guarantees termination.
                                                      std::vector<char> programLog(logSize + 1, '\0');
                                                      logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                                                                        logSize, &programLog[0], NULL);
                                                      if (logStatus == CL_SUCCESS)
                                                      {
                                                          std::cout << &programLog[0] << std::endl;
                                                      }
                                                  }

                                              public:
                                                  cl_context context;             ///< CL context
                                                  cl_command_queue commandQueue;  ///< CL command queue

                                                  /// Starts with every handle NULL so the destructor is safe even if
                                                  /// SetupContext() was never called or failed part-way.
                                                  OpenClEngine()
                                                      : devices(NULL),
                                                        program(NULL),
                                                        kernelSplineInterpolationFull(NULL),
                                                        maxWorkGroupSize(0),
                                                        maxDimensions(0),
                                                        maxWorkItemSizes(NULL),
                                                        totalLocalMemory(0),
                                                        usedLocalMemory(0),
                                                        availableLocalMemory(0),
                                                        neededLocalMemory(0),
                                                        kernelWorkGroupSize(0),
                                                        context(NULL),
                                                        commandQueue(NULL)
                                                  {
                                                  }

                                                  /// Releases the OpenCL objects in reverse order of creation and frees the
                                                  /// malloc'd arrays.
                                                  ~OpenClEngine()
                                                  {
                                                      if (kernelSplineInterpolationFull) { clReleaseKernel(kernelSplineInterpolationFull); }
                                                      if (program)                       { clReleaseProgram(program); }
                                                      if (commandQueue)                  { clReleaseCommandQueue(commandQueue); }
                                                      if (context)                       { clReleaseContext(context); }
                                                      free(maxWorkItemSizes);
                                                      free(devices);
                                                  }

                                                  /// Picks a platform (AMD if present, otherwise the last one enumerated),
                                                  /// creates a GPU context and command queue, queries limits of the first
                                                  /// device, builds kernels/SimilarityMatching.cl and creates the
                                                  /// InterpolateSplineFull kernel.
                                                  /// @throws OpenCL::ExecutionException if any step fails.
                                                  void SetupContext()
                                                  {
                                                      cl_int status = CL_SUCCESS;
                                                      size_t deviceListSize = 0;

                                                      /*
                                                       * Have a look at the available platforms and pick either
                                                       * the AMD one if available or a reasonable default.
                                                       */
                                                      cl_uint numPlatforms = 0;
                                                      cl_platform_id platform = NULL;
                                                      status = clGetPlatformIDs(0, NULL, &numPlatforms);
                                                      check(status, "clGetPlatformIDs failed");
                                                      if (0 < numPlatforms)
                                                      {
                                                          cerr << "Found " << numPlatforms << " platform(s)." << endl;

                                                          // A vector frees the list even when check() throws.
                                                          std::vector<cl_platform_id> platforms(numPlatforms);
                                                          status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
                                                          check(status, "clGetPlatformIDs failed");

                                                          for (cl_uint i = 0; i < numPlatforms; ++i)
                                                          {
                                                              char pbuf[100];
                                                              status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
                                                                                         sizeof(pbuf), pbuf, NULL);
                                                              // Check before printing: pbuf is only valid on success.
                                                              check(status, "clGetPlatformInfo failed");
                                                              cerr << i + 1 << ": " << pbuf << endl;
                                                              platform = platforms[i];
                                                              if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
                                                              {
                                                                  break;
                                                              }
                                                          }
                                                      }

                                                      /*
                                                       * If we could find our platform, use it. Otherwise pass a NULL and get
                                                       * whatever the implementation thinks we should be using.
                                                       */
                                                      cl_context_properties cps[3] =
                                                      {
                                                          CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0
                                                      };
                                                      /* Use NULL for backward compatibility */
                                                      cl_context_properties* cprops = (NULL == platform) ? NULL : cps;

                                                      context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
                                                      check(status, "clCreateContextFromType failed.");

                                                      /* First, get the size of device list data */
                                                      status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize);
                                                      check(status, "clGetContextInfo failed.");

                                                      /* devices[0] is used below, so an empty list must be rejected here. */
                                                      if (deviceListSize < sizeof(cl_device_id))
                                                      {
                                                          cerr << "No OpenCL devices found in context." << endl;
                                                          throw OpenCL::ExecutionException();
                                                      }

                                                      /* Now allocate memory for device list based on the size we got earlier */
                                                      if ((devices = (cl_device_id *)malloc(deviceListSize)) == NULL)
                                                      {
                                                          cerr << ("Failed to allocate memory (devices).") << endl;
                                                          throw OpenCL::ExecutionException();
                                                      }

                                                      status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL);
                                                      check(status, "clGetContextInfo failed.");

                                                      /* Query the limits of the first device. */
                                                      status = clGetDeviceInfo(devices[0], CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                                                               sizeof(size_t), (void *)&maxWorkGroupSize, NULL);
                                                      check(status, "clGetDeviceInfo failed.");

                                                      status = clGetDeviceInfo(devices[0], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                                                                               sizeof(cl_uint), (void *)&maxDimensions, NULL);
                                                      check(status, "clGetDeviceInfo failed.");

                                                      if ((maxWorkItemSizes = (size_t *)malloc(maxDimensions * sizeof(size_t))) == NULL)
                                                      {
                                                          cerr << "malloc: maxWorkItemSize" << endl;
                                                          throw OpenCL::ExecutionException();
                                                      }

                                                      status = clGetDeviceInfo(devices[0], CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                                                               sizeof(size_t) * maxDimensions, (void *)maxWorkItemSizes, NULL);
                                                      check(status, "clGetDeviceInfo failed.");

                                                      status = clGetDeviceInfo(devices[0], CL_DEVICE_LOCAL_MEM_SIZE,
                                                                               sizeof(cl_ulong), (void *)&totalLocalMemory, NULL);
                                                      check(status, "clGetDeviceInfo failed.");

                                                      {
                                                          /* The block is to move the declaration of prop closer to its use */
                                                          cl_command_queue_properties prop = 0;
                                                          commandQueue = clCreateCommandQueue(context, devices[0], prop, &status);
                                                          check(status, "clCreateCommandQueue failed.");
                                                      }

                                                      /* kernelFile must outlive `source`, which points into its buffer. */
                                                      OpenCL::ClUtils kernelFile("kernels/SimilarityMatching.cl");
                                                      const std::string& kernelSource = kernelFile.source();
                                                      if (kernelSource.empty())
                                                      {
                                                          cerr << "Kernel source is empty or could not be read: kernels/SimilarityMatching.cl" << endl;
                                                          throw OpenCL::ExecutionException();
                                                      }
                                                      const char* source = kernelSource.c_str();
                                                      size_t sourceSize[] = { kernelSource.size() };

                                                      program = clCreateProgramWithSource(context, 1, &source, sourceSize, &status);
                                                      check(status, "clCreateProgramWithSource failed");

                                                      /* create a cl program executable for all the devices specified */
                                                      status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

                                                      /*
                                                       * Print the log first (it may carry warnings even on success), then
                                                       * fail on a bad build here, not later with a misleading
                                                       * clCreateKernel error.
                                                       */
                                                      printBuildLog(devices[0]);
                                                      check(status, "clBuildProgram failed.");

                                                      /* get a kernel object handle for a kernel with the given name */
                                                      kernelSplineInterpolationFull = clCreateKernel(program, "InterpolateSplineFull", &status);
                                                      check(status, "clCreateKernel failed");
                                                  }

                                                  /// Throws if @p status is not CL_SUCCESS.
                                                  /// Writes "<message>: " to stderr; the exception constructor then appends
                                                  /// the name of the status code.
                                                  /// @throws OpenCL::ExecutionException on any status other than CL_SUCCESS.
                                                  inline void check(cl_int status, const char* message)
                                                  {
                                                      if (status != CL_SUCCESS)
                                                      {
                                                          cerr << message << ": ";
                                                          throw OpenCL::ExecutionException(OpenCL::getOpenCLErrorCodeString(status));
                                                      }
                                                  }
                                              };

                                              /// Runs the OpenCL setup once; the exit code reports whether it succeeded.
                                              int main(void)
                                              {
                                                  try
                                                  {
                                                      OpenClEngine engine;
                                                      engine.SetupContext();
                                                  }
                                                  catch (const OpenCL::ExecutionException&)
                                                  {
                                                      // The diagnostic was already written to stderr where it was thrown.
                                                      return EXIT_FAILURE;
                                                  }
                                                  catch (const std::exception& e)
                                                  {
                                                      cerr << e.what() << endl;
                                                      return EXIT_FAILURE;
                                                  }
                                                  return EXIT_SUCCESS;
                                              }