8 Replies Latest reply on Jun 24, 2010 9:48 AM by timchist

    Link Error when using max_constant_size attribute on 4850

    timchist
      Link Error when using max_constant_size attribute on ATI 4850

      I'm trying to build the following kernel:

      __kernel void ConstDemo1(__global float* src, __global float* dst, int width, int height, __constant float *constDemoData __attribute__((max_constant_size(256))))
      {

          int x = get_global_id(0);
          if(x < width)
          {
              int y = get_global_id(1);
              if(y < height)
              {
                  int offset = y * width + x;
                  float sum = 0;
                  for(int i = 0; i < 20; i++)
                     sum += constDemoData[(offset + i) * 17 % 1000];
                  dst[offset] = sum;
              }
          }

      }

      When I call clBuildProgram(), it returns CL_SUCCESS. Then I extract binary using clGetProgramInfo(...CL_PROGRAM_BINARIES...), and create another cl_program object using clCreateProgramWithBinary. Now, when I call clBuildProgram() for this new program, it returns -11. clGetProgramBuildInfo(...CL_PROGRAM_BUILD_LOG...) returns

      Internal error: Link failed.
      Make sure the system setup is correct.

      When I remove the max_constant_size attribute, everything works fine. However, without the attribute it does not look that readings are cached (the performance is the same as if constDemoData was a __global pointer).

      I have an iMac 27" with ATI Radeon 4850. Driver version is 10.5 (8.732.0.0). ATI Stream SDK 2.1 64 bit. Windows 7 64 bit, a 32-bit application.

        • Link Error when using max_constant_size attribute on 4850timchist,
          omkaranathan

          timchist,

          Could you post the host side code too? Its easier to reproduce and track down the issue that way.

            • Link Error when using max_constant_size attribute on 4850timchist,
              timchist

              See the code attached.

              #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include "CL/cl.h" #define CHECK_ERROR if(status != CL_SUCCESS) { printf("ERROR at %d: %d\n", __LINE__, status); exit(-9); } //------------------------------------------------------------------------------ static char* load(char* filename) { FILE* fin; #if _WIN32 if(fopen_s(&fin, filename ,"r")) fin = NULL; #else fin = fopen(filename ,"r"); #endif if(fin == NULL ) { return NULL; } char* string = NULL; int len = 0; while(!feof(fin)) { len++; string = (char*)realloc(string, len); string[len - 1] = fgetc(fin); } string[len - 1] = '\0'; fclose(fin); return string; } //------------------------------------------------------------------------------ cl_context_properties* getContextProperties(void) { // Have a look at the available platforms and pick either // the AMD one if available or a reasonable default. cl_uint numPlatforms; cl_platform_id platform = NULL; cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); CHECK_ERROR; if(numPlatforms > 0) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); CHECK_ERROR; for(unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); CHECK_ERROR; platform = platforms[i]; if(!strcmp(pbuf, "Advanced Micro Devices, Inc.")) break; } delete[] platforms; } // If we could find our platform, use it. Otherwise pass a NULL and get whatever the // implementation thinks we should be using. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; // Use NULL for backward compatibility if(platform == NULL) { return NULL; } cl_context_properties* ptr = (cl_context_properties*)malloc(sizeof(cl_context_properties ) * 3); for(int i = 0; i < 3; i++) ptr[i] = cps[i]; return ptr; } //------------------------------------------------------------------------------ int main(int argc, char** argv) { int gpu_build = 1; int initial_arg = 1; char* infile; char* outfile; if(argc < 2) { printf("Syntax [--gpu|--cpu] %s outfile infile [compilation arguments]\n", argv[0] ); exit(0); } if(argv[initial_arg][0] == '-') { if(!strcmp(argv[initial_arg], "--gpu")) { gpu_build = 1; initial_arg++; } else if(!strcmp(argv[initial_arg], "--cpu")) { gpu_build =0; initial_arg++; } else { printf("Unrecognised argument [%s]\n" , argv[initial_arg]); exit(1); } } outfile = argv[initial_arg++]; infile = argv[initial_arg++]; cl_int status = 0; cl_device_id* devices; size_t deviceListSize; size_t len; cl_context context; cl_context_properties* cprops = getContextProperties(); if(!cprops) CHECK_ERROR("Didn't get a list of context properties\n"); if(gpu_build) context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); else context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_CPU, NULL, NULL, &status); CHECK_ERROR; status = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); CHECK_ERROR; if(deviceListSize == 0) exit(-8); devices = (cl_device_id*)malloc(deviceListSize); status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); CHECK_ERROR; status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); CHECK_ERROR; cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); char *buildargs= NULL ; buildargs = (char*) malloc( 1 ); buildargs[0]='\0'; for(int i = initial_arg; i < argc; i++) { if(i < argc-1) { if(!strcmp("-D", argv[i]) || !strcmp("-I", argv[i])) { size_t len = strlen(buildargs) + strlen(argv[i]) + 2; buildargs = (char*)realloc(buildargs, len); #if _WIN32 strcat_s(buildargs, len, argv[i]); strcat_s(buildargs, len, " "); strcat_s(buildargs, len, "\0"); #else strcat(buildargs, argv[i]); strcat(buildargs, " "); strcat(buildargs, "\0"); #endif len = strlen(buildargs) + strlen(argv[i + 1]) + 2; buildargs =(char*)realloc(buildargs, len); #if _WIN32 strcat_s(buildargs, len, argv[i + 1]); strcat_s(buildargs, len, " "); strcat_s(buildargs, len, "\0"); #else strcat(buildargs, argv[i + 1]); strcat(buildargs, " "); strcat(buildargs, "\0"); #endif continue; } } if(argv[i][0] == '-') { size_t len = strlen(buildargs) + strlen(argv[i]) + 2; buildargs = (char*)realloc(buildargs, len); #if _WIN32 strcat_s(buildargs, len, argv[i]); strcat_s(buildargs, len, " "); strcat_s(buildargs, len, "\0"); #else strcat(buildargs, argv[i]); strcat(buildargs, " "); strcat(buildargs, "\0"); #endif } } char* ptr; ptr = load(infile); if(ptr == NULL) { printf("Error loading file\n"); exit(-1); } size_t strsize[1]; strsize[0] = strlen( ptr ); cl_program program = clCreateProgramWithSource(context, 1, (const char**)&ptr, strsize, &status); CHECK_ERROR; status = clBuildProgram(program, 1, devices, buildargs, NULL, NULL); // get and print the build log clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &len); ptr = (char*)malloc(len + 1); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, len, ptr, NULL); ptr[len] = '\0'; if(status != CL_SUCCESS) { printf("BUILD FAILED. Results follow:\n"); printf("%s\n", ptr); free(ptr); printf("--\n"); exit(-1); } printf("Compilation successful\n"); free(ptr); // now get the binary object status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 0, NULL, &len); size_t* sptr = (size_t*)malloc(sizeof(size_t) * len); memset(sptr, 0, sizeof(size_t)*len); status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, len, sptr, NULL); unsigned char *bin = (unsigned char*) malloc( sptr[0] ); status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &len); CHECK_ERROR; ptr = (char*)malloc(len + 1); memset(ptr, 0, len); status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, len, &bin, NULL); CHECK_ERROR; cl_int s2; len = sptr[0]; cl_program p2 = clCreateProgramWithBinary(context, 1, devices, &len, (const unsigned char**)&bin, &s2, &status); CHECK_ERROR; status = clBuildProgram(p2, 1, devices, NULL, NULL, NULL); // get and print the build log clGetProgramBuildInfo(p2, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &len); ptr = (char*)malloc(len + 1); clGetProgramBuildInfo(p2, devices[0], CL_PROGRAM_BUILD_LOG, len, ptr, NULL); ptr[len] = '\0'; if(status != CL_SUCCESS) { printf("TEST BUILD FAILED. Results follow:\n"); printf("%s\n", ptr); free(ptr); printf("--\n"); exit(-1); } printf("Test build succeeded\n"); free(ptr); FILE* fout; #if _WIN32 if(fopen_s(&fout, outfile, "w")) fout = NULL; #else fout = fopen(outfile, "w"); #endif fprintf(fout, "{\n"); for(unsigned int i = 0; i < sptr[0]; i++) { char c = bin[i]; fprintf(fout, "0x%02x", (unsigned char)bin[i]); if(i < sptr[0] - 1) fprintf(fout, ", "); if((i + 1) % 16 == 0) fprintf(fout, "\n"); } fprintf(fout, "\n};\n\n"); fclose(fout); return 0; }

                • Link Error when using max_constant_size attribute on 4850
                  timchist

                  I've removed all the unnecessary stuff from the code sample and also included the Open CL source code in the host code. Hopefully, this will simplify the error reproduction -- see the code attached.

                  On NVIDIA cards __constant params work fine and show significant performance increase when using them for passing small amount of frequently read information to kernels. On ATI 4850, however, this does not work and produce 'Link Error' even with the latest available driver and Stream SDK.

                  #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include "CL/cl.h" #define CHECK_ERROR if(status != CL_SUCCESS) { printf("ERROR at %d: %d\n", __LINE__, status); exit(-1); } const char KernelSource[] = "__kernel void ConstDemo1(__global float* src, __global float* dst, int width, int height, __constant float *constDemoData __attribute__((max_constant_size(256))))\n" "{ dst[0] = src[0]; \n" "}"; //------------------------------------------------------------------------------ cl_context_properties* getContextProperties(void) { // Have a look at the available platforms and pick either // the AMD one if available or a reasonable default. cl_uint numPlatforms; cl_platform_id platform = NULL; cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); CHECK_ERROR; if(numPlatforms > 0) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); CHECK_ERROR; for(unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); CHECK_ERROR; platform = platforms[i]; if(!strcmp(pbuf, "Advanced Micro Devices, Inc.")) break; } delete[] platforms; } // If we could find our platform, use it. Otherwise pass a NULL and get whatever the // implementation thinks we should be using. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; // Use NULL for backward compatibility if(platform == NULL) { return NULL; } cl_context_properties* ptr = (cl_context_properties*)malloc(sizeof(cl_context_properties) * 3); for(int i = 0; i < 3; i++) ptr[i] = cps[i]; return ptr; } //------------------------------------------------------------------------------ int main(int argc, char** argv) { cl_int status = 0; cl_device_id* devices; size_t deviceListSize; size_t len; cl_context context; cl_context_properties* cprops = getContextProperties(); if(!cprops) CHECK_ERROR("Didn't get a list of context properties\n"); context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); CHECK_ERROR; status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); CHECK_ERROR; if(deviceListSize == 0) exit(-1); devices = (cl_device_id*)malloc(deviceListSize); status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); CHECK_ERROR; status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); CHECK_ERROR; cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); size_t strsize[1]; strsize[0] = strlen(KernelSource); char* ptr = (char*)malloc(strsize[0] + 1); strcpy(ptr, KernelSource); cl_program program = clCreateProgramWithSource(context, 1, (const char**)&ptr, strsize, &status); free(ptr); CHECK_ERROR; status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); // get and print the build log clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &len); ptr = (char*)malloc(len + 1); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, len, ptr, NULL); ptr[len] = '\0'; if(status != CL_SUCCESS) { printf("BUILD FAILED. Results follow:\n"); printf("%s\n", ptr); free(ptr); printf("--\n"); exit(-1); } printf("Compilation successful\n"); free(ptr); // now get the binary object status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 0, NULL, &len); size_t* sptr = (size_t*)malloc(sizeof(size_t) * len); memset(sptr, 0, sizeof(size_t)*len); status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, len, sptr, NULL); unsigned char *bin = (unsigned char*) malloc(sptr[0]); status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &len); CHECK_ERROR; ptr = (char*)malloc(len + 1); memset(ptr, 0, len); status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, len, &bin, NULL); CHECK_ERROR; cl_int s2; len = sptr[0]; cl_program p2 = clCreateProgramWithBinary(context, 1, devices, &len, (const unsigned char**)&bin, &s2, &status); CHECK_ERROR; status = clBuildProgram(p2, 1, devices, NULL, NULL, NULL); // get and print the build log clGetProgramBuildInfo(p2, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &len); ptr = (char*)malloc(len + 1); clGetProgramBuildInfo(p2, devices[0], CL_PROGRAM_BUILD_LOG, len, ptr, NULL); ptr[len] = '\0'; if(status != CL_SUCCESS) { printf("TEST BUILD FAILED. Results follow:\n"); printf("%s\n", ptr); free(ptr); printf("--\n"); exit(-1); } printf("Test build succeeded\n"); free(ptr); return 0; }