6 Replies Latest reply on Apr 25, 2011 3:11 PM by MicahVillmow

    Printf on x86_64 - doesn't work properly

    sir.um
      First 3 parameters resolve to consistent Random values

      Edit: I accidentally hit the Ctrl+S keyboard shortcut before I finished explaining the problem, and the forum software published an incomplete post. Sorry to anyone who viewed my post at that time. :-)

      System Specs:
      ------------------
      MinGW 64-bit Compiler
      OpenCL - ATI Stream SDK v2.2
      Windows 7 (x64)
      Intel Core 2 Quad Q6600
      Radeon HD 5870

      *I am Using the C++ bindings for the OpenCL Runtime

      ----------
      Problem
      ----------

      When using printf within an OpenCL kernel on an x86 CPU, the first 3 floating point numbers printed in a single printf() call do not resolve to the actual values stored in the provided variables. Instead, they resolve in one of 2 ways:

      - If the floating point number is printed in scientific notation (i.e. %e or %g), the value resolves to a very small number, on the order of ___x10^-300. Although these numbers change every time you run the program, they are consistent within a single run of the program. For example, if you make 3 calls to printf in a single run of a given kernel, the first 3 floating point numbers of EVERY printf() call will resolve to the SAME set of numbers, even though this set will change every time you run the program.

      - If the number is printed in decimal form (i.e. %f), these numbers resolve to zero every time.

      This problem ONLY occurs when running kernels on the CPU. Printf calls on the GPU work correctly. Additionally, within a CPU kernel, integers appear to be immune to this problem.

      Disabling OpenCL optimizations in the OpenCL kernel compiler has no effect.

      Example C++ code, OpenCL Kernel code, and program output are provided. This example runs a CPU Test and a GPU Test.

      thanks,
      -Chris

      /////////////////// /////////////////// ///// Output ///// /////////////////// /////////////////// OpenCL Initialized! -------------- CPU - Printf() -------------- %e_ s0: '2.410229e-316' s1: '4.396999e-316' s2: '1.942745e-316' s3: '4.000000e+000' -- s0: '1.000000e+000' s1: '2.000000e+000's2: '3.000000e+000' s3: '4.000000e+000' %f_ s0: '0.000000' s1: '0.000000' s2: '0.000000' s3: '4.000000' -- s0: '1.000000' s1: '2.000000' s2: '3.000000' s3: '4.000000' %i_ y: '2' y: '2' y: '2' y: '2' y: '2' y: '2' %e_ x: '2.410229e-316' x: '4.397002e-316' x: '1.942745e-316' x: '1.000000e+000' x: '1.000000e+000' x: '1.000000e+000' %f_ x: '0.000000' x: '0.000000' x: '0.000000' x: '1.000000' x: '1.000000' x: '1.000000' %i_ y: '2' y: '2' y: '2' y: '2' y: '2' y: '2' -------------- GPU - Printf() -------------- %e_ s0: '1.000000e+000' s1: '2.000000e+000' s2: '3.000000e+000' s3: '4.000000e+000' -- s0: '1.000000e+000' s1: '2.000000e+000' s2: '3.000000e+000' s3: '4.000000e+000' %f_ s0: '1.000000' s1: '2.000000' s2: '3.000000' s3: '4.000000' -- s0: '1.000000' s1: '2.000000' s2: '3.000000' s3: '4.000000' %i_ y: '2' y: '2' y: '2' y: '2' y: '2' y: '2' %e_ x: '1.000000e+000' x: '1.000000e+000' x: '1.000000e+000' x: '1.000000e+000' x: '1.000000e+000' x: '1.000000e+000' %f_ x: '1.000000' x: '1.000000' x: '1.000000' x: '1.000000' x: '1.000000' x: '1.000000' %i_ y: '2' y: '2' y: '2' y: '2' y: '2' y: '2' /////////////////////// /////////////////////// ///// kernels.cl ///// /////////////////////// /////////////////////// #pragma OPENCL EXTENSION cl_amd_printf : enable void do_work(float4 test); kernel void cpu_printf_test(float4 test) { printf("\n--------------\nCPU - Printf()\n--------------\n\n"); do_work(test); } kernel void gpu_printf_test(float4 test) { printf("\n--------------\nGPU - Printf()\n--------------\n\n"); do_work(test); } void do_work(float4 test) { printf("%%e_ s0: '%e' s1: '%e' s2: '%e' s3: '%e' -- s0: '%e' s1: '%e' s2: '%e' s3: '%e'\n", test.s0, test.s1, test.s2, test.s3, 
test.s0, test.s1, test.s2, test.s3); printf("%%f_ s0: '%f' s1: '%f' s2: '%f' s3: '%f' -- s0: '%f' s1: '%f' s2: '%f' s3: '%f'\n", test.s0, test.s1, test.s2, test.s3, test.s0, test.s1, test.s2, test.s3); int y = 2; printf("%%i_ y: '%i' y: '%i' y: '%i' y: '%i' y: '%i' y: '%i'\n", y, y, y, y, y, y); float x = 1; printf("%%e_ x: '%e' x: '%e' x: '%e' x: '%e' x: '%e' x: '%e'\n", x, x, x, x, x, x); printf("%%f_ x: '%f' x: '%f' x: '%f' x: '%f' x: '%f' x: '%f'\n", x, x, x, x, x, x); //Print y (int) Again printf("%%i_ y: '%i' y: '%i' y: '%i' y: '%i' y: '%i' y: '%i'\n", y, y, y, y, y, y); } /////////////////////////// /////////////////////////// ///// printfTest.cpp ///// /////////////////////////// /////////////////////////// /* * printfTest.cpp * * Created on: Nov 13, 2010 * Author: Chris Alexander */ //OpenCL Includes #define __NO_STD_VECTOR #include <CL/cl.hpp> //C++ API Includes #include <iostream> #include <fstream> using namespace std; using namespace cl; //#define DISABLE_CL_OPTIMIZATIONS void runDemo(Kernel kernel, CommandQueue * queue); Device * CPU_DEVICE; Device * GPU_DEVICE; Context * CPU_CONTEXT; Context * GPU_CONTEXT; CommandQueue * CPU_QUEUE; CommandQueue * GPU_QUEUE; Program * CPU_PROGRAM; Program * GPU_PROGRAM; int main() { //--OpenCL Init cl_int err = CL_SUCCESS; //Request Supported Platforms from OpenCL cl::vector<Platform> platformList; err = Platform::get(&platformList); Platform platform = platformList[0]; cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform)(), 0}; //Query OpenCL for present devices (I.e. CPU, GPU, etc.) 
cl::vector<Device> & CPU_DEVICES = *new cl::vector<Device>(); cl::vector<Device> & GPU_DEVICES = *new cl::vector<Device>(); err = platform.getDevices(CL_DEVICE_TYPE_CPU, &CPU_DEVICES); err = platform.getDevices(CL_DEVICE_TYPE_GPU, &GPU_DEVICES); CPU_DEVICE = &CPU_DEVICES[0]; GPU_DEVICE = &GPU_DEVICES[0]; //Create Contexts CPU_CONTEXT = new Context(CPU_DEVICES, cprops, NULL, NULL, &err); GPU_CONTEXT = new Context(GPU_DEVICES, cprops, NULL, NULL, &err); //Create Queues CPU_QUEUE = new CommandQueue(*CPU_CONTEXT, *CPU_DEVICE, 0, &err); GPU_QUEUE = new CommandQueue(*GPU_CONTEXT, *GPU_DEVICE, 0, &err); //Load Kernel from file into String Object ifstream file("src/kernels/kernels.cl"); std::string prog(istreambuf_iterator<char>(file),(istreambuf_iterator<char>())); //Create ProgramSource object from String Source Program::Sources source(1, make_pair(prog.c_str(), prog.length())); //Create Program object from ProgramSource object CPU_PROGRAM = new Program(*CPU_CONTEXT, source, &err); GPU_PROGRAM = new Program(*GPU_CONTEXT, source, &err); //--CPU //Compile CPU Program #ifdef DISABLE_CL_OPTIMIZATIONS err = (*CPU_PROGRAM).build(CPU_DEVICES, "-g -cl-opt-disable"); #else err = (*CPU_PROGRAM).build(CPU_DEVICES, ""); #endif if (err) { std::string log = (*CPU_PROGRAM).getBuildInfo<CL_PROGRAM_BUILD_LOG>(NULL); std::cerr<<"CPU - Build Log:\n"<<log<<endl; exit(EXIT_FAILURE); } //--GPU //Compile GPU Program #ifdef DISABLE_CL_OPTIMIZATIONS err = (*GPU_PROGRAM).build(GPU_DEVICES, "-g -cl-opt-disable"); #else err = (*GPU_PROGRAM).build(GPU_DEVICES, ""); #endif if (err) { std::string log = (*GPU_PROGRAM).getBuildInfo<CL_PROGRAM_BUILD_LOG>(NULL); std::cerr<<"GPU - Build Log:\n"<<log<<endl; exit(EXIT_FAILURE); } cout<<"OpenCL Initialized!"<<endl; runDemo(Kernel(*CPU_PROGRAM, "cpu_printf_test", &err), CPU_QUEUE); runDemo(Kernel(*GPU_PROGRAM, "gpu_printf_test", &err), GPU_QUEUE); } void runDemo(Kernel kernel, CommandQueue * queue) { cl_int err = CL_SUCCESS; cl_float4 f; f.s0 = 1.0f; f.s1 = 
2.0f; f.s2 = 3.0f; f.s3 = 4.0f; //Enqueue Kernel Execution Event event; err = kernel.setArg(0, f); err = (*queue).enqueueNDRangeKernel(kernel, NullRange, NDRange(1), NDRange(1), NULL, &event); //Wait for kernel to finish event.wait(); }