3 Replies Latest reply on May 4, 2010 12:47 PM by empol

    Porting to bindings

    empol

      Hello. I'm a beginner, so this is probably a simple question, but I'm stuck.

      I wrote a simple test program and it works fine. But when I ported it to the C++ bindings, I got a weird result.

       

      The broken code compiles without errors, but it gives wrong results: -1163005939 for the CPU and 0 for the GPU.

       

      Where did I make a mistake?

      1.cl #pragma OPENCL EXTENSION cl_amd_printf : enable __kernel void hello(__global uint * in, __global uint * out) { unsigned int tid = get_global_id(0); uint4 v; uint w; v.s0 = in[0] * 2; v.S1 = in[1]; v.S2 = in[2]; v.S3 = in[3]; w = max(v.S0, v.S1); out[0] = w; } Working code: // test.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include <utility> #define __NO_STD_VECTOR // Use cl::vector and cl::string and #define __NO_STD_STRING // not STL versions, more on this later #include <malloc.h> #define alloca _alloca #include <CL/cl.hpp> #include <cstdio> #include <cstdlib> #include <fstream> #include <iostream> #include <string> #include <iterator> inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl; system("PAUSE"); exit(EXIT_FAILURE); } } int _tmain(int argc, _TCHAR* argv[]) { unsigned int tab[] = {14,50,8,2}; unsigned int *wynik; size_t global[] = {sizeof(tab)}; size_t local[] = {1}; cl_int err; cl_context context; cl_int status = CL_SUCCESS; cl_device_type dType; cl_uint numPlatforms; cl_mem inCL, outCL; size_t deviceListSize; cl_device_id *devices; cl_program program; cl_kernel kernel; dType = CL_DEVICE_TYPE_GPU; cl_platform_id platform = NULL; status = clGetPlatformIDs(0, NULL, &numPlatforms); checkErr(CL_SUCCESS, "cl::Platform::get"); if (0 < numPlatforms) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); checkErr(CL_SUCCESS, "cl::Platform::get"); for (unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); platform = platforms[i]; if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) { break; } } delete[] platforms; } cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; /* Use NULL for backward compatibility */ cl_context_properties* 
cprops = (NULL == platform) ? NULL : cps; context = clCreateContextFromType( cprops, dType, NULL, NULL, &err); checkErr(err, "clCreateContextFromType failed"); inCL = clCreateBuffer( context, CL_MEM_READ_ONLY, sizeof(tab), NULL, &err); outCL = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof(unsigned int), NULL, &err); checkErr(err, "clCreateBuffer failed"); /* First, get the size of device list data */ err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); checkErr(err, "clGetContextInfo failed"); /* Now allocate memory for device list based on the size we got earlier */ devices = (cl_device_id*)malloc(deviceListSize); if(devices==NULL) { std::cerr << "ERROR: " << std::endl; exit(EXIT_FAILURE); } /* Now, get the device list data */ err = clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); checkErr(err, "clGetContextInfo failed"); std::ifstream file("1.cl"); checkErr(file.is_open() ? CL_SUCCESS:-1, "1.cl"); std::string source( std::istreambuf_iterator<char>(file), (std::istreambuf_iterator<char>())); const char* src; src = source.c_str(); //printf("%s",src); size_t sourceSize[] = {strlen(src)}; program = clCreateProgramWithSource(context, 1, &src, sourceSize, &err); checkErr(err, "clGetContextInfo failed"); /* create a cl program executable for all the devices specified */ err = clBuildProgram(program, 1, devices, NULL, NULL, NULL); if(err != CL_SUCCESS) { if(err == CL_BUILD_PROGRAM_FAILURE) { cl_int logStatus; char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); checkErr(err, "GetProgramBuildInfo failed"); buildLog = (char*)malloc(buildLogSize); if(buildLog == NULL) { std::cout << "Failed to allocate host memory. 
(buildLog)"; exit(EXIT_FAILURE); } memset(buildLog, 0, buildLogSize); logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); checkErr(err, "clGetProgramBuildInfo"); { free(buildLog); exit(EXIT_FAILURE); } std::cout << " \n\t\t\tBUILD LOG\n"; std::cout << " ************************************************\n"; std::cout << buildLog << std::endl; std::cout << " ************************************************\n"; free(buildLog); } checkErr(err, "clBuildProgram failed"); } kernel = clCreateKernel(program, "hello", &err); checkErr(err, "clCreateKernel failed"); err = clSetKernelArg(kernel, 0, sizeof(inCL), &inCL); checkErr(err, "clSetKernelArg"); err = clSetKernelArg(kernel, 1, sizeof(outCL), &outCL); checkErr(err, "clSetKernelArg"); cl_command_queue_properties prop = 0; prop = CL_QUEUE_PROFILING_ENABLE; cl_command_queue commandQueue = clCreateCommandQueue( context, devices[0], prop, &status); checkErr(err, "clCreateCommandQueue failed"); cl_event events; err = clEnqueueWriteBuffer( commandQueue, inCL, 1, 0, sizeof(tab), &tab, 0, 0, 0); checkErr(err, "clEnqueueWriteBuffer failed"); err = clEnqueueNDRangeKernel( commandQueue, kernel, 1, NULL, global, local, 0, NULL, &events); err = clEnqueueReadBuffer( commandQueue, outCL, 1, 0, sizeof(unsigned int), &wynik, 0, 0, 0); checkErr(err, "clEnqueueReadBuffer failed"); printf("Wynik to: %d\n", wynik); system("PAUSE"); return 0; } Broken code: // test.cpp : Defines the entry point for the console application. 
// #include "stdafx.h" #include <utility> #define __NO_STD_VECTOR // Use cl::vector and cl::string and #define __NO_STD_STRING // not STL versions, more on this later #include <malloc.h> #define alloca _alloca #include <CL/cl.hpp> #include <cstdio> #include <cstdlib> #include <fstream> #include <iostream> #include <string> #include <iterator> #include <vector> inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl; system("PAUSE"); exit(EXIT_FAILURE); } } int _tmain(int argc, _TCHAR* argv[]) { unsigned int tab[] = {17,9,7,2}; unsigned int *wsk = tab; unsigned int *wynik = 0; const std::string hw("Hello World\n"); size_t global[] = {sizeof(tab)}; size_t local[] = {1}; cl::string str; cl_int err; cl::Context context; cl::Buffer inCL, outCL; cl::Program program; cl::Kernel kernel; cl::vector<cl::Device> devices; cl::CommandQueue commandQueue; cl::vector<cl::Platform> platformList; std::ifstream file; cl::Program::Sources source; cl::Platform::get(&platformList); checkErr(platformList.size()!=0 ? CL_SUCCESS : -1, "cl::Platform::get"); cl::string platformVendor; platformList[0].getInfo(CL_PLATFORM_VENDOR, &platformVendor); cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0}; context = cl::Context(CL_DEVICE_TYPE_CPU, cprops, NULL, NULL, &err); checkErr(err, "Conext::Context()"); devices = context.getInfo<CL_CONTEXT_DEVICES>(); commandQueue = cl::CommandQueue(context, devices[0]); file.open("1.cl"); checkErr(file.is_open() ? 
CL_SUCCESS:-1, "lesson1_kernel.cl"); std::string prog(std::istreambuf_iterator<char>(file), (std::istreambuf_iterator<char>())); source = cl::Program::Sources(1, std::make_pair(prog.c_str(), prog.length()+1)); program = cl::Program(context, source); program.build(devices); kernel = cl::Kernel(program, "hello"); str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]); inCL = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(tab), NULL, &err); outCL = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(unsigned int), NULL); commandQueue.enqueueWriteBuffer(inCL, 1, 0, sizeof(tab), &tab, NULL, NULL); kernel.setArg(0, inCL); kernel.setArg(1, outCL); commandQueue.enqueueNDRangeKernel(kernel, 0, cl::NDRange(sizeof(tab)), cl::NDRange(1), NULL, NULL); commandQueue.enqueueReadBuffer(outCL, 1, 0, sizeof(unsigned int), &wynik, NULL, NULL); printf("Wynik to: %d\n", wynik); system("PAUSE"); return 0; }

        • Porting to bindings
          empol

          Partially fixed:

           

          commandQueue.enqueueNDRangeKernel(kernel, 0, cl::NDRange(sizeof(tab)), cl::NDRange(1), NULL, NULL);

          changed to:

          commandQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(sizeof(tab)), cl::NDRange(1), NULL, NULL);

           

          On the CPU it works OK, but on the GPU the result is still 0.

            • Porting to bindings
              genaganna

               

              Originally posted by: empol Partially fixed:

               

               commandQueue.enqueueNDRangeKernel(kernel, 0, cl::NDRange(sizeof(tab)), cl::NDRange(1), NULL, NULL);

               

              changed to:

               

              commandQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(sizeof(tab)), cl::NDRange(1), NULL, NULL);

               

               On the CPU it works OK, but on the GPU the result is still 0.

               

              Empol,

                     The cl_amd_printf extension is not supported on GPUs.  That is why it is failing on the GPU.  Just removing #pragma OPENCL EXTENSION cl_amd_printf : enable from your kernel solves the problem.

                 Please make sure you are handling error codes returned by OpenCL API calls properly.