makhan

OpenCL tutorial - incorrect results on GPU

Discussion created by makhan on Mar 29, 2010
Latest reply on May 4, 2010 by genaganna

Hi,

 

I have just started my adventure with OpenCL. After spending some time setting everything up and fixing compilation errors, I managed to get my first program running. It is written according to this tutorial. The code is included below this post.

 

When I pass CL_DEVICE_TYPE_CPU as the first argument to the context constructor, it works just fine. But when I change it to CL_DEVICE_TYPE_GPU, not only are just a few values filled in (usually 2-3), but they are often at the wrong indexes.

 

My GPU is ATI Radeon HD5850, my OS is Windows 7 Pro x64, I have 10.03 Catalyst drivers and ATI Stream SDK v2.

 

Can you help me solve this problem?

 

 

//lesson1_kernels.cl #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable __constant char hw[] = "Hello World\n"; __kernel void hello(__global char * out) { size_t tid = get_global_id(0); out[tid] = hw[tid]; } //lesson1.cpp #include <utility> #define __NO_STD_VECTOR // Use cl::vector and cl::string and #define __NO_STD_STRING // not STL versions, more on this later #include <malloc.h> #define alloca _alloca #include <cstdio> #include <cstdlib> #include <fstream> #include <iostream> #include <string> #include <iterator> #include <CL/cl.hpp> const std::string hw("Hello World\n"); inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl; exit(EXIT_FAILURE); } } int main(void) { cl_int err; cl::vector< cl::Platform > platformList; cl::Platform::get(&platformList); checkErr(platformList.size()!=0 ? CL_SUCCESS : -1, "cl::Platform::get"); std::cout << "Platform number is: " << platformList.size() << std::endl; cl::STRING_CLASS platformVendor; platformList[0].getInfo(CL_PLATFORM_VENDOR, &platformVendor); std::cout << "Platform is by: " << platformVendor.c_str() << "\n"; cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0}; cl::Context context(CL_DEVICE_TYPE_CPU, cprops,NULL,NULL,&err); checkErr(err, "Conext::Context()"); char * outH = new char[hw.length()+1]; cl::Buffer outCL(context,CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,hw.length()+1,outH,&err); checkErr(err, "Buffer::Buffer()"); cl::vector<cl::Device> devices; devices = context.getInfo<CL_CONTEXT_DEVICES>(); checkErr(devices.size() > 0 ? CL_SUCCESS : -1, "devices.size() > 0"); std::ifstream file("lesson1_kernels.cl"); checkErr(file.is_open() ? 
CL_SUCCESS:-1, "lesson1_kernel.cl"); std::string prog(std::istreambuf_iterator<char>(file),(std::istreambuf_iterator<char>())); cl::Program::Sources source(1,std::make_pair(prog.c_str(), prog.length()+1)); cl::Program program(context, source); err = program.build(devices,""); checkErr(file.is_open() ? CL_SUCCESS : -1, "Program::build()"); cl::Kernel kernel(program, "hello", &err); checkErr(err, "Kernel::Kernel()"); err = kernel.setArg(0, outCL); checkErr(err, "Kernel::setArg()"); cl::CommandQueue queue(context, devices[0], 0, &err); checkErr(err, "CommandQueue::CommandQueue()"); cl::Event event; err = queue.enqueueNDRangeKernel(kernel, cl::NullRange,cl::NDRange(hw.length()+1),cl::NDRange(1, 1), NULL, &event); checkErr(err, "ComamndQueue::enqueueNDRangeKernel()"); event.wait(); err = queue.enqueueReadBuffer(outCL,CL_TRUE,0,hw.length()+1,outH); checkErr(err, "ComamndQueue::enqueueReadBuffer()"); for (int i=0; i<hw.length()+1; i++) std::cout<< "outH[" << i <<"]= " <<outH[i]<<"\n"; system("pause"); return EXIT_SUCCESS; }

Outcomes