Hi there, I'm pretty much a beginner with OpenCL and GPGPUs in general. For some reason my very simple test app crashes with the following output when it calls the wait() member function of cl::Event.
First-chance exception at 0x000f1051 in sqrt_opencl_sse.exe: 0xC0000005: Access violation reading location 0x00000000.
First-chance exception at 0x000f1051 in sqrt_opencl_sse.exe: 0xC0000005: Access violation reading location 0x00000000.
First-chance exception at 0x000f1051 in sqrt_opencl_sse.exe: 0xC0000005: Access violation reading location 0x00000000.
First-chance exception at 0x000f1051 in sqrt_opencl_sse.exe: 0xC0000005: Access violation reading location 0x00000000.
Unhandled exception at 0x000f1051 in sqrt_opencl_sse.exe: 0xC0000005: Access violation reading location 0x00000000.
#ifndef _DEBUG #define _SECURE_SCL 0 #define _HAS_ITERATOR_DEBUGGING 0 #endif #include <algorithm> #include <cstdlib> #include <fstream> #include <functional> #include <iterator> #include <iostream> #include <vector> #include <boost\timer.hpp> #define __NO_STD_VECTOR // Use cl::vector instead of STL version #include "CL/cl.hpp" using namespace std; inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl; exit(EXIT_FAILURE); } } int _tmain(int argc, _TCHAR* argv[]) { vector<float> in ( 256); vector<float> out( 256 ); srand(10); generate_n( in.begin() , num_elements, &rand ); fill( out.begin(), out.end(), 0.0f ); cl_int err; cl::vector< cl::Platform > platformList; cl::Platform::get( &platformList ); checkErr( platformList.size()!=0 ? CL_SUCCESS : -1, "cl::Platform::get" ); std::cout << "Platform number is: " << platformList.size() << std::endl; std::string platformVendor; platformList[0].getInfo( (cl_platform_info)CL_PLATFORM_VENDOR , &platformVendor ); std::cout << "Platform is by: " << platformVendor << "\n"; // context properties list - must be terminated with 0 cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM , (cl_context_properties)(platformList[0])() , 0 }; cl::Context context( CL_DEVICE_TYPE_CPU , cprops , NULL , NULL , &err ); checkErr(err, "Conext::Context()"); cl::Buffer inCL( context , CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR , sizeof( float ) * 256 , &in.front() , &err ); cl::Buffer outCL( context , CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR , sizeof( float ) * 256 , &out.front() , &err ); checkErr(err, "Buffer::Buffer()"); cl::vector<cl::Device> devices; devices = context.getInfo<CL_CONTEXT_DEVICES>(); checkErr( devices.size() > 0 ? CL_SUCCESS : -1, "devices.size() > 0" ); ifstream file("./sqrt_array.cl"); checkErr(file.is_open() ? 
CL_SUCCESS:-1, "./sqrt_array.cl"); istreambuf_iterator<char> end_of_file; string kernel_code( istreambuf_iterator<char>( file ) , end_of_file ); cl::Program::Sources source( 1 , make_pair( kernel_code.c_str() , kernel_code.length() + 1 ) ); cl::Program program( context, source ); err = program.build( devices , "" // compiler options ); cl::Kernel kernel( program , "sqrt_array" , &err ); checkErr( err, "Kernel::Kernel()" ); err = kernel.setArg( 0, outCL ); checkErr(err, "Kernel::setArg()"); err = kernel.setArg( 1, inCL ); checkErr(err, "Kernel::setArg()"); cl::CommandQueue queue( context , devices[0] , 0 , &err ); checkErr( err, "CommandQueue::CommandQueue()" ); boost::timer now; cl::Event event; err = queue.enqueueNDRangeKernel( kernel , cl::NullRange , cl::NDRange( 256/ 4 ) // we are using vector data float4 , cl::NDRange( 1 ) , NULL , &event ); checkErr(err, "ComamndQueue::enqueueNDRangeKernel()"); event.wait(); err = queue.enqueueReadBuffer( outCL , CL_TRUE , 0 , sizeof( float ) * num_elements , &out.front() ); checkErr(err, "ComamndQueue::enqueueReadBuffer()"); cout << now.elapsed() << " seconds" << endl; /* ofstream out_file( "./results.txt" ); copy( out.begin(), out.end(), ostream_iterator<float>( out_file, "\n" )); */ return 0; } ------------------------------------------------------------------------- //OpenCL program __kernel void sqrt_array( __global float4* out, __global float4* in ) { size_t tid = get_global_id(0); out[tid] = sqrt( in[tid] ); }
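Make sure that the host pointer passed to clCreateBuffer is properly aligned. The kernel accesses the buffers as float4, so with CL_MEM_USE_HOST_PTR the host memory should be 16-byte aligned. See the code below for the fix.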
#ifndef _DEBUG #define _SECURE_SCL 0 #define _HAS_ITERATOR_DEBUGGING 0 #endif #include <algorithm> #include <cstdlib> #include <fstream> #include <functional> #include <iterator> #include <iostream> #include <vector> //#include <boost\timer.hpp> #define __NO_STD_VECTOR // Use cl::vector instead of STL version #include "CL/cl.hpp" using namespace std; inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl; exit(EXIT_FAILURE); } } int main(int argc, char* argv[]) { vector<float> in ( 256); vector<float> out( 256 ); float* inAligned = (cl_float*)_aligned_malloc(256 * sizeof(cl_float), 16); float* outAligned = (cl_float*)_aligned_malloc(256 * sizeof(cl_float), 16); srand(10); int num_elements = 100; generate_n( in.begin() , num_elements, &rand ); fill( out.begin(), out.end(), 0.0f ); cl_int err; cl::vector< cl::Platform > platformList; cl::Platform::get( &platformList ); checkErr( platformList.size()!=0 ? CL_SUCCESS : -1, "cl::Platform::get" ); std::cout << "Platform number is: " << platformList.size() << std::endl; std::string platformVendor; platformList[0].getInfo( (cl_platform_info)CL_PLATFORM_VENDOR , &platformVendor ); std::cout << "Platform is by: " << platformVendor << "\n"; // context properties list - must be terminated with 0 cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM , (cl_context_properties)(platformList[0])() , 0 }; cl::Context context( CL_DEVICE_TYPE_CPU , cprops , NULL , NULL , &err ); checkErr(err, "Conext::Context()"); cl::Buffer inCL( context , CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR , sizeof( float ) * 256 , inAligned , &err ); cl::Buffer outCL( context , CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR , sizeof( float ) * 256 , outAligned , &err ); checkErr(err, "Buffer::Buffer()"); cl::vector<cl::Device> devices; devices = context.getInfo<CL_CONTEXT_DEVICES>(); checkErr( devices.size() > 0 ? 
CL_SUCCESS : -1, "devices.size() > 0" ); ifstream file("./sqrt_array.cl"); checkErr(file.is_open() ? CL_SUCCESS:-1, "./sqrt_array.cl"); istreambuf_iterator<char> end_of_file; string kernel_code( istreambuf_iterator<char>( file ) , end_of_file ); cl::Program::Sources source( 1 , make_pair( kernel_code.c_str() , kernel_code.length() + 1 ) ); cl::Program program( context, source ); err = program.build( devices , "" // compiler options ); cl::Kernel kernel( program , "sqrt_array" , &err ); checkErr( err, "Kernel::Kernel()" ); err = kernel.setArg( 0, outCL ); checkErr(err, "Kernel::setArg()"); err = kernel.setArg( 1, inCL ); checkErr(err, "Kernel::setArg()"); cl::CommandQueue queue( context , devices[0] , 0 , &err ); checkErr( err, "CommandQueue::CommandQueue()" ); // boost::timer now; cl::Event event; err = queue.enqueueNDRangeKernel( kernel , cl::NullRange , cl::NDRange( 256/ 4 ) // we are using vector data float4 , cl::NDRange( 1 ) , NULL , &event ); checkErr(err, "ComamndQueue::enqueueNDRangeKernel()"); printf("Before event.wait() call\n"); event.wait(); printf("After event.wait() call\n"); err = queue.enqueueReadBuffer( outCL , CL_TRUE , 0 , sizeof( float ) * num_elements , &out.front() ); checkErr(err, "ComamndQueue::enqueueReadBuffer()"); //cout << now.elapsed() << " seconds" << endl; /* ofstream out_file( "./results.txt" ); copy( out.begin(), out.end(), ostream_iterator<float>( out_file, "\n" )); */ return 0; }