2 Replies Latest reply on Mar 21, 2010 7:18 PM by notyou

    Can't read from buffer?

    notyou

      Hi everyone,

      I'd like to thank Illusio and Fr4nz for their help in getting me started with OpenCL.

      I've gotten as far as creating a basic program to get device info.

      Now I'm trying to read from one array and write that value into another, but only the first value of the array is ever changed (to something random, possibly the memory location).

       - I have the two buffers I want to read from (only using one currently though) set as read only when I create them.

      - I am able to change the value in the output array (set as write only)

      - If I try to read from one array (a) and put it into another (c), only the first item in array c is ever changed (to something random it seems)

      - I've tried changing the first array to make it writable (changing it from const) which does allow me to change the value, but I still can't read from it.

      - The changes that are made (if I directly assign a value to an array index) are done correctly because they appear when I write (using a read buffer) the results back into main memory.

      Attached is my code. Can anyone see if there is something glaringly wrong with the code that is preventing me from reading from the buffer? I'm also open to suggestions for changing my coding style if someone can see something that I could do in a better way.

      //http://www.khronos.org/opencl/sdk/1.0/docs/man/xhtml/ //this page is extremely useful, you will be working almost entirely off of it //the basic outline starts from podcast 3 of Mac research #include <CL/cl.hpp> #include <iostream> #include <iomanip> #include <omp.h> using namespace std; //minimum local size must be 256, so NUM_ELEMENTS must be at least that #define NUM_ELEMENTS 256 //16777216 - max size of int array, otherwise we're trying to allocate more memory than we have int array1[NUM_ELEMENTS]; int array2[NUM_ELEMENTS]; int array3[NUM_ELEMENTS]; void printDeviceInfo(cl_device_id); bool errorCheck(cl_int, string); //returns true if there is an error (and prints appropriate message) int main(int argc, char *argv[]) { double start, end, cpu_sequential_elapsed, cpu_parallel_elapsed; cout<<setprecision(5)<<fixed; start = omp_get_wtime(); for(int i = 0; i < NUM_ELEMENTS; i++) array3[i] = 1; end = omp_get_wtime(); cpu_sequential_elapsed = end - start; start = omp_get_wtime(); #pragma omp parallel for schedule(static, NUM_ELEMENTS/omp_get_num_threads()) for(int i = 0; i < NUM_ELEMENTS; i++) array3[i] = 1; end = omp_get_wtime(); cpu_parallel_elapsed = end - start; for(int i = 0; i < NUM_ELEMENTS; i++) { array1[i] = 78; array2[i] = 0; array3[i] = 0; } //initialization cl_int error; //check to see if we error our during most steps cl_platform_id platform; cl_uint numPlatforms; cl_uint num_devices_returned = 0; //get a list of all the platforms error = clGetPlatformIDs(0, NULL, &numPlatforms); if (0 < numPlatforms) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; error = clGetPlatformIDs(numPlatforms, platforms, NULL); for (unsigned i = 0; i < numPlatforms; ++i) { //char pbuf[100]; //error = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); platform = platforms[i]; } delete[] platforms; } //get our GPU device cl_device_id device; error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); 
if(!errorCheck(error, "Getting device info")) printDeviceInfo(device); //create a context with our devices cl_context context; context = clCreateContext(NULL, 1, &device, NULL, NULL, &error); errorCheck(error, "Creating context"); //create a command queue cl_command_queue cmd_queue; cmd_queue = clCreateCommandQueue(context, device, 0, &error); errorCheck(error, "Creating command queue"); //create our program cl_program program; size_t length = 512; //amount of characters that our program will take up //set to be larger than how many we actually have const char *source = "__kernel void test( \ __global const int *a, \ __global int *b, \ __global int *c) \ { \ int x = get_global_id(0); \ c[x] = a[x]; \ }"; program = clCreateProgramWithSource(context, 1, &source, NULL, &error); errorCheck(error, "Creating program with source"); cout<<"Building Program"<<endl; //build our program error = clBuildProgram(program, 1, &device, "", NULL, NULL); errorCheck(error, "Building program"); char logFile[1024]; error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(logFile), &logFile, 0); if(error != CL_SUCCESS) cout<<"\n--------------------Build Log--------------------\n\n"<<logFile<<"\n--------------------End Build Log--------------------\n\n"<<endl; //create kernel objects for all kernel functions in the program object cl_kernel kernel; cl_uint numKernels; error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels); errorCheck(error, "Creating kernel in program"); //create memory buffer so we can transfer data to the GPU cl_mem gpu_input1Buffer; cl_mem gpu_input2Buffer; cl_mem gpu_outputBuffer; gpu_input1Buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(array1), NULL, &error); errorCheck(error, "Creating GPU input buffer 1"); gpu_input2Buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(array2), NULL, &error); errorCheck(error, "Creating GPU input buffer 2"); gpu_outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(array3), 
NULL, &error); errorCheck(error, "Creating GPU output buffer"); //write our input arrays into the device's memory cl_event cmd_event; error = clEnqueueWriteBuffer(cmd_queue, gpu_input1Buffer, CL_TRUE, 0, sizeof(gpu_input1Buffer), &gpu_input1Buffer, 0, NULL, &cmd_event); errorCheck(error, "Enqueuing write buffer for buffer 1"); error = clEnqueueWriteBuffer(cmd_queue, gpu_input2Buffer, CL_TRUE, 0, sizeof(gpu_input2Buffer), &gpu_input2Buffer, 0, NULL, &cmd_event); errorCheck(error, "Enqueuing write buffer for buffer 2"); error = clEnqueueWriteBuffer(cmd_queue, gpu_outputBuffer, CL_TRUE, 0, sizeof(gpu_outputBuffer), &gpu_outputBuffer, 0, NULL, &cmd_event); errorCheck(error, "Enqueuing write buffer for gpu_outputBuffer"); //set our kernel arguments to our previous arrays error = clSetKernelArg(kernel, 0, sizeof(gpu_input1Buffer), &gpu_input1Buffer); errorCheck(error, "Setting kernel arg [0]"); error = clSetKernelArg(kernel, 1, sizeof(gpu_input2Buffer), &gpu_input2Buffer); errorCheck(error, "Setting kernel arg [1]"); error = clSetKernelArg(kernel, 2, sizeof(gpu_outputBuffer), &gpu_outputBuffer); errorCheck(error, "Setting kernel arg [2]"); //get the maximum work group size for executing the kernel on the device size_t local; size_t global = NUM_ELEMENTS; error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL); errorCheck(error, "Getting kernel work group info"); //number of work items aka GLOBAL WORK SIZE //1D array, GLOBAL WORK SIZE = # elements //2D array, GLOBAL WORK SIZE = x * y (each being the # elements) //3D array, GLOBAL WORK SIZE = x * y * z (each being the # elements) start = omp_get_wtime(); //enqueue our kernel to execute on the device error = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); //wait for execution to finish clFinish(cmd_queue); end = omp_get_wtime(); cout<<"Sequential CPU Time elapsed: "<<cpu_sequential_elapsed<<endl; cout<<"Parallel CPU Time elapsed: 
"<<cpu_parallel_elapsed<<endl; cout<<"GPU Time elapsed: "<<end-start<<endl; errorCheck(error, "Enqueuing ND Range Kernel"); //read the results from the device cl_event resultsEvent; error = clEnqueueReadBuffer(cmd_queue, gpu_outputBuffer, CL_TRUE, 0, sizeof(array3), array3, 0, NULL, &resultsEvent); errorCheck(error, "Reading Results Buffer1"); //error = clEnqueueReadBuffer(cmd_queue, gpu_input1Buffer, CL_TRUE, 0, sizeof(array1), array1, 0, NULL, &resultsEvent); //errorCheck(error, "Reading Results Buffer2"); //error = clEnqueueReadBuffer(cmd_queue, gpu_input2Buffer, CL_TRUE, 0, sizeof(array2), array2, 0, NULL, &resultsEvent); //errorCheck(error, "Reading Results Buffer3"); clFinish(cmd_queue); //what's our output? for(int i = 0; i < 5; i++) { //cout<<"Array1["<<i<<"]: "<<array1[i]<<endl; //cout<<"Array2["<<i<<"]: "<<array2[i]<<endl; cout<<"Array3["<<i<<"]: "<<array3[i]<<endl; } system("pause"); clReleaseMemObject(gpu_input1Buffer); clReleaseMemObject(gpu_input2Buffer); clReleaseMemObject(gpu_outputBuffer); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return 0; } bool errorCheck(cl_int error, string dataPoint) { bool errorOccurred = true; if(error == CL_BUILD_PROGRAM_FAILURE) cout<<"CL_BUILD_PROGRAM_FAILURE"<<endl; else if(error == CL_COMPILER_NOT_AVAILABLE) cout<<"CL_COMPILER_NOT_AVAILABLE"<<endl; else if(error == CL_DEVICE_NOT_AVAILABLE) cout<<"CL_DEVICE_NOT_AVAILABLE"<<endl; else if(error == CL_DEVICE_NOT_FOUND) cout<<"CL_DEVICE_NOT_FOUND"<<endl; else if(error == CL_INVALID_ARG_INDEX) cout<<"CL_INVALID_ARG_INDEX"<<endl; else if(error == CL_INVALID_ARG_SIZE) cout<<"CL_INVALID_ARG_SIZE"<<endl; else if(error == CL_INVALID_ARG_VALUE) cout<<"CL_INVALID_ARG_VALUE"<<endl; else if(error == CL_INVALID_BINARY) cout<<"CL_INVALID_BINARY"<<endl; else if(error == CL_INVALID_BUFFER_SIZE) cout<<"CL_INVALID_BUFFER_SIZE"<<endl; else if(error == CL_INVALID_BUILD_OPTIONS) cout<<"CL_INVALID_BUILD_OPTIONS"<<endl; else 
if(error == CL_INVALID_COMMAND_QUEUE) cout<<"CL_INVALID_COMMAND_QUEUE"<<endl; else if(error == CL_INVALID_CONTEXT) cout<<"CL_INVALID_CONTEXT"<<endl; else if(error == CL_INVALID_DEVICE) cout<<"CL_INVALID_DEVICE"<<endl; else if(error == CL_INVALID_DEVICE_TYPE) cout<<"CL_INVALID_DEVICE_TYPE"<<endl; else if(error == CL_INVALID_EVENT) cout<<"CL_INVALID_EVENT"<<endl; else if(error == CL_INVALID_EVENT_WAIT_LIST) cout<<"CL_INVALID_EVENT_WAIT_LIST"<<endl; else if(error == CL_INVALID_GLOBAL_OFFSET) cout<<"CL_INVALID_GLOBAL_OFFSET"<<endl; else if(error == CL_INVALID_HOST_PTR) cout<<"CL_INVALID_HOST_PTR"<<endl; else if(error == CL_INVALID_KERNEL) cout<<"CL_INVALID_KERNEL"<<endl; else if(error == CL_INVALID_KERNEL_ARGS) cout<<"CL_INVALID_KERNEL_ARGS"<<endl; else if(error == CL_INVALID_MEM_OBJECT) cout<<"CL_INVALID_MEM_OBJECT"<<endl; else if(error == CL_INVALID_OPERATION) cout<<"CL_INVALID_OPERATION"<<endl; else if(error == CL_INVALID_PLATFORM) cout<<"CL_INVALID_PLATFORM"<<endl; else if(error == CL_INVALID_PROGRAM) cout<<"CL_INVALID_PROGRAM"<<endl; else if(error == CL_INVALID_PROGRAM_EXECUTABLE) cout<<"CL_INVALID_PROGRAM_EXECUTABLE"<<endl; else if(error == CL_INVALID_QUEUE_PROPERTIES) cout<<"CL_INVALID_QUEUE_PROPERTIES"<<endl; else if(error == CL_INVALID_SAMPLER) cout<<"CL_INVALID_SAMPLER"<<endl; else if(error == CL_INVALID_VALUE) cout<<"CL_INVALID_VALUE"<<endl; else if(error == CL_INVALID_WORK_DIMENSION) cout<<"CL_INVALID_WORK_DIMENSION"<<endl; else if(error == CL_INVALID_WORK_GROUP_SIZE) cout<<"CL_INVALID_WORK_GROUP_SIZE"<<endl; else if(error == CL_MEM_COPY_HOST_PTR) cout<<"CL_MEM_COPY_HOST_PTR"<<endl; else if(error == CL_MEM_OBJECT_ALLOCATION_FAILURE) cout<<"CL_MEM_OBJECT_ALLOCATION_FAILURE"<<endl; else if(error == CL_MEM_USE_HOST_PTR) cout<<"CL_MEM_USE_HOST_PTR"<<endl; else if(error == CL_OUT_OF_HOST_MEMORY) cout<<"CL_OUT_OF_HOST_MEMORY"<<endl; else if(error == CL_OUT_OF_RESOURCES) cout<<"CL_OUT_OF_RESOURCES"<<endl; else { //cout<<"No error at: "+dataPoint<<endl<<endl; 
return false; } cout<<"Error at: "+dataPoint<<endl<<endl; return errorOccurred; } void printDeviceInfo(cl_device_id device) { cl_uint error; size_t size; char deviceName[512] = {0}; char vendor[512] = {0}; char driverVersion[512] = {0}; char deviceVersion[512] = {0}; cl_uint cacheSize = 0; cl_ulong globalMemSize = 0; cl_uint maxClockFrequency = 0; cl_uint maxComputeUnits = 0; cl_platform_id platformID = 0; size_t maxWorkGroupSize; cl_uint maxWorkItemDimensions = 0; size_t maxWorkItemSizes[3]; //get the device name error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, &size); if(error == CL_SUCCESS) cout<<"Device Name: "<<deviceName<<endl; else cout<<"Error getting device name"<<endl; //get the vendor error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor), vendor, &size); if(error == CL_SUCCESS) cout<<"Vendor: "<<vendor<<endl; else cout<<"Error getting vendor"<<endl; //get the driver version error = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(driverVersion), driverVersion, &size); if(error == CL_SUCCESS) cout<<"Driver Version: "<<driverVersion<<endl; else cout<<"Error getting driver version"<<endl; //get the device version error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion), deviceVersion, &size); if(error == CL_SUCCESS) cout<<"Device Version: "<<deviceVersion<<endl; else cout<<"Error getting device version"<<endl; //get the global memory size error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalMemSize), &globalMemSize, &size); if(error == CL_SUCCESS) cout<<"Global memory size: "<<globalMemSize<<endl; else cout<<"Error getting global memory size"<<endl; //get the max clock frequency error = clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(maxClockFrequency), &maxClockFrequency, NULL); if(error == CL_SUCCESS) { cout<<"Max clock frequency: "; if(maxClockFrequency > 1000) { maxClockFrequency /= 1000; cout<<maxClockFrequency<<" GHz"<<endl; } else cout<<maxClockFrequency<<" 
MHz"<<endl; } else cout<<"Error getting max clock frequency"<<endl; //get the number of compute units error = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL); if(error == CL_SUCCESS) cout<<"Max compute units: "<<maxComputeUnits<<endl; else cout<<"Error getting max compute units"<<endl; //get the platform error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platformID), &platformID, NULL); if(error == CL_SUCCESS) cout<<"Platform: "<<platformID<<endl; else cout<<"Error getting platform"<<endl; error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL); if(error == CL_SUCCESS) cout<<"Max Work Group Size: "<<maxWorkGroupSize<<endl; else cout<<"Error getting max work group size"<<endl; error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(maxWorkItemDimensions), &maxWorkItemDimensions, NULL); if(error == CL_SUCCESS) cout<<"Max Work Item Dimensions: "<<maxWorkItemDimensions<<endl; else cout<<"Error getting max work item dimensions"<<endl; error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(maxWorkItemSizes), &maxWorkItemSizes, NULL); if(error == CL_SUCCESS) cout<<"Max Work Item Sizes: "<<maxWorkItemSizes<<endl; else cout<<"Error getting max work item sizes"<<endl; cout<<endl<<endl; }

        • Can't read from buffer?
          omkaranathan

          notyou,

          You are not passing any input to your kernel.

          The 6th argument of clEnqueueWriteBuffer() is the pointer to buffer in host memory which is to be read into your device. You are passing cl_mem object instead.

            • Can't read from buffer?
              notyou

               

              Originally posted by: omkaranathan notyou,

               

              You are not passing any input to your kernel.

               

              The 6th argument of clEnqueueWriteBuffer() is the pointer to buffer in host memory which is to be read into your device. You are passing cl_mem object instead.

               

               

              Thanks omkaranathan. I now see where I got confused.