6 Replies Latest reply on Mar 10, 2013 11:51 AM by apcool

    copy program in opencl....


      Hi, I'm trying to write an OpenCL program that copies one array into another.

      This is the kernel I have written:

      // Copy kernel: each work-item reads A[i] and stores it to B[i], where i is the
      // work-item's global id in dimension 0.
      // NOTE(review): B is declared `const` although it is the destination of the
      // store below, and no closing brace for the kernel is visible in this listing.
      __kernel void vector_add(__global const int *A, __global const int *B) {


          // Get the index of the current element to be processed

          int i = get_global_id(0);


          // Copy element i from the source to the destination (i is used without a bounds check)

          B[i] = A[i];


      And this is the host program for it:

      // Host program: copies one array to another through an OpenCL kernel.

      #include <stdio.h>

      #include <stdlib.h>

      #include <iostream>


      // NOTE(review): no #else / #endif for this #ifdef is visible in the listing,
      // and no <time.h> include is shown for struct timespec / clock_gettime used
      // further down - presumably lost from the post; confirm against the original file.
      #ifdef __APPLE__

      #include <OpenCL/opencl.h>


      #include <CL/cl.h>



      // Size in bytes (0x100000 = 1 MiB) of the buffer the kernel source file is read into.
      #define MAX_SOURCE_SIZE (0x100000)

      // Running total, in seconds, of the time measured between `start` and `stop`.
      double timeDifference = 0;

      // Timestamps passed to clock_gettime() before and after the timed section.
      struct timespec start, stop;

      // Number of ints in the host array that main() allocates.
      int size=1024;


      //#define LIST_SIZE 1024

      using namespace std;

      // Copies list_size ints from the host array c to a device buffer, runs the
      // "vector_add" kernel loaded from copy_array_kernel.cl, reads the second device
      // buffer back into a malloc'd host array B and returns B.
      //
      // c         - host input array; list_size ints are uploaded from it.
      // list_size - number of ints to process; also used as the global work size.
      // returns   - the malloc'd array B. It is not freed here (the free(B) call is
      //             commented out), so the caller has to free() it.
      //
      // Despite its name, nothing in the visible code computes a mean.
      // NOTE(review): the opening and closing braces of this function are not
      // visible in the listing - presumably lost when the post was formatted.
      // NOTE(review): `ret` is overwritten by every OpenCL call below and is never
      // inspected in the visible code, so a failing call goes unnoticed.
      int * mean(int c[],int list_size)


         int i;

          // NOTE(review): A is allocated but never used or freed in the visible code.
          int *A = (int*)malloc(sizeof(int)*(list_size));

          // Host-side destination for the data read back from the device; returned to the caller.
          int *B = (int*)malloc(sizeof(int)*(list_size));


         // int *C = (int*)malloc(sizeof(int)*(list_size/2));


          // Load the kernel source code into the array source_str

          FILE *fp;

          char *source_str;

          size_t source_size;


          fp = fopen("copy_array_kernel.cl", "r");

          if (!fp) {

              fprintf(stderr, "Failed to load kernel.\n");
              // NOTE(review): no exit/return and no closing brace for this `if` are
              // visible in the listing; confirm against the original file.



          // Read at most MAX_SOURCE_SIZE bytes; source_size is the number of bytes actually read.
          source_str = (char*)malloc(MAX_SOURCE_SIZE);

          source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);

          fclose( fp );


          // Get platform and device information (one platform, one GPU device on it)

          cl_platform_id platform_id = NULL;

          cl_device_id device_id = NULL;  

          cl_uint ret_num_devices;

          cl_uint ret_num_platforms;

          cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);

          ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,

                  &device_id, &ret_num_devices);


          // Create an OpenCL context

          cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);


          // Create a command queue

          cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);


          // Create two device buffers of list_size ints each: a_mem_obj (kernel input)
          // and b_mem_obj (kernel output).
          // NOTE(review): both are created CL_MEM_READ_ONLY, yet b_mem_obj is bound to
          // kernel argument 1, which the kernel stores into - confirm the intended flag
          // (the commented-out c_mem_obj below used CL_MEM_WRITE_ONLY).

          cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,

                  (list_size) * sizeof(int), NULL, &ret);

          cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,

                  (list_size) * sizeof(int), NULL, &ret);

        //  cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,

                //  (list_size) * sizeof(int), NULL, &ret);


          // Blocking write (CL_TRUE) of the caller's array c into a_mem_obj.
          // Nothing is uploaded to b_mem_obj; that write is commented out below.

          ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,

                  (list_size) * sizeof(int), c, 0, NULL, NULL);

          //ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

                //  (list_size) * sizeof(int), B, 0, NULL, NULL);


          // Create a program from the kernel source

          cl_program program = clCreateProgramWithSource(context, 1,

                  (const char **)&source_str, (const size_t *)&source_size, &ret);


          // Build the program

          ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);


          // Create the OpenCL kernel

          cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);


          // Set the arguments of the kernel: argument 0 = a_mem_obj, argument 1 = b_mem_obj

          ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);

          ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);

         // ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);


            // Timestamp taken just before the kernel is enqueued.
            clock_gettime(CLOCK_REALTIME, &start);



          // Execute the OpenCL kernel on the list

          size_t global_item_size = (list_size); // Process the entire lists

          // NOTE(review): OpenCL 1.x expects the global size to be a multiple of the
          // work-group size; confirm for list_size values that are not multiples of 512.
          size_t local_item_size = 512; // Work-group size: 512 work-items per group

          ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,

                  &global_item_size, &local_item_size, 0, NULL, NULL);


          // Blocking read (CL_TRUE) of device buffer b_mem_obj into the host array B


          ret = clEnqueueReadBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

                  (list_size) * sizeof(int), B, 0, NULL, NULL);


          // NOTE(review): the block comment opened on the next line has no visible
          // terminator in this listing, so as pasted everything after it - the second
          // clock_gettime, the timeDifference update, the cleanup and `return B` - is
          // lexically inside a comment. Presumably the terminator and the loop bodies
          // were lost from the post; confirm against the original file.
      /*   // Display the result to the screen

          for(i = 0; i <list_size/2; i++)


         /* for(i=0;i<size/2;i++)



            // printf(" %d",C[i]);



            clock_gettime(CLOCK_REALTIME, &stop);



          timeDifference += (double) (stop.tv_sec - start.tv_sec)

                  + (double) (stop.tv_nsec - start.tv_nsec) / 1000000000;


           //printf("\nsum of array %d\n",sum);


          // Clean up

          ret = clFlush(command_queue);

          ret = clFinish(command_queue);

          ret = clReleaseKernel(kernel);

          ret = clReleaseProgram(program);

          ret = clReleaseMemObject(a_mem_obj);

          ret = clReleaseMemObject(b_mem_obj);

         // ret = clReleaseMemObject(c_mem_obj);

          ret = clReleaseCommandQueue(command_queue);

          ret = clReleaseContext(context);


         // free(B);


          return B;


      // Entry point: allocates a host array of `size` ints, prints values through f
      // and then prints the accumulated timeDifference.
      // NOTE(review): the function braces, the loop that fills c, the call to mean()
      // that would assign f, and the loop around the printf are not visible in this
      // listing - presumably lost when the post was formatted. As shown, f and i are
      // uninitialized when f[i] is read.
      int main()


        int i;

      int *c = (int*)malloc(sizeof(int)*(size));int *f;



          // if(i<128)

            // c[i]=0;

          // else







          printf(" %d",f[i]);


      // Report the total measured time held in the global timeDifference.
      cout << "parallel Total Required Time:" << timeDifference << endl;


        return 0;


      But it gives a different output. Can anybody tell me where the mistake in this program is?

        • Re: copy program in opencl....



          I'm not sure if this is the error, but all I can find is that you're writing into a CL_MEM_READ_ONLY buffer (B).

          But maybe you should check the 'ret' values at least with the debugger to ensure all OpenCL commands executed properly.

          • Re: copy program in opencl....

            Hi Ankit,

            You wrote: "__kernel void vector_add(__global const int *A, __global const int *B) {"

            I don't think B can be a const array, as you have to write into it. Also, as suggested by realhet, it is recommended to check the error status returned by all OpenCL APIs. Check some of the simple SDK samples for more information.

              • Re: copy program in opencl....



                That __global const thing sounded weird to me as well, so I tested it a bit:

                __global const  : wrong

                __global : good

                __constant :good

                __constant const : good

                __constant const const : also working (compiler compiles it without problem haha)


                So the 'const' keyword is not needed here. In my understanding it is only there to make the pointer read-only, not the data, but that theory failed when I tested it with __global.