1 Reply Latest reply on Apr 26, 2013 5:25 AM by himanshu.gautam

    opencl program for sorting......

    apcool

      i m trying to write opencl program for sorting.........i m having code which i have posted below.......can anyone tell me why it is showing wrong output.....

      Host code:

      #include <stdio.h>

      #include <stdlib.h>

      #include <iostream>

      #include<time.h>

      #ifdef __APPLE__

      #include <OpenCL/opencl.h>

      #else

      #include <CL/cl.h>

      #endif

       

      #define MAX_SOURCE_SIZE (0x100000)

      double timeDifference = 0;

      struct timespec start, stop;

      long int size=256;

       

      //#define LIST_SIZE 1024

      using namespace std;

      float * copy(float c[],long int list_size)

      {

         int i;

      /*  for(i=0;i<list_size;i++)

         {

          printf(" %f",c[i]);

         }*/

         // int *A = (int*)malloc(sizeof(int)*(list_size));

          float *B = (float*)malloc(sizeof(float)*(list_size));

         // int *C = (int*)malloc(sizeof(int)*(list_size/2));

        

          // Load the kernel source code into the array source_str

          FILE *fp;

          char *source_str;

          size_t source_size;

         cout<<"\n"<<"array in opencl program"<<"\n";

          for(i=0;i<list_size-200;i++)

         {

          printf(" %f",c[i]);

         }

       

          fp = fopen("sorting_array_kernel1.cl", "r");

          if (!fp) {

              fprintf(stderr, "Failed to load kernel.\n");

              exit(1);

          }

          source_str = (char*)malloc(MAX_SOURCE_SIZE);

          source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);

          fclose( fp );

       

          // Get platform and device information

          cl_platform_id platform_id = NULL;

          cl_device_id device_id = NULL;  

          cl_uint ret_num_devices;

          cl_uint ret_num_platforms;

          cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);

          ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,

                  &device_id, &ret_num_devices);

       

          // Create an OpenCL context

          cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

       

          // Create a command queue

          cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

       

          // Create memory buffers on the device for each vector

          cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,

                  (list_size) * sizeof(float), NULL, &ret);

          cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,

                  (list_size) * sizeof(float), NULL, &ret);

        //  cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,

                //  (list_size) * sizeof(int), NULL, &ret);

       

          // Copy the lists A and B to their respective memory buffers

          ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,

                  (list_size) * sizeof(float), c, 0, NULL, NULL);

          //ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

                //  (list_size) * sizeof(int), B, 0, NULL, NULL);

       

          // Create a program from the kernel source

          cl_program program = clCreateProgramWithSource(context, 1,

                  (const char **)&source_str, (const size_t *)&source_size, &ret);

       

          // Build the program

          ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

       

          // Create the OpenCL kernel

          cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

       

          // Set the arguments of the kernel

          ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);

          ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);

         // ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);

           

            clock_gettime(CLOCK_REALTIME, &start);

        

       

          // Execute the OpenCL kernel on the list

          size_t global_item_size = (list_size); // Process the entire lists

          size_t local_item_size = 256; // Divide work items into groups of 512

          ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,

                  &global_item_size, &local_item_size, 0, NULL, NULL);

       

          // Read the memory buffer C on the device to the local variable C

       

          ret = clEnqueueReadBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

                  (list_size) * sizeof(float), B, 0, NULL, NULL);

         cout<<"\n"<<"array in opencl program"<<"\n";

       

         // Display the result to the screen

        for(i = 0; i <list_size-200; i++)

              printf(" %f",B[i]);

       

         

           clock_gettime(CLOCK_REALTIME, &stop);

       

        

         timeDifference += (double) (stop.tv_sec - start.tv_sec)

                  + (double) (stop.tv_nsec - start.tv_nsec) / 1000000000;

          

           //printf("\nsum of array %d\n",sum);

              //getchar();    

          // Clean up

          ret = clFlush(command_queue);

          ret = clFinish(command_queue);

          ret = clReleaseKernel(kernel);

          ret = clReleaseProgram(program);

          ret = clReleaseMemObject(a_mem_obj);

          ret = clReleaseMemObject(b_mem_obj);

         // ret = clReleaseMemObject(c_mem_obj);

          ret = clReleaseCommandQueue(command_queue);

          ret = clReleaseContext(context);

        //  free(A);

         // free(B);

       

          return B;

      }

      int main()

      {

        int i;

      float *c = (float*)malloc(sizeof(float)*(size));float *f;

        for(i=0;i<size;i++)

        {

        

             c[i]=256-i;

        }

        for(i=0;i<size-200;i++)

         {

          printf(" %f",c[i]);

         }

        cout<<"array before sorting.................."<<"/n";

        f=copy(c,size);

        cout<<"\n";

        for(i=0;i<size-200;i++)

         {

          printf(" %f",f[i]);

         }

      cout << "parallel Total Required Time:" << timeDifference << endl;

       

        return 0;

      }

      Kernel code:

      /*
       * Element type, key type and key extraction used by ParallelSelection.
       * The defaults sort plain floats (the host code sends float buffers) by
       * their own value. To sort another element type, define
       * SORT_TYPES_DEFINED before this point and provide data_t, sort_key_t and
       * keyValue() yourself.
       */
      #ifndef SORT_TYPES_DEFINED
      #define SORT_TYPES_DEFINED
      typedef float data_t;
      typedef float sort_key_t;
      #define keyValue(x) (x)
      #endif

      /*
       * Parallel selection (rank) sort: work-item i counts how many input
       * elements must precede in[i] and writes in[i] to that position of `out`.
       *
       *   in  - input array; read in full by every work-item.
       *   out - output array; receives the elements in ascending key order.
       *
       * The element count is taken from get_global_size(0), so the kernel must be
       * launched with exactly one work-item per element. Elements with equal keys
       * keep their input order because ties are broken by index, which also makes
       * every output position unique.
       */
      __kernel void ParallelSelection(__global const data_t * in,__global data_t * out)
      {
        int i = get_global_id(0);   // index of the element this work-item places
        int n = get_global_size(0); // number of elements (one work-item each)

        data_t iData = in[i];
        // Keep the key in its own type; storing a float key in a uint would
        // truncate fractions and mangle negative values.
        sort_key_t iKey = keyValue(iData);

        // Count the elements that sort before in[i]; that count is its output index.
        int pos = 0;
        for (int j = 0; j < n; j++)
        {
          sort_key_t jKey = keyValue(in[j]); // same address for every work-item
          bool smaller = (jKey < iKey) || (jKey == iKey && j < i); // in[j] before in[i]?
          pos += (smaller) ? 1 : 0;
        }

        out[pos] = iData;
      }

      will be a great help........if anyone reply soon.....

        • Re: opencl program for sorting......
          himanshu.gautam

          Some simple tips to help you:

          1. Check the error codes returned by all OpenCL APIs. Otherwise you never which API returned an error, and you would keep on going forward with program execution.

          2. clFinish was not used after kernel execution, and again before reading the results.

          3. What is keyValue function in your kernel. The kernel itself does not compile as of now.

          4. Check APP SDK Samples on how to write OpenCL code properly. And I would recommend you to create a zip file with the files in your code, and attach it , instead of just pushing code here. That is much more handy.