Answered (Assumed Answered)

OpenCL program for sorting

Question asked by apcool on Apr 25, 2013
Latest reply on Apr 26, 2013 by himanshu.gautam

I am trying to write an OpenCL program for sorting. I have posted my code below. Can anyone tell me why it is showing the wrong output?

Host code:

#include <stdio.h>

#include <stdlib.h>

#include <iostream>

#include<time.h>

#ifdef __APPLE__

#include <OpenCL/opencl.h>

#else

#include <CL/cl.h>

#endif

 

// Size in bytes (0x100000 = 1 MiB) of the buffer copy() allocates for the
// kernel source text, and the maximum number of bytes it reads from the file.
#define MAX_SOURCE_SIZE (0x100000)

// Elapsed time in seconds; copy() adds to it and main() prints it.
double timeDifference = 0;

// Timestamps filled in by clock_gettime() inside copy().
struct timespec start, stop;

// Number of float elements main() allocates and passes to copy().
long int size=256;

 

//#define LIST_SIZE 1024

using namespace std;

float * copy(float c[],long int list_size)

{

   int i;

/*  for(i=0;i<list_size;i++)

   {

    printf(" %f",c[i]);

   }*/

   // int *A = (int*)malloc(sizeof(int)*(list_size));

    float *B = (float*)malloc(sizeof(float)*(list_size));

   // int *C = (int*)malloc(sizeof(int)*(list_size/2));

  

    // Load the kernel source code into the array source_str

    FILE *fp;

    char *source_str;

    size_t source_size;

   cout<<"\n"<<"array in opencl program"<<"\n";

    for(i=0;i<list_size-200;i++)

   {

    printf(" %f",c[i]);

   }

 

    fp = fopen("sorting_array_kernel1.cl", "r");

    if (!fp) {

        fprintf(stderr, "Failed to load kernel.\n");

        exit(1);

    }

    source_str = (char*)malloc(MAX_SOURCE_SIZE);

    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);

    fclose( fp );

 

    // Get platform and device information

    cl_platform_id platform_id = NULL;

    cl_device_id device_id = NULL;  

    cl_uint ret_num_devices;

    cl_uint ret_num_platforms;

    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);

    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,

            &device_id, &ret_num_devices);

 

    // Create an OpenCL context

    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

 

    // Create a command queue

    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

 

    // Create memory buffers on the device for each vector

    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,

            (list_size) * sizeof(float), NULL, &ret);

    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,

            (list_size) * sizeof(float), NULL, &ret);

  //  cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,

          //  (list_size) * sizeof(int), NULL, &ret);

 

    // Copy the lists A and B to their respective memory buffers

    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,

            (list_size) * sizeof(float), c, 0, NULL, NULL);

    //ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

          //  (list_size) * sizeof(int), B, 0, NULL, NULL);

 

    // Create a program from the kernel source

    cl_program program = clCreateProgramWithSource(context, 1,

            (const char **)&source_str, (const size_t *)&source_size, &ret);

 

    // Build the program

    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

 

    // Create the OpenCL kernel

    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

 

    // Set the arguments of the kernel

    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);

    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);

   // ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);

     

      clock_gettime(CLOCK_REALTIME, &start);

  

 

    // Execute the OpenCL kernel on the list

    size_t global_item_size = (list_size); // Process the entire lists

    size_t local_item_size = 256; // Divide work items into groups of 512

    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,

            &global_item_size, &local_item_size, 0, NULL, NULL);

 

    // Read the memory buffer C on the device to the local variable C

 

    ret = clEnqueueReadBuffer(command_queue, b_mem_obj, CL_TRUE, 0,

            (list_size) * sizeof(float), B, 0, NULL, NULL);

   cout<<"\n"<<"array in opencl program"<<"\n";

 

   // Display the result to the screen

  for(i = 0; i <list_size-200; i++)

        printf(" %f",B[i]);

 

   

     clock_gettime(CLOCK_REALTIME, &stop);

 

  

   timeDifference += (double) (stop.tv_sec - start.tv_sec)

            + (double) (stop.tv_nsec - start.tv_nsec) / 1000000000;

    

     //printf("\nsum of array %d\n",sum);

        //getchar();    

    // Clean up

    ret = clFlush(command_queue);

    ret = clFinish(command_queue);

    ret = clReleaseKernel(kernel);

    ret = clReleaseProgram(program);

    ret = clReleaseMemObject(a_mem_obj);

    ret = clReleaseMemObject(b_mem_obj);

   // ret = clReleaseMemObject(c_mem_obj);

    ret = clReleaseCommandQueue(command_queue);

    ret = clReleaseContext(context);

  //  free(A);

   // free(B);

 

    return B;

}

int main()

{

  int i;

float *c = (float*)malloc(sizeof(float)*(size));float *f;

  for(i=0;i<size;i++)

  {

  

       c[i]=256-i;

  }

  for(i=0;i<size-200;i++)

   {

    printf(" %f",c[i]);

   }

  cout<<"array before sorting.................."<<"/n";

  f=copy(c,size);

  cout<<"\n";

  for(i=0;i<size-200;i++)

   {

    printf(" %f",f[i]);

   }

cout << "parallel Total Required Time:" << timeDifference << endl;

 

  return 0;

}

Kernel code:

// Each work-item takes one input element, counts how many elements must come
// before it, and writes it to that position in `out`.
// An element comes first if its key is smaller; equal keys keep input order.
// NOTE(review): data_t and keyValue() are not defined in this snippet; they
// must be provided elsewhere in the .cl file or the program will not build.
// The element count is taken from the global work size, so the launch is
// presumably expected to use exactly one work-item per element - confirm
// against the host code.
__kernel void ParallelSelection(__global const data_t * in,__global data_t * out)
{
  const int self = get_global_id(0);    // index of this work-item's element
  const int count = get_global_size(0); // number of elements considered

  const data_t selfData = in[self];
  const uint selfKey = keyValue(selfData);

  // Rank of in[self]: how many elements sort before it
  int rank = 0;
  int other = 0;
  while (other < count)
  {
    const uint otherKey = keyValue(in[other]);
    if (otherKey < selfKey || (otherKey == selfKey && other < self))
    {
      rank++;
    }
    other++;
  }

  out[rank] = selfData;
}

It would be a great help if anyone could reply soon.

Outcomes