In the radix sort code that I have written, the second kernel is not working as want it to work..., in the first kernel I have successfully managed to sort no.s ending with LSB 1 and LSB 0 into different arrays named g_ones and g_zero for the first pass..(entire host code and kernel code is below) for the first pass.. In the second kernel , the elements of new_ones array(array obtained from g_ones in the host code by eliminating the invalid elements from g_ones) are not correctly transferred into the data array whereas the elements of new_zero are correctly tranferred... I am reading both the arrays i.e. the data array and the new_ones array in my host code... the new_ones array gets printed properly but the data_array gets printed properly just for the transfer of new_zero array into it... The 2nd kernel is as follows and below that I have inserted the image of the output....
__kernel void radix2(__global int *data, __global int *new_ones,
__global int *new_zero, int n)
{
int id=get_global_id(0);
if (id>=0 && id<n) // n is the size of new_zero array.
data[id]=new_zero[id];
else if (id>=n && id < 15)
data[id]=new_ones[n-id];
}
I have tested all other parts in the code and I am sure that there isn't any problem with any of those parts... I think the problem may be with the 2nd kernel code itself ...
I am posting the entire code below...
HOST CODE
#include <iostream>
#include <fstream>
#include <sstream>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Create an OpenCL context on the first available platform using
// either a GPU or CPU depending on what is available.
//
cl_context CreateContext()
{
cl_int errNum;
cl_uint numPlatforms;
cl_platform_id firstPlatformId;
cl_context context = NULL;
errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
if (errNum != CL_SUCCESS || numPlatforms <= 0)
{
std::cerr << "Failed to find any OpenCL platforms." << std::endl;
return NULL;
}
cl_context_properties contextProperties[] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)firstPlatformId,
0
};
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
NULL, NULL, &errNum);
if (errNum != CL_SUCCESS)
{
std::cout << "Could not create GPU context, trying CPU..." << std::endl;
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,
NULL, NULL, &errNum);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
return NULL;
}
}
return context;
}
///
// Create a command queue on the first device available on the
// context
//
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
cl_int errNum;
cl_device_id *devices;
cl_command_queue commandQueue = NULL;
size_t deviceBufferSize = -1;
errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";
return NULL;
}
if (deviceBufferSize <= 0)
{
std::cerr << "No devices available.";
return NULL;
}
devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];
errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);
if (errNum != CL_SUCCESS)
{
delete [] devices;
std::cerr << "Failed to get device IDs";
return NULL;
}
commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
if (commandQueue == NULL)
{
delete [] devices;
std::cerr << "Failed to create commandQueue for device 0";
return NULL;
}
*device = devices[0];
delete [] devices;
return commandQueue;
}
///
// Create an OpenCL program from the kernel source file
//
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
cl_int errNum;
cl_program program;
std::ifstream kernelFile(fileName, std::ios::in);
if (!kernelFile.is_open())
{
std::cerr << "Failed to open file for reading: " << fileName << std::endl;
return NULL;
}
std::ostringstream oss;
oss << kernelFile.rdbuf();
std::string srcStdStr = oss.str();
const char *srcStr = srcStdStr.c_str();
program = clCreateProgramWithSource(context, 1,
(const char**)&srcStr,
NULL, NULL);
if (program == NULL)
{
std::cerr << "Failed to create CL program from source." << std::endl;
return NULL;
}
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
// Determine the reason for the error
char buildLog[16384];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
sizeof(buildLog), buildLog, NULL);
std::cerr << "Error in kernel: " << std::endl;
std::cerr << buildLog;
system("pause");
clReleaseProgram(program);
return NULL;
}
return program;
}
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel1, cl_kernel kernel2 )
{
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel1 != 0)
clReleaseKernel(kernel1);
if (kernel2 != 0)
clReleaseKernel(kernel2);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
}
///
// main() for radix sort
//
int main(int argc, char** argv)
{
cl_context context = 0;
cl_command_queue commandQueue = 0;
cl_program program = 0;
cl_device_id device = 0;
cl_kernel kernel1 = 0;
cl_kernel kernel2 = 0;
cl_mem memObject[3] = {0,0,0};
cl_int errNum;
// Create an OpenCL context on first available platform
context = CreateContext();
if (context == NULL)
{
std::cerr << "Failed to create OpenCL context." << std::endl;
return 1;
}
// Create a command-queue on the first device available
// on the created context
commandQueue = CreateCommandQueue(context, &device);
if (commandQueue == NULL)
{
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
// Create OpenCL program from HelloWorld.cl kernel source
program = CreateProgram(context, device, "HelloWorld.cl");
if (program == NULL)
{
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
// Create OpenCL kernel
kernel1 = clCreateKernel(program, "radix1", NULL);
if (kernel1 == NULL)
{
std::cerr << "Failed to create kernel" << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
kernel2 = clCreateKernel(program, "radix2", NULL);
if (kernel2 == NULL)
{
std::cerr << "Failed to create kernel" << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
int *data =(int*)malloc(sizeof(int)*15);
int *g_ones=(int*)malloc(sizeof(int)*15);
int *g_zero =(int*)malloc(sizeof(int)*15);
int j,k,q,p,cmp_val;
for (int i = 0; i < 15; i++)
{
data=i;
}
cmp_val=1;
size_t globalWorkSize[1] = { 15};
size_t localWorkSize[1] = { 5 };
memObject[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(int) * 15, data, NULL);
memObject[1] = clCreateBuffer(context, CL_MEM_READ_WRITE ,
sizeof(int) * 15, NULL, NULL);
memObject[2] = clCreateBuffer(context, CL_MEM_READ_WRITE ,
sizeof(int) * 15, NULL, NULL);
// Actual sequential loop for radix sort for the bit pass...
for(int i=0;i<4;i++)
{
j=0;k=0;
int m,n;
m=0;n=0;
errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[0]);
errNum = clSetKernelArg(kernel1, 1, sizeof(cl_mem), &memObject[1]);
errNum = clSetKernelArg(kernel1, 2, sizeof(cl_mem), &memObject[2]);
errNum = clSetKernelArg(kernel1, 3, sizeof(int), &cmp_val);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error setting kernel arguments." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
errNum = clEnqueueNDRangeKernel(commandQueue, kernel1, 1, NULL,
globalWorkSize, localWorkSize,
0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error queuing kernel one for execution." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,
0, 15 * sizeof(int), g_ones,
0, NULL, NULL);
errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,
0, 15 * sizeof(int), g_zero,
0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error reading result buffers." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
for( p=0;p<15;p++)
{
if (g_ones
!=-1)
{
m++;
}
if(g_zero
!=-1)
{
n++;
}
}
int *new_ones =(int*)malloc(sizeof(int)*m);
int *new_zero=(int*)malloc(sizeof(int)*n);
for( p=0;p<15;p++)
{
if (g_ones
!=-1)
{
new_ones[j++]=g_ones
;
}
if(g_zero
!=-1)
{
new_zero[k++]=g_zero
;
}
}
errNum= clEnqueueWriteBuffer(commandQueue, memObject[1], CL_TRUE, 0,
sizeof(int) * m,new_ones, 0, NULL, NULL);
errNum=clEnqueueWriteBuffer(commandQueue, memObject[2], CL_TRUE, 0,
sizeof(int) * n,new_zero, 0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error writing new ones and zero buffer." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[0]);
errNum = clSetKernelArg(kernel2, 1, sizeof(cl_mem), &memObject[1]);
errNum = clSetKernelArg(kernel2, 2, sizeof(cl_mem), &memObject[2]);
errNum = clSetKernelArg(kernel2, 3, sizeof(int), &n);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error setting kernel arguments." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
errNum = clEnqueueNDRangeKernel(commandQueue, kernel2, 1, NULL,
globalWorkSize, localWorkSize,
0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error queuing kernel two for execution." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
errNum = clEnqueueReadBuffer(commandQueue, memObject[0], CL_TRUE,
0, 15 * sizeof(int), data,
0, NULL, NULL);
errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,
0, m * sizeof(int), new_ones,
0, NULL, NULL);
errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,
0, m * sizeof(int), new_zero,
0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error reading final buffer." << std::endl;
Cleanup(context, commandQueue, program, kernel1,kernel2 );
return 1;
}
std::cout << "new_ones" <<std::endl;
for (int i = 0; i < m; i++)
{
std::cout << new_ones << " ";
}
std::cout <<std::endl;
std::cout << "new_zero" <<std::endl;
for (int i = 0; i < m; i++)
{
std::cout << new_zero << " ";
}
std::cout <<std::endl;
std::cout << "data" <<std::endl;
for (int i = 0; i < 15; i++)
{
std::cout << data << " ";
}
system("pause");
cmp_val<<=1;
delete[]new_zero;
delete[]new_ones;
}
}
Full Kernel Code
__kernel void radix1(__global int *data,__global int *g_ones,
__global int *g_zero, int cmp_value )
{
int gid=get_global_id(0);
if(data[gid] & cmp_value)
{
g_ones[gid]=data[gid];
g_zero[gid]=-1;
}
else
{
g_ones[gid]=-1;
g_zero[gid]=data[gid];
}
}
__kernel void radix2(__global int *data, __global int *new_ones,
__global int *new_zero, int n)
{
int id=get_global_id(0);
if (id>=0 && id<n)
data[id]=new_zero[id];
else if (id>=n && id < 15)
data[id]=new_ones[n-id];
}
Solved! Go to Solution.
The problem is when "id >=n" and "id <15" -- You are using "n-id" which is a -ve number. You should use "id-n".