Archives Discussions

shreedhar_pawar · ‎04-09-2013

I have attempted to write a radix sort code partly sequential and partly parallel. The code is giving a runtime error saying ..

My code is as follows, I have highlighted the actual sequential part of the radix sort in the host code(i.e. the for loop for the binary bit pass..)...

HOST CODE is..

#include <iostream>

#include <fstream>

#include <sstream>

#ifdef __APPLE__

#include <OpenCL/cl.h>

#else

#include <CL/cl.h>

#endif

// Create an OpenCL context on the first available platform using

// either a GPU or CPU depending on what is available.

//

cl_context CreateContext()

{

cl_int errNum;

cl_uint numPlatforms;

cl_platform_id firstPlatformId;

cl_context context = NULL;

// First, select an OpenCL platform to run on. For this example, we

// simply choose the first available platform. Normally, you would

// query for all available platforms and select the most appropriate one.

errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);

if (errNum != CL_SUCCESS || numPlatforms <= 0)

{

std::cerr << "Failed to find any OpenCL platforms." << std::endl;

return NULL;

}

// Next, create an OpenCL context on the platform. Attempt to

// create a GPU-based context, and if that fails, try to create

// a CPU-based context.

cl_context_properties contextProperties[] =

{

CL_CONTEXT_PLATFORM,

(cl_context_properties)firstPlatformId,

0

};

context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,

NULL, NULL, &errNum);

if (errNum != CL_SUCCESS)

{

std::cout << "Could not create GPU context, trying CPU..." << std::endl;

context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,

NULL, NULL, &errNum);

if (errNum != CL_SUCCESS)

{

std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;

return NULL;

}

return context;

}

///

// Create a command queue on the first device available on the

// context

//

cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)

{

cl_int errNum;

cl_device_id *devices;

cl_command_queue commandQueue = NULL;

size_t deviceBufferSize = -1;

// First get the size of the devices buffer

errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);

if (errNum != CL_SUCCESS)

{

std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";

return NULL;

}

if (deviceBufferSize <= 0)

{

std::cerr << "No devices available.";

return NULL;

}

// Allocate memory for the devices buffer

devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];

errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);

if (errNum != CL_SUCCESS)

{

delete [] devices;

std::cerr << "Failed to get device IDs";

return NULL;

}

// In this example, we just choose the first available device. In a

// real program, you would likely use all available devices or choose

// the highest performance device based on OpenCL device queries

commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

if (commandQueue == NULL)

{

delete [] devices;

std::cerr << "Failed to create commandQueue for device 0";

return NULL;

}

*device = devices[0];

delete [] devices;

return commandQueue;

}

///

// Create an OpenCL program from the kernel source file

//

cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)

{

cl_int errNum;

cl_program program;

std::ifstream kernelFile(fileName, std::ios::in);

if (!kernelFile.is_open())

{

std::cerr << "Failed to open file for reading: " << fileName << std::endl;

return NULL;

}

std::ostringstream oss;

oss << kernelFile.rdbuf();

std::string srcStdStr = oss.str();

const char *srcStr = srcStdStr.c_str();

program = clCreateProgramWithSource(context, 1,

(const char**)&srcStr,

NULL, NULL);

if (program == NULL)

{

std::cerr << "Failed to create CL program from source." << std::endl;

return NULL;

}

errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

if (errNum != CL_SUCCESS)

{

// Determine the reason for the error

char buildLog[16384];

clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,

sizeof(buildLog), buildLog, NULL);

std::cerr << "Error in kernel: " << std::endl;

std::cerr << buildLog;

system("pause");

clReleaseProgram(program);

return NULL;

}

return program;

}

// Cleanup any created OpenCL resources

//

void Cleanup(cl_context context, cl_command_queue commandQueue,

cl_program program, cl_kernel kernel1, cl_kernel kernel2 )

{

if (commandQueue != 0)

clReleaseCommandQueue(commandQueue);

if (kernel1 != 0)

clReleaseKernel(kernel1);

if (kernel2 != 0)

clReleaseKernel(kernel2);

if (program != 0)

clReleaseProgram(program);

if (context != 0)

clReleaseContext(context);

}

///

// main() for RadixSort example

//

int main(int argc, char** argv)

{

cl_context context = 0;

cl_command_queue commandQueue = 0;

cl_program program = 0;

cl_device_id device = 0;

cl_kernel kernel1 = 0;

cl_kernel kernel2 = 0;

cl_mem memObject[3] = {0,0,0};

cl_int errNum;

// Create an OpenCL context on first available platform

context = CreateContext();

if (context == NULL)

{

std::cerr << "Failed to create OpenCL context." << std::endl;

return 1;

}

// Create a command-queue on the first device available

// on the created context

commandQueue = CreateCommandQueue(context, &device);

if (commandQueue == NULL)

{

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

// Create OpenCL program from HelloWorld.cl kernel source

program = CreateProgram(context, device, "HelloWorld.cl");

if (program == NULL)

{

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

// Create OpenCL kernel

kernel1 = clCreateKernel(program, "radix1", NULL);

if (kernel1 == NULL)

{

std::cerr << "Failed to create kernel" << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

kernel2 = clCreateKernel(program, "radix2", NULL);

if (kernel2 == NULL)

{

std::cerr << "Failed to create kernel" << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

float * data =(float*)malloc(sizeof(float)*15);

float *g_ones=(float*)malloc(sizeof(float)*15);

float *g_zero =(float*)malloc(sizeof(float)*15);

int j,k,q,p,m,n,cmp_val;

data[15]=(8,4,13,7,12,9,1,11,3,5,6,2,15,14,10);

cmp_val=1;

size_t globalWorkSize[1] = { 15};

size_t localWorkSize[1] = { 5 };

memObject[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

sizeof(float) * 15, data, NULL);

memObject[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

sizeof(float) * 15, NULL, NULL);

memObject[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

sizeof(float) * 15, NULL, NULL);

for(int i=0;i<4;i++)

{

j=0;k=0;

errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[0]);

errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[1]);

errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[2]);

errNum = clSetKernelArg(kernel1, 0, sizeof(int), &cmp_val);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error setting kernel arguments." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

errNum = clEnqueueNDRangeKernel(commandQueue, kernel1, 1, NULL,

globalWorkSize, localWorkSize,

0, NULL, NULL);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error queuing kernel for execution." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,

0, 15 * sizeof(float), g_ones,

0, NULL, NULL);

errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,

0, 15 * sizeof(float), g_zero,

0, NULL, NULL);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error reading result buffers." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

for( p=0;p<15;p++)

{

if (g_ones

!=-1)

{

m++;

}

if(g_zero

!=-1)

{

n++;

}

float *new_ones =(float*)malloc(sizeof(float)*m);

float *new_zero=(float*)malloc(sizeof(float)*n);

for( p=0;p<15;p++)

{

if (g_ones

!=-1)

{

new_ones[j++]=g_ones

;

}

if(g_zero

!=-1)

{

new_zero[k++]=g_zero

;

}

clEnqueueWriteBuffer(commandQueue, memObject[1], CL_TRUE, 0,

sizeof(float) * m,new_ones, 0, NULL, NULL);

clEnqueueWriteBuffer(commandQueue, memObject[2], CL_TRUE, 0,

sizeof(float) * n,new_zero, 0, NULL, NULL);

errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[0]);

errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[1]);

errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[2]);

errNum = clSetKernelArg(kernel2, 0, sizeof(int), &n);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error setting kernel arguments." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

errNum = clEnqueueNDRangeKernel(commandQueue, kernel2, 1, NULL,

globalWorkSize, localWorkSize,

0, NULL, NULL);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error queuing kernel for execution." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

errNum = clEnqueueReadBuffer(commandQueue, memObject[0], CL_TRUE,

0, 15 * sizeof(float), data,

0, NULL, NULL);

if (errNum != CL_SUCCESS)

{

std::cerr << "Error reading final buffer." << std::endl;

Cleanup(context, commandQueue, program, kernel1,kernel2 );

return 1;

}

cmp_val<<=1;

delete[ ]new_zero;

delete[ ]new_ones;

}

KERNEL CODE is ...

__kernel void radix1(__global float *data,__global float *g_ones,

__global float *g_zero,__global int cmp_value )

{

__local float ones[15],zero[15];

int tid;

//int id=get_global_id(0);

//data[id]=global_data[id];

//arrier(CLK_LOCAL_MEM_FENCE);

int gid=get_global_id(0);

if(data[gid] & cmp_value)

{

ones[gid]=data[gid];

zero[gid]=-1;

}

else

{

ones[gid]=-1;

zero[gid]=data[gid];

}

barrier(CLK_GLOBAL_MEM_FENCE);

tid=get_global_id(0);

g_ones[tid]=ones[tid];

g_zero[tid]=zero[tid];

}

__kernel void radix2(__global float *data, __global float *new_ones,

__global float *new_zero,__global int n)

{

int id=get_global_id(0);

if (id>=0 && id<n)

data[id]=new_zero[id];

else

data[id]=new_ones[n-id];

}

himanshu_gautam · ‎04-09-2013

1. Just say "int cmp_value" instead of "__global int cmp_value" in your kernel.

2. Also, In your clSetKernelArg() calls, I dont see the arg_index changing. You are always setting the 0th argument.

View solution in original post

himanshu_gautam · ‎04-09-2013

1. Just say "int cmp_value" instead of "__global int cmp_value" in your kernel.

2. Also, In your clSetKernelArg() calls, I dont see the arg_index changing. You are always setting the 0th argument.

shreedhar_pawar · ‎04-09-2013

Your solution works... but I am having an error in enqueing the 2nd kernel...!

himanshu_gautam · ‎04-09-2013

The clSetKernelArg() for 2nd kernel is also setting only the 1st argument. Please correct this for both the kernels.

You may want to tell more about the error you are seeing.....

but I feel that it is just a trivial error and you should be able to solve this yourself....

shreedhar_pawar · ‎04-09-2013

I think this is because I have defined the local_size to be 5, but during the 2nd kernel the size of the new_ones and new_zero changes...! Is this the reason...?

I also tried to make the local_size to be 1, even then it gives the same error..

I also want to ask that is it necessary for the local_size to be the factor of global_size, can this give a problem while enqueing the kernel or while reading the buffer...?

shreedhar_pawar · ‎04-09-2013

Yeah that fixed the error... had forgotten to rectify the last argument in the list...! But the code isn't sorting the no.s properly...! Can you tell me what could be the fault... I am not sure about the way I have written the kernel code... please tell me any mistakes in the kernel code...!

shreedhar_pawar · ‎04-09-2013

I am sorry that I asked so quickly.. I'll try to solve it myself before and then tell you if any errors...!

Archives Discussions

Error: Invalid Address Space