AnsweredAssumed Answered

Error: Invalid Address Space

Question asked by shreedhar_pawar on Apr 9, 2013
Latest reply on Apr 9, 2013 by shreedhar_pawar

I have attempted to write a radix sort code partly sequential and partly parallel. The code is giving a runtime error saying ..

My code is as follows, I have highlighted the actual sequential part of the radix sort in the host code(i.e. the for loop for the binary bit pass..)...

Invalid Address Space 3.PNG

 

 

HOST CODE is..

 

#include <iostream>

#include <fstream>

#include <sstream>

 

 

#ifdef __APPLE__

#include <OpenCL/cl.h>

#else

#include <CL/cl.h>

#endif

 

//  Create an OpenCL context on the first available platform using

//  either a GPU or CPU depending on what is available.

//

cl_context CreateContext()

{

    cl_int errNum;

    cl_uint numPlatforms;

    cl_platform_id firstPlatformId;

    cl_context context = NULL;

 

 

    // First, select an OpenCL platform to run on.  For this example, we

    // simply choose the first available platform.  Normally, you would

    // query for all available platforms and select the most appropriate one.

    errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);

    if (errNum != CL_SUCCESS || numPlatforms <= 0)

    {

        std::cerr << "Failed to find any OpenCL platforms." << std::endl;

        return NULL;

    }

 

 

    // Next, create an OpenCL context on the platform.  Attempt to

    // create a GPU-based context, and if that fails, try to create

    // a CPU-based context.

    cl_context_properties contextProperties[] =

    {

        CL_CONTEXT_PLATFORM,

        (cl_context_properties)firstPlatformId,

        0

    };

    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,

                                      NULL, NULL, &errNum);

    if (errNum != CL_SUCCESS)

    {

        std::cout << "Could not create GPU context, trying CPU..." << std::endl;

        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,

                                          NULL, NULL, &errNum);

        if (errNum != CL_SUCCESS)

        {

            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;

            return NULL;

        }

    }

 

 

    return context;

}

 

 

///

//  Create a command queue on the first device available on the

//  context

//

cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)

{

    cl_int errNum;

    cl_device_id *devices;

    cl_command_queue commandQueue = NULL;

    size_t deviceBufferSize = -1;

 

 

    // First get the size of the devices buffer

    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);

    if (errNum != CL_SUCCESS)

    {

        std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";

        return NULL;

    }

 

 

    if (deviceBufferSize <= 0)

    {

        std::cerr << "No devices available.";

        return NULL;

    }

 

 

    // Allocate memory for the devices buffer

    devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];

    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);

    if (errNum != CL_SUCCESS)

    {

        delete [] devices;

        std::cerr << "Failed to get device IDs";

        return NULL;

    }

 

 

    // In this example, we just choose the first available device.  In a

    // real program, you would likely use all available devices or choose

    // the highest performance device based on OpenCL device queries

    commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

    if (commandQueue == NULL)

    {

        delete [] devices;

        std::cerr << "Failed to create commandQueue for device 0";

        return NULL;

    }

 

 

    *device = devices[0];

    delete [] devices;

    return commandQueue;

}

 

 

///

//  Create an OpenCL program from the kernel source file

//

cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)

{

    cl_int errNum;

    cl_program program;

 

 

    std::ifstream kernelFile(fileName, std::ios::in);

    if (!kernelFile.is_open())

    {

        std::cerr << "Failed to open file for reading: " << fileName << std::endl;

        return NULL;

    }

 

 

    std::ostringstream oss;

    oss << kernelFile.rdbuf();

 

 

    std::string srcStdStr = oss.str();

    const char *srcStr = srcStdStr.c_str();

    program = clCreateProgramWithSource(context, 1,

                                        (const char**)&srcStr,

                                        NULL, NULL);

    if (program == NULL)

    {

        std::cerr << "Failed to create CL program from source." << std::endl;

        return NULL;

    }

 

 

    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

    if (errNum != CL_SUCCESS)

    {

        // Determine the reason for the error

        char buildLog[16384];

        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,

                              sizeof(buildLog), buildLog, NULL);

 

 

        std::cerr << "Error in kernel: " << std::endl;

        std::cerr << buildLog;

                    system("pause");

        clReleaseProgram(program);

        return NULL;

    }

 

 

    return program;

}

 

 

//  Cleanup any created OpenCL resources

//

void Cleanup(cl_context context, cl_command_queue commandQueue,

             cl_program program, cl_kernel kernel1,  cl_kernel kernel2 )

{

    if (commandQueue != 0)

        clReleaseCommandQueue(commandQueue);

 

 

    if (kernel1 != 0)

        clReleaseKernel(kernel1);

 

 

          if (kernel2 != 0)

        clReleaseKernel(kernel2);

 

 

    if (program != 0)

        clReleaseProgram(program);

 

 

    if (context != 0)

        clReleaseContext(context);

 

 

}

 

 

///

//      main() for RadixSort example

//

int main(int argc, char** argv)

{

    cl_context context = 0;

    cl_command_queue commandQueue = 0;

    cl_program program = 0;

    cl_device_id device = 0;

    cl_kernel kernel1 = 0;

          cl_kernel kernel2 = 0;

          cl_mem memObject[3] = {0,0,0};

    cl_int errNum;

 

 

    // Create an OpenCL context on first available platform

    context = CreateContext();

    if (context == NULL)

    {

        std::cerr << "Failed to create OpenCL context." << std::endl;

        return 1;

    }

 

 

    // Create a command-queue on the first device available

    // on the created context

    commandQueue = CreateCommandQueue(context, &device);

    if (commandQueue == NULL)

    {

         Cleanup(context, commandQueue, program, kernel1,kernel2 );

        return 1;

    }

 

 

    // Create OpenCL program from HelloWorld.cl kernel source

    program = CreateProgram(context, device, "HelloWorld.cl");

    if (program == NULL)

    {

      Cleanup(context, commandQueue, program, kernel1,kernel2 );

        return 1;

    }

 

 

    // Create OpenCL kernel

    kernel1 = clCreateKernel(program, "radix1", NULL);

    if (kernel1 == NULL)

    {

        std::cerr << "Failed to create kernel" << std::endl;

        Cleanup(context, commandQueue, program, kernel1,kernel2 );

        return 1;

    }

 

 

           kernel2 = clCreateKernel(program, "radix2", NULL);

    if (kernel2 == NULL)

    {

        std::cerr << "Failed to create kernel" << std::endl;

        Cleanup(context, commandQueue, program, kernel1,kernel2 );

        return 1;

    }

 

    float * data =(float*)malloc(sizeof(float)*15);

           float *g_ones=(float*)malloc(sizeof(float)*15);

            float *g_zero  =(float*)malloc(sizeof(float)*15);

 

int j,k,q,p,m,n,cmp_val;

                    data[15]=(8,4,13,7,12,9,1,11,3,5,6,2,15,14,10);

          cmp_val=1;      

           size_t globalWorkSize[1] = { 15};

    size_t localWorkSize[1] = { 5 };

 

      memObject[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                              sizeof(float) * 15, data, NULL);

                    memObject[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                              sizeof(float) * 15, NULL, NULL);

                    memObject[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                              sizeof(float) * 15, NULL, NULL);

 

 

    for(int i=0;i<4;i++)

          {

                    j=0;k=0;

errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[0]);

                                        errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[1]);

                                                            errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[2]);

                                                            errNum = clSetKernelArg(kernel1, 0, sizeof(int), &cmp_val);

                    if (errNum != CL_SUCCESS)

                     {

                              std::cerr << "Error setting kernel arguments." << std::endl;

                              Cleanup(context, commandQueue, program, kernel1,kernel2 );

                              return 1;

                     }

 

 

 

 

                    errNum = clEnqueueNDRangeKernel(commandQueue, kernel1, 1, NULL,

                              globalWorkSize, localWorkSize,

                              0, NULL, NULL);

        if (errNum != CL_SUCCESS)

                     {

                              std::cerr << "Error queuing kernel for execution." << std::endl;

                              Cleanup(context, commandQueue, program, kernel1,kernel2 );

                              return 1;

                     }

 

 

 

                    errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,

                                 0, 15 * sizeof(float), g_ones,

                                 0, NULL, NULL);

                    errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,

                                 0, 15 * sizeof(float), g_zero,

                                 0, NULL, NULL);

        if (errNum != CL_SUCCESS)

        {

          std::cerr << "Error reading result buffers." << std::endl;

          Cleanup(context, commandQueue, program, kernel1,kernel2 );

           return 1;

        }

 

 

 

              for( p=0;p<15;p++)

              {

                       if (g_ones[p]!=-1)

                       {

                                 m++;

                       }

                       if(g_zero[p]!=-1)

                       {

                                 n++;

                       }

              }

    float *new_ones =(float*)malloc(sizeof(float)*m);

          float *new_zero=(float*)malloc(sizeof(float)*n);

 

 

             for( p=0;p<15;p++)

             {

                       if (g_ones[p]!=-1)

                       {

                                 new_ones[j++]=g_ones[p];

                       }

                       if(g_zero[p]!=-1)

                       {

                                 new_zero[k++]=g_zero[p];

                       }

             }

 

          clEnqueueWriteBuffer(commandQueue,           memObject[1], CL_TRUE, 0,

sizeof(float) * m,new_ones, 0, NULL, NULL);

          clEnqueueWriteBuffer(commandQueue,           memObject[2], CL_TRUE, 0,

sizeof(float) * n,new_zero, 0, NULL, NULL);

 

 

 

                    errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[0]);

                                        errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[1]);

                                                            errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[2]);

 

                                                            errNum = clSetKernelArg(kernel2, 0, sizeof(int), &n);

                    if (errNum != CL_SUCCESS)

                     {

                              std::cerr << "Error setting kernel arguments." << std::endl;

                              Cleanup(context, commandQueue, program, kernel1,kernel2 );

                              return 1;

                     }

                    errNum = clEnqueueNDRangeKernel(commandQueue, kernel2, 1, NULL,

                              globalWorkSize, localWorkSize,

                              0, NULL, NULL);

 

 

                    if (errNum != CL_SUCCESS)

                     {

                              std::cerr << "Error queuing kernel for execution." << std::endl;

                              Cleanup(context, commandQueue, program, kernel1,kernel2 );

                              return 1;

                     }

 

 

 

 

                    errNum = clEnqueueReadBuffer(commandQueue, memObject[0], CL_TRUE,

                                 0, 15 * sizeof(float), data,

                                 0, NULL, NULL);

       if (errNum != CL_SUCCESS)

        {

          std::cerr << "Error reading final buffer." << std::endl;

          Cleanup(context, commandQueue, program, kernel1,kernel2 );

           return 1;

        }

              cmp_val<<=1;

                    delete[ ]new_zero;

                              delete[ ]new_ones;

          }

}

 

 

KERNEL CODE is ...

 

 

__kernel void radix1(__global float *data,__global float *g_ones,

          __global float *g_zero,__global int cmp_value )

{

 

  __local float  ones[15],zero[15];

          int tid;

 

 

          //int id=get_global_id(0);

          //data[id]=global_data[id];

          //arrier(CLK_LOCAL_MEM_FENCE);

 

 

          int gid=get_global_id(0);

            if(data[gid] & cmp_value)

            {

                      ones[gid]=data[gid];

                      zero[gid]=-1;

            }

            else

      {

                      ones[gid]=-1;

                      zero[gid]=data[gid];

            }

            barrier(CLK_GLOBAL_MEM_FENCE);

 

 

           tid=get_global_id(0);

             g_ones[tid]=ones[tid];

             g_zero[tid]=zero[tid];

 

}

 

 

__kernel void radix2(__global float *data, __global float *new_ones,

          __global float *new_zero,__global int n)

{

          int id=get_global_id(0);

            if (id>=0 && id<n)

                      data[id]=new_zero[id];

            else

                      data[id]=new_ones[n-id];

}

 


Outcomes