6 Replies Latest reply on Apr 9, 2013 7:22 AM by shreedhar_pawar

    Error: Invalid Address Space

    shreedhar_pawar

      I have attempted to write a radix sort code partly sequential and partly parallel. The code is giving a runtime error saying ..

      My code is as follows, I have highlighted the actual sequential part of the radix sort in the host code(i.e. the for loop for the binary bit pass..)...

      Invalid Address Space 3.PNG

       

       

      HOST CODE is..

       

      #include <iostream>

      #include <fstream>

      #include <sstream>

       

       

      #ifdef __APPLE__

      #include <OpenCL/cl.h>

      #else

      #include <CL/cl.h>

      #endif

       

      //  Create an OpenCL context on the first available platform using

      //  either a GPU or CPU depending on what is available.

      //

      cl_context CreateContext()

      {

          cl_int errNum;

          cl_uint numPlatforms;

          cl_platform_id firstPlatformId;

          cl_context context = NULL;

       

       

          // First, select an OpenCL platform to run on.  For this example, we

          // simply choose the first available platform.  Normally, you would

          // query for all available platforms and select the most appropriate one.

          errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);

          if (errNum != CL_SUCCESS || numPlatforms <= 0)

          {

              std::cerr << "Failed to find any OpenCL platforms." << std::endl;

              return NULL;

          }

       

       

          // Next, create an OpenCL context on the platform.  Attempt to

          // create a GPU-based context, and if that fails, try to create

          // a CPU-based context.

          cl_context_properties contextProperties[] =

          {

              CL_CONTEXT_PLATFORM,

              (cl_context_properties)firstPlatformId,

              0

          };

          context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,

                                            NULL, NULL, &errNum);

          if (errNum != CL_SUCCESS)

          {

              std::cout << "Could not create GPU context, trying CPU..." << std::endl;

              context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,

                                                NULL, NULL, &errNum);

              if (errNum != CL_SUCCESS)

              {

                  std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;

                  return NULL;

              }

          }

       

       

          return context;

      }

       

       

      ///

      //  Create a command queue on the first device available on the

      //  context

      //

      cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)

      {

          cl_int errNum;

          cl_device_id *devices;

          cl_command_queue commandQueue = NULL;

          size_t deviceBufferSize = -1;

       

       

          // First get the size of the devices buffer

          errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);

          if (errNum != CL_SUCCESS)

          {

              std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";

              return NULL;

          }

       

       

          if (deviceBufferSize <= 0)

          {

              std::cerr << "No devices available.";

              return NULL;

          }

       

       

          // Allocate memory for the devices buffer

          devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];

          errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);

          if (errNum != CL_SUCCESS)

          {

              delete [] devices;

              std::cerr << "Failed to get device IDs";

              return NULL;

          }

       

       

          // In this example, we just choose the first available device.  In a

          // real program, you would likely use all available devices or choose

          // the highest performance device based on OpenCL device queries

          commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

          if (commandQueue == NULL)

          {

              delete [] devices;

              std::cerr << "Failed to create commandQueue for device 0";

              return NULL;

          }

       

       

          *device = devices[0];

          delete [] devices;

          return commandQueue;

      }

       

       

      ///

      //  Create an OpenCL program from the kernel source file

      //

      cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)

      {

          cl_int errNum;

          cl_program program;

       

       

          std::ifstream kernelFile(fileName, std::ios::in);

          if (!kernelFile.is_open())

          {

              std::cerr << "Failed to open file for reading: " << fileName << std::endl;

              return NULL;

          }

       

       

          std::ostringstream oss;

          oss << kernelFile.rdbuf();

       

       

          std::string srcStdStr = oss.str();

          const char *srcStr = srcStdStr.c_str();

          program = clCreateProgramWithSource(context, 1,

                                              (const char**)&srcStr,

                                              NULL, NULL);

          if (program == NULL)

          {

              std::cerr << "Failed to create CL program from source." << std::endl;

              return NULL;

          }

       

       

          errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

          if (errNum != CL_SUCCESS)

          {

              // Determine the reason for the error

              char buildLog[16384];

              clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,

                                    sizeof(buildLog), buildLog, NULL);

       

       

              std::cerr << "Error in kernel: " << std::endl;

              std::cerr << buildLog;

                          system("pause");

              clReleaseProgram(program);

              return NULL;

          }

       

       

          return program;

      }

       

       

      //  Cleanup any created OpenCL resources

      //

      void Cleanup(cl_context context, cl_command_queue commandQueue,

                   cl_program program, cl_kernel kernel1,  cl_kernel kernel2 )

      {

          if (commandQueue != 0)

              clReleaseCommandQueue(commandQueue);

       

       

          if (kernel1 != 0)

              clReleaseKernel(kernel1);

       

       

                if (kernel2 != 0)

              clReleaseKernel(kernel2);

       

       

          if (program != 0)

              clReleaseProgram(program);

       

       

          if (context != 0)

              clReleaseContext(context);

       

       

      }

       

       

      ///

      //      main() for RadixSort example

      //

      int main(int argc, char** argv)

      {

          cl_context context = 0;

          cl_command_queue commandQueue = 0;

          cl_program program = 0;

          cl_device_id device = 0;

          cl_kernel kernel1 = 0;

                cl_kernel kernel2 = 0;

                cl_mem memObject[3] = {0,0,0};

          cl_int errNum;

       

       

          // Create an OpenCL context on first available platform

          context = CreateContext();

          if (context == NULL)

          {

              std::cerr << "Failed to create OpenCL context." << std::endl;

              return 1;

          }

       

       

          // Create a command-queue on the first device available

          // on the created context

          commandQueue = CreateCommandQueue(context, &device);

          if (commandQueue == NULL)

          {

               Cleanup(context, commandQueue, program, kernel1,kernel2 );

              return 1;

          }

       

       

          // Create OpenCL program from HelloWorld.cl kernel source

          program = CreateProgram(context, device, "HelloWorld.cl");

          if (program == NULL)

          {

            Cleanup(context, commandQueue, program, kernel1,kernel2 );

              return 1;

          }

       

       

          // Create OpenCL kernel

          kernel1 = clCreateKernel(program, "radix1", NULL);

          if (kernel1 == NULL)

          {

              std::cerr << "Failed to create kernel" << std::endl;

              Cleanup(context, commandQueue, program, kernel1,kernel2 );

              return 1;

          }

       

       

                 kernel2 = clCreateKernel(program, "radix2", NULL);

          if (kernel2 == NULL)

          {

              std::cerr << "Failed to create kernel" << std::endl;

              Cleanup(context, commandQueue, program, kernel1,kernel2 );

              return 1;

          }

       

          float * data =(float*)malloc(sizeof(float)*15);

                 float *g_ones=(float*)malloc(sizeof(float)*15);

                  float *g_zero  =(float*)malloc(sizeof(float)*15);

       

      int j,k,q,p,m,n,cmp_val;

                          data[15]=(8,4,13,7,12,9,1,11,3,5,6,2,15,14,10);

                cmp_val=1;      

                 size_t globalWorkSize[1] = { 15};

          size_t localWorkSize[1] = { 5 };

       

            memObject[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                                    sizeof(float) * 15, data, NULL);

                          memObject[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                                    sizeof(float) * 15, NULL, NULL);

                          memObject[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

                                    sizeof(float) * 15, NULL, NULL);

       

       

          for(int i=0;i<4;i++)

                {

                          j=0;k=0;

      errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[0]);

                                              errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[1]);

                                                                  errNum = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &memObject[2]);

                                                                  errNum = clSetKernelArg(kernel1, 0, sizeof(int), &cmp_val);

                          if (errNum != CL_SUCCESS)

                           {

                                    std::cerr << "Error setting kernel arguments." << std::endl;

                                    Cleanup(context, commandQueue, program, kernel1,kernel2 );

                                    return 1;

                           }

       

       

       

       

                          errNum = clEnqueueNDRangeKernel(commandQueue, kernel1, 1, NULL,

                                    globalWorkSize, localWorkSize,

                                    0, NULL, NULL);

              if (errNum != CL_SUCCESS)

                           {

                                    std::cerr << "Error queuing kernel for execution." << std::endl;

                                    Cleanup(context, commandQueue, program, kernel1,kernel2 );

                                    return 1;

                           }

       

       

       

                          errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,

                                       0, 15 * sizeof(float), g_ones,

                                       0, NULL, NULL);

                          errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,

                                       0, 15 * sizeof(float), g_zero,

                                       0, NULL, NULL);

              if (errNum != CL_SUCCESS)

              {

                std::cerr << "Error reading result buffers." << std::endl;

                Cleanup(context, commandQueue, program, kernel1,kernel2 );

                 return 1;

              }

       

       

       

                    for( p=0;p<15;p++)

                    {

                             if (g_ones[p]!=-1)

                             {

                                       m++;

                             }

                             if(g_zero[p]!=-1)

                             {

                                       n++;

                             }

                    }

          float *new_ones =(float*)malloc(sizeof(float)*m);

                float *new_zero=(float*)malloc(sizeof(float)*n);

       

       

                   for( p=0;p<15;p++)

                   {

                             if (g_ones[p]!=-1)

                             {

                                       new_ones[j++]=g_ones[p];

                             }

                             if(g_zero[p]!=-1)

                             {

                                       new_zero[k++]=g_zero[p];

                             }

                   }

       

                clEnqueueWriteBuffer(commandQueue,           memObject[1], CL_TRUE, 0,

      sizeof(float) * m,new_ones, 0, NULL, NULL);

                clEnqueueWriteBuffer(commandQueue,           memObject[2], CL_TRUE, 0,

      sizeof(float) * n,new_zero, 0, NULL, NULL);

       

       

       

                          errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[0]);

                                              errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[1]);

                                                                  errNum = clSetKernelArg(kernel2, 0, sizeof(cl_mem), &memObject[2]);

       

                                                                  errNum = clSetKernelArg(kernel2, 0, sizeof(int), &n);

                          if (errNum != CL_SUCCESS)

                           {

                                    std::cerr << "Error setting kernel arguments." << std::endl;

                                    Cleanup(context, commandQueue, program, kernel1,kernel2 );

                                    return 1;

                           }

                          errNum = clEnqueueNDRangeKernel(commandQueue, kernel2, 1, NULL,

                                    globalWorkSize, localWorkSize,

                                    0, NULL, NULL);

       

       

                          if (errNum != CL_SUCCESS)

                           {

                                    std::cerr << "Error queuing kernel for execution." << std::endl;

                                    Cleanup(context, commandQueue, program, kernel1,kernel2 );

                                    return 1;

                           }

       

       

       

       

                          errNum = clEnqueueReadBuffer(commandQueue, memObject[0], CL_TRUE,

                                       0, 15 * sizeof(float), data,

                                       0, NULL, NULL);

             if (errNum != CL_SUCCESS)

              {

                std::cerr << "Error reading final buffer." << std::endl;

                Cleanup(context, commandQueue, program, kernel1,kernel2 );

                 return 1;

              }

                    cmp_val<<=1;

                          delete[ ]new_zero;

                                    delete[ ]new_ones;

                }

      }

       

       

      KERNEL CODE is ...

       

       

      __kernel void radix1(__global float *data,__global float *g_ones,

                __global float *g_zero,__global int cmp_value )

      {

       

        __local float  ones[15],zero[15];

                int tid;

       

       

                //int id=get_global_id(0);

                //data[id]=global_data[id];

                //arrier(CLK_LOCAL_MEM_FENCE);

       

       

                int gid=get_global_id(0);

                  if(data[gid] & cmp_value)

                  {

                            ones[gid]=data[gid];

                            zero[gid]=-1;

                  }

                  else

            {

                            ones[gid]=-1;

                            zero[gid]=data[gid];

                  }

                  barrier(CLK_GLOBAL_MEM_FENCE);

       

       

                 tid=get_global_id(0);

                   g_ones[tid]=ones[tid];

                   g_zero[tid]=zero[tid];

       

      }

       

       

      __kernel void radix2(__global float *data, __global float *new_ones,

                __global float *new_zero,__global int n)

      {

                int id=get_global_id(0);

                  if (id>=0 && id<n)

                            data[id]=new_zero[id];

                  else

                            data[id]=new_ones[n-id];

      }