6 Replies Latest reply on Apr 15, 2013 5:07 AM by himanshu.gautam

    Initialize get_global_id(0) from 1 and not from 0

    shreedhar_pawar

      How do I make the variable that receives get_global_id(0) start from 1 instead of 0?

        • Re: Initialize get_global_id(0) from 1 and not from 0
          himanshu.gautam

          int gid = get_global_id(0) + 1;

            • Re: Initialize get_global_id(0) from 1 and not from 0
              shreedhar_pawar

              I tried doing this, but afterwards my laptop shuts down and a blue screen appears saying "Your PC recovered from a serious problem, Your display Driver is unable to reset, Contact your graphic vendor". What should I do now? I have tried it three times.

               

              Actually, I am doing this so that image processing starts from the first row and the first column (index 1) rather than from index 0, so I use x=get_global_id(0)+1; and y=get_global_id(1)+1;. Is there another way of doing this?

                • Re: Initialize get_global_id(0) from 1 and not from 0
                  dmeiser

                  These lines are not what's causing your laptop to crash. Can you post the entire kernel? For instance, you need to make sure that you're not writing to out-of-bounds memory locations. Schematically:

                  // Offset both work-item ids by one so that x and y start at 1 instead of 0.
                  x=get_global_id(0) + 1;

                  y=get_global_id(1) + 1;

                  // Only work-items whose shifted coordinates are still below dimx/dimy do any work;
                  // the rest fall through without touching memory.
                  if (x < dimx && y < dimy) {

                       // do stuff

                  }

                  1 of 1 people found this helpful
                    • Re: Initialize get_global_id(0) from 1 and not from 0
                      shreedhar_pawar

                      /*
                       * Imagetest - build the 8-neighbour edge list of one pixel and select, per
                       * colour channel, the neighbour with the smallest absolute difference.
                       *
                       * One work-item handles pixel (x, y) = (get_global_id(0), get_global_id(1)).
                       * With id = width*x + y and k = 0..7 enumerating the neighbours
                       * (-1,-1) (-1,0) (-1,+1) (0,-1) (0,+1) (+1,-1) (+1,0) (+1,+1):
                       *
                       *   src_arr[8*id + k]    = id (replicated to all four lanes)
                       *   weight_arr[8*id + k] = fabs(p - p_k), texels scaled by 255
                       *   dest_arr[8*id + k]   = width*(x+dx) + (y+dy) (replicated to all lanes)
                       *   r_src_arr / r_dest_arr / r_weight_arr[8*id + k] = the .x lane of the above
                       *   min_weight_arr / min_src_arr / min_dest_arr[id].xyz = per channel, the
                       *       weight / src / dest of the first neighbour with the smallest weight
                       *       (the .w lane is left untouched)
                       *
                       * dstImg is accepted but never written.
                       *
                       * Changes relative to the earlier revision:
                       *   - The x-channel "first neighbour is the minimum" check compared against
                       *     the .y lane of the first weight; every channel now compares against
                       *     its own lane.
                       *   - Work-items outside width x height return immediately instead of
                       *     writing past the end of the buffers.
                       *
                       * NOTE(review): id = width*x + y only stays below width*height when
                       * width == height (or the buffers are sized for width*width); for non-square
                       * images confirm the intended layout with the host code.
                       * NOTE(review): for border pixels the dest index wraps around in unsigned
                       * arithmetic exactly as before - confirm how the host treats those edges.
                       */
                      __kernel void Imagetest(__read_only image2d_t srcImg,
                                              __write_only image2d_t dstImg,
                                              sampler_t sampler,
                                              int width, int height,
                                              __global float4* src_arr,        // 8*width*height
                                              __global float4* dest_arr,       // 8*width*height
                                              __global float4* weight_arr,     // 8*width*height
                                              __global float4* min_src_arr,
                                              __global float4* min_dest_arr,
                                              __global float4* min_weight_arr,
                                              __global float* r_src_arr,       // 8*width*height
                                              __global float* r_dest_arr,      // 8*width*height
                                              __global float* r_weight_arr)
                      {
                          /* Neighbour offsets, in the slot order 0..7 used by the edge arrays. */
                          const int2 offs[8] = {
                              (int2)(-1, -1), (int2)(-1, 0), (int2)(-1, 1),
                              (int2)( 0, -1),                (int2)( 0, 1),
                              (int2)( 1, -1), (int2)( 1, 0), (int2)( 1, 1)
                          };

                          uint x = get_global_id(0);
                          uint y = get_global_id(1);

                          /* Ignore padding work-items so nothing is written out of bounds. */
                          if (x >= (uint)width || y >= (uint)height)
                              return;

                          uint id   = ((uint)width * x) + y;
                          uint base = 8 * id;

                          int2   centre = (int2)((int)x, (int)y);
                          float4 p      = read_imagef(srcImg, sampler, centre) * 255.0f;
                          float4 s      = (float4)((float)id);

                          /* Running per-lane minimum and the dest id that goes with it. */
                          float4 best_w = (float4)(0.0f);
                          float4 best_d = (float4)(0.0f);

                          for (uint k = 0; k < 8; k++)
                          {
                              int2   o = offs[k];
                              float4 q = read_imagef(srcImg, sampler, centre + o) * 255.0f;
                              float4 w = fabs(p - q);

                              /* Unsigned arithmetic on purpose: matches the original expression. */
                              uint   nid = ((uint)width * (x + (uint)o.x)) + (y + (uint)o.y);
                              float4 d   = (float4)((float)nid);

                              src_arr[base + k]    = s;
                              weight_arr[base + k] = w;
                              dest_arr[base + k]   = d;

                              r_src_arr[base + k]    = s.x;
                              r_weight_arr[base + k] = w.x;
                              r_dest_arr[base + k]   = d.x;

                              if (k == 0)
                              {
                                  best_w = w;
                                  best_d = d;
                              }
                              else
                              {
                                  /* Strict '<' keeps the earliest neighbour on ties. */
                                  int4 lt = w < best_w;
                                  best_w = select(best_w, w, lt);
                                  best_d = select(best_d, d, lt);
                              }
                          }

                          /* Only the colour lanes are published; .w is not touched. */
                          min_weight_arr[id].xyz = best_w.xyz;
                          min_src_arr[id].xyz    = s.xyz;
                          min_dest_arr[id].xyz   = best_d.xyz;
                      }

                        • Re: Initialize get_global_id(0) from 1 and not from 0
                          shreedhar_pawar

                          I have made the sizes of all the buffers (width-2) * (height-2) and have set x and y to get_global_id + 1. Even then, I am getting an error when reading the buffers. The host code and kernel code are below. Where exactly is the memory problem?

                           

                          KERNEL CODE

                           

                          /*
                           * gaussian_filter - despite the name, this kernel does no filtering and never
                           * writes dstImg. It builds the 8-neighbour edge list of one pixel and
                           * selects, per colour channel, the neighbour with the smallest absolute
                           * difference.
                           *
                           * One work-item handles pixel (x, y) = (get_global_id(0), get_global_id(1)).
                           * With id = width*x + y and k = 0..7 enumerating the neighbours
                           * (-1,-1) (-1,0) (-1,+1) (0,-1) (0,+1) (+1,-1) (+1,0) (+1,+1):
                           *
                           *   src_arr[8*id + k]    = id (replicated to all four lanes)
                           *   weight_arr[8*id + k] = fabs(p - p_k), texels scaled by 255
                           *   dest_arr[8*id + k]   = width*(x+dx) + (y+dy) (replicated to all lanes)
                           *   r_src_arr / r_dest_arr / r_weight_arr[8*id + k] = the .x lane of the above
                           *   min_weight_arr / min_src_arr / min_dest_arr[id].xyz = per channel, the
                           *       weight / src / dest of the first neighbour with the smallest weight
                           *       (the .w lane is left untouched)
                           *
                           * Changes relative to the earlier revision:
                           *   - The x-channel "first neighbour is the minimum" check compared against
                           *     the .y lane of the first weight; every channel now compares against
                           *     its own lane.
                           *   - Work-items outside width x height return immediately instead of
                           *     writing past the end of the buffers.
                           *
                           * NOTE(review): id = width*x + y only stays below width*height when
                           * width == height (or the buffers are sized for width*width); for non-square
                           * images confirm the intended layout with the host code.
                           * NOTE(review): for border pixels the dest index wraps around in unsigned
                           * arithmetic exactly as before - confirm how the host treats those edges.
                           */
                          __kernel void gaussian_filter(__read_only image2d_t srcImg,
                                                        __write_only image2d_t dstImg,
                                                        sampler_t sampler,
                                                        int width, int height,
                                                        __global float4* src_arr,        // 8*width*height
                                                        __global float4* dest_arr,       // 8*width*height
                                                        __global float4* weight_arr,     // 8*width*height
                                                        __global float4* min_src_arr,
                                                        __global float4* min_dest_arr,
                                                        __global float4* min_weight_arr,
                                                        __global float* r_src_arr,       // 8*width*height
                                                        __global float* r_dest_arr,      // 8*width*height
                                                        __global float* r_weight_arr)
                          {
                              /* Neighbour offsets, in the slot order 0..7 used by the edge arrays. */
                              const int2 offs[8] = {
                                  (int2)(-1, -1), (int2)(-1, 0), (int2)(-1, 1),
                                  (int2)( 0, -1),                (int2)( 0, 1),
                                  (int2)( 1, -1), (int2)( 1, 0), (int2)( 1, 1)
                              };

                              uint x = get_global_id(0);
                              uint y = get_global_id(1);

                              /* Ignore padding work-items so nothing is written out of bounds. */
                              if (x >= (uint)width || y >= (uint)height)
                                  return;

                              uint id   = ((uint)width * x) + y;
                              uint base = 8 * id;

                              int2   centre = (int2)((int)x, (int)y);
                              float4 p      = read_imagef(srcImg, sampler, centre) * 255.0f;
                              float4 s      = (float4)((float)id);

                              /* Running per-lane minimum and the dest id that goes with it. */
                              float4 best_w = (float4)(0.0f);
                              float4 best_d = (float4)(0.0f);

                              for (uint k = 0; k < 8; k++)
                              {
                                  int2   o = offs[k];
                                  float4 q = read_imagef(srcImg, sampler, centre + o) * 255.0f;
                                  float4 w = fabs(p - q);

                                  /* Unsigned arithmetic on purpose: matches the original expression. */
                                  uint   nid = ((uint)width * (x + (uint)o.x)) + (y + (uint)o.y);
                                  float4 d   = (float4)((float)nid);

                                  src_arr[base + k]    = s;
                                  weight_arr[base + k] = w;
                                  dest_arr[base + k]   = d;

                                  r_src_arr[base + k]    = s.x;
                                  r_weight_arr[base + k] = w.x;
                                  r_dest_arr[base + k]   = d.x;

                                  if (k == 0)
                                  {
                                      best_w = w;
                                      best_d = d;
                                  }
                                  else
                                  {
                                      /* Strict '<' keeps the earliest neighbour on ties. */
                                      int4 lt = w < best_w;
                                      best_w = select(best_w, w, lt);
                                      best_d = select(best_d, d, lt);
                                  }
                              }

                              /* Only the colour lanes are published; .w is not touched. */
                              min_weight_arr[id].xyz = best_w.xyz;
                              min_src_arr[id].xyz    = s.xyz;
                              min_dest_arr[id].xyz   = best_d.xyz;
                          }

                           

                          HOST CODE

                           

                          #include <iostream>

                          #include <fstream>

                          #include <sstream>

                          #include <string.h>

                           

                           

                          #ifdef __APPLE__

                          #include <OpenCL/cl.h>

                          #else

                          #include <CL/cl.h>

                          #endif

                           

                           

                          #include "FreeImage.h"

                           

                           

                          ///

                          //  Create an OpenCL context on the first available platform using

                          //  either a GPU or CPU depending on what is available.

                          //

                          cl_context CreateContext()

                          {

                              cl_int errNum;

                              cl_uint numPlatforms;

                              cl_platform_id firstPlatformId;

                              cl_context context = NULL;

                           

                           

                              // First, select an OpenCL platform to run on.  For this example, we

                              // simply choose the first available platform.  Normally, you would

                              // query for all available platforms and select the most appropriate one.

                              errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);

                              if (errNum != CL_SUCCESS || numPlatforms <= 0)

                              {

                                  std::cerr << "Failed to find any OpenCL platforms." << std::endl;

                                  return NULL;

                              }

                           

                           

                              // Next, create an OpenCL context on the platform.  Attempt to

                              // create a GPU-based context, and if that fails, try to create

                              // a CPU-based context.

                              cl_context_properties contextProperties[] =

                              {

                                  CL_CONTEXT_PLATFORM,

                                  (cl_context_properties)firstPlatformId,

                                  0

                              };

                              context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,

                                                                NULL, NULL, &errNum);

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cout << "Could not create GPU context, trying CPU..." << std::endl;

                                  context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,

                                                                    NULL, NULL, &errNum);

                                  if (errNum != CL_SUCCESS)

                                  {

                                      std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;

                                      return NULL;

                                  }

                              }

                           

                           

                              return context;

                          }

                           

                           

                          ///

                          //  Create a command queue on the first device available on the

                          //  context

                          //

                          cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)

                          {

                              cl_int errNum;

                              cl_device_id *devices;

                              cl_command_queue commandQueue = NULL;

                              size_t deviceBufferSize = -1;

                           

                           

                              // First get the size of the devices buffer

                              errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";

                                  return NULL;

                              }

                           

                           

                              if (deviceBufferSize <= 0)

                              {

                                  std::cerr << "No devices available.";

                                  return NULL;

                              }

                           

                           

                              // Allocate memory for the devices buffer

                              devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];

                              errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Failed to get device IDs";

                                  return NULL;

                              }

                           

                           

                              // In this example, we just choose the first available device.  In a

                              // real program, you would likely use all available devices or choose

                              // the highest performance device based on OpenCL device queries

                              commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

                              if (commandQueue == NULL)

                              {

                                  std::cerr << "Failed to create commandQueue for device 0";

                                  return NULL;

                              }

                           

                           

                              *device = devices[0];

                              delete [] devices;

                              return commandQueue;

                          }

                           

                           

                          ///

                          //  Create an OpenCL program from the kernel source file

                          //

                          cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)

                          {

                              cl_int errNum;

                              cl_program program;

                           

                           

                              std::ifstream kernelFile(fileName, std::ios::in);

                              if (!kernelFile.is_open())

                              {

                                  std::cerr << "Failed to open file for reading: " << fileName << std::endl;

                                  return NULL;

                              }

                           

                           

                              std::ostringstream oss;

                              oss << kernelFile.rdbuf();

                           

                           

                              std::string srcStdStr = oss.str();

                              const char *srcStr = srcStdStr.c_str();

                              program = clCreateProgramWithSource(context, 1,

                                                                  (const char**)&srcStr,

                                                                  NULL, NULL);

                              if (program == NULL)

                              {

                                  std::cerr << "Failed to create CL program from source." << std::endl;

                                  return NULL;

                              }

                           

                           

                              errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

                              if (errNum != CL_SUCCESS)

                              {

                                  // Determine the reason for the error

                                  char buildLog[16384];

                                  clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,

                                                        sizeof(buildLog), buildLog, NULL);

                           

                           

                                  std::cerr << "Error in kernel: " << std::endl;

                                  std::cerr << buildLog;

                            system("pause");

                                  clReleaseProgram(program);

                                  return NULL;

                              }

                           

                           

                              return program;

                          }

                           

                           

                           

                           

                          ///

                          //  Cleanup any created OpenCL resources

                          //

                          void Cleanup(cl_context context, cl_command_queue commandQueue,

                                       cl_program program, cl_kernel kernel, cl_mem imageObjects[2],

                                       cl_sampler sampler, cl_mem memObject[4])

                          {

                              for (int i = 0; i < 2; i++)

                              {

                                  if (imageObjects[i] != 0)

                                      clReleaseMemObject(imageObjects[i]);

                              }

                              if (commandQueue != 0)

                                  clReleaseCommandQueue(commandQueue);

                           

                           

                              if (kernel != 0)

                                  clReleaseKernel(kernel);

                           

                           

                              if (program != 0)

                                  clReleaseProgram(program);

                           

                           

                              if (sampler != 0)

                                  clReleaseSampler(sampler);

                           

                           

                              if (context != 0)

                                  clReleaseContext(context);

                          for (int i = 0; i < 6; i++)

                          {

                            if(memObject[i]!=0)

                            clReleaseMemObject(memObject[i]);

                          }

                          }

                           

                           

                          ///

                          //  Load an image using the FreeImage library and create an OpenCL

                          //  image out of it

                          //

                          cl_mem LoadImage(cl_context context, char *fileName, int &width, int &height)

                          {

                              FREE_IMAGE_FORMAT format = FreeImage_GetFileType(fileName, 0);

                              FIBITMAP* image = FreeImage_Load(format, fileName);

                           

                           

                              // Convert to 32-bit image

                              FIBITMAP* temp = image;

                              image = FreeImage_ConvertTo32Bits(image);

                              FreeImage_Unload(temp);

                           

                           

                              width = FreeImage_GetWidth(image);

                              height = FreeImage_GetHeight(image);

                           

                           

                              float *buffer = new float[width * height * 4];

                              memcpy(buffer, FreeImage_GetBits(image), width * height * 4);

                           

                           

                              FreeImage_Unload(image);

                           

                           

                              // Create OpenCL image

                              cl_image_format clImageFormat;

                              clImageFormat.image_channel_order = CL_RGBA;

                              clImageFormat.image_channel_data_type = CL_UNORM_INT8;

                           

                           

                              cl_int errNum;

                              cl_mem clImage;

                              clImage = clCreateImage2D(context,

                                                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,

                                                      &clImageFormat,

                                                      width,

                                                      height,

                                                      0,

                                                      buffer,

                                                      &errNum);

                           

                           

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error creating CL image object" << std::endl;

                                  return 0;

                              }

                           

                           

                              return clImage;

                          }

                           

                           

                          ///

                          //  Save an image using the FreeImage library

                          //

                          bool SaveImage(char *fileName, char *buffer, int width, int height)

                          {

                              FREE_IMAGE_FORMAT format = FreeImage_GetFIFFromFilename(fileName);

                              FIBITMAP *image = FreeImage_ConvertFromRawBits((BYTE*)buffer, width,

                                                  height, width * 4, 32,

                                                  0xFF000000, 0x00FF0000, 0x0000FF00);

                              return (FreeImage_Save(format, image, fileName) == TRUE) ? true : false;

                          }

                           

                           

                          bool CreateMemObjects(cl_command_queue commandQueue,cl_context context, cl_mem memObject[],int width, int height)

                          {

                           

                             memObject[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4*8, NULL, NULL) ;

                             memObject[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4*8, NULL, NULL);

                              memObject[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4*8, NULL, NULL);

                               memObject[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4, NULL, NULL);

                             memObject[4] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4, NULL, NULL) ;

                             memObject[5] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*4, NULL, NULL);

                           

                           

                              memObject[6] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*8, NULL, NULL) ;

                             memObject[7] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*8, NULL, NULL);

                              memObject[8] = clCreateBuffer(context, CL_MEM_READ_WRITE,

                                                             sizeof(float) * (width-2)*(height-2)*8, NULL, NULL);

                           

                           

                           

                            

                            

                            

                           

                           

                              if (memObject[0] == NULL || memObject[1] == NULL || memObject[2] == NULL ||memObject[3] == NULL ||

                            memObject[4] == NULL || memObject[5] == NULL )

                              {

                                  std::cerr << "Error creating memory objects." << std::endl;

                                  return false;

                              }

                           

                           

                              return true;

                          }

                           

                           

                          ///

                          //  Round up to the nearest multiple of the group size

                          //

                          size_t RoundUp(int groupSize, int globalSize)
                          {
                              // Smallest multiple of groupSize that is not below globalSize;
                              // globalSize itself when it already divides evenly.
                              const int remainder = globalSize % groupSize;
                              return (remainder == 0) ? globalSize
                                                      : globalSize + groupSize - remainder;
                          }

                           

                           

                           

                          int main(int argc, char** argv)

                          {

                              cl_context context = 0;

                              cl_command_queue commandQueue = 0;

                              cl_program program = 0;

                              cl_device_id device = 0;

                              cl_kernel kernel = 0;

                              cl_mem imageObjects[2] = { 0, 0 };

                            cl_mem memObject[11]= {0,0,0,0,0,0,0,0,0,0,0};

                              cl_sampler sampler = 0;

                              cl_int errNum;

                           

                           

                          //     Create an OpenCL context on first available platform

                              context = CreateContext();

                              if (context == NULL)

                              {

                                  std::cerr << "Failed to create OpenCL context." << std::endl;

                                  return 1;

                              }

                           

                           

                              // Create a command-queue on the first device available

                              // on the created context

                              commandQueue = CreateCommandQueue(context, &device);

                              if (commandQueue == NULL)

                              {

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              // Make sure the device supports images, otherwise exit

                              cl_bool imageSupport = CL_FALSE;

                              clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),

                                              &imageSupport, NULL);

                              if (imageSupport != CL_TRUE)

                              {

                                  std::cerr << "OpenCL device does not support images." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              // Load input image from file and load it into

                              // an OpenCL image object

                              int width, height;

                           

                              imageObjects[0] = LoadImage(context,"C:\\Users\\Shreedhar\\Documents\\Visual Studio 2010\\Projects\\ImageFilter3\\lena.bmp", width, height);

                              if (imageObjects[0] == 0)

                              {

                                  std::cerr << "Error loading: " << std::string("lena") << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  system("Pause");

                            return 1;

                              }

                           

                               size_t origin[3] = { 0, 0, 0 };

                              size_t region[3] = { width, height, 1};

                          if (!CreateMemObjects(commandQueue,context, memObject, width, height))

                              {

                                    Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                              // Create ouput image object

                              cl_image_format clImageFormat;

                              clImageFormat.image_channel_order = CL_RGBA;

                              clImageFormat.image_channel_data_type = CL_UNORM_INT8;

                              imageObjects[1] = clCreateImage2D(context,

                                                                 CL_MEM_WRITE_ONLY,

                                                                 &clImageFormat,

                                                                 width,

                                                                 height,

                                                                 0,

                                                                 NULL,

                                                                 &errNum);

                           

                           

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error creating CL output image object." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                           

                           

                              // Create sampler for sampling image object

                              sampler = clCreateSampler(context,

                                                        CL_FALSE, // Non-normalized coordinates

                                                        CL_ADDRESS_CLAMP_TO_EDGE,

                                                        CL_FILTER_NEAREST,

                                                        &errNum);

                           

                           

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error creating CL sampler object." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                           

                           

                          // Create OpenCL program

                              program = CreateProgram(context, device, "ImageSegment.cl");

                              if (program == NULL)

                              {

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              // Create OpenCL kernel

                              kernel = clCreateKernel(program, "gaussian_filter", NULL);

                              if (kernel == NULL)

                              {

                                  std::cerr << "Failed to create kernel" << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              // Set the kernel arguments

                              errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imageObjects[0]);

                              errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &imageObjects[1]);

                              errNum |= clSetKernelArg(kernel, 2, sizeof(cl_sampler), &sampler);

                              errNum |= clSetKernelArg(kernel, 3, sizeof(cl_int), &width);

                              errNum |= clSetKernelArg(kernel, 4, sizeof(cl_int), &height);

                            errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &memObject[0]);

                            errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &memObject[1]);

                            errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &memObject[2]);

                            errNum |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &memObject[3]);

                            errNum |= clSetKernelArg(kernel, 9, sizeof(cl_mem), &memObject[4]);

                            errNum |= clSetKernelArg(kernel, 10, sizeof(cl_mem), &memObject[5]);

                            errNum |= clSetKernelArg(kernel, 11, sizeof(cl_mem), &memObject[6]);

                            errNum |= clSetKernelArg(kernel, 12, sizeof(cl_mem), &memObject[7]);

                            errNum |= clSetKernelArg(kernel, 13, sizeof(cl_mem), &memObject[8]);

                           

                           

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error setting kernel arguments." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              size_t localWorkSize[2] = { 10, 17 };

                              size_t globalWorkSize[2] =  { RoundUp(localWorkSize[0], width),

                                                            RoundUp(localWorkSize[1], height) };

                           

                           

                              // Queue the kernel up for execution

                              errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL,

                                                              globalWorkSize, localWorkSize,

                                                              0, NULL, NULL);

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error queuing kernel for execution." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                           

                           

                              // Read the output buffer back to the Host

                            float * min_src_arr=  new float [(width-2) * (height-2) * 4];

                              char *buffer = new char [(width) * (height) * 4];

                            float *min_weight_arr = new float[(width-2)*(height-2)*4];

                            float *min_dest_arr = new float[(width-2)*(height-2)*4];

                            float *weight_arr = new float[(width-2)*(height-2)*4*8];

                            float *src_arr = new float[(width-2)*(height-2)*4*8];

                            float *dest_arr = new float[(width-2)*(height-2)*4*8];

                          float *r_weight_arr = new float[(width-2)*(height-2)*8];

                            float *r_src_arr = new float[(width-2)*(height-2)*8];

                            float *r_dest_arr = new float[(width-2)*(height-2)*8];

                           

                           

                           

                           

                            // char *result;

                            // result= (char*)malloc(sizeof(char)*4*width*height);

                            //     size_t origin[3] = { 0, 0, 0 };

                             // size_t region[3] = { width, height, 1};

                           

                           

                           

                           

                           

                           

                           

                           

                           

                           

                           

                           

                          /*    errNum = clEnqueueReadImage(commandQueue, imageObjects[1], CL_TRUE,

                                                          origin, region, 0, 0, buffer,

                                                          0, NULL, NULL);

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error reading result buffer." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                          */

                           

                           

                            // Read the output min arrays back to the Host

                          /*errNum = clEnqueueReadBuffer(commandQueue, memObject[2], CL_TRUE,

                                                           0,  8*4*(width-2)*(height-2) * sizeof(float), weight_arr,

                                                           0, NULL, NULL);

                          errNum = clEnqueueReadBuffer(commandQueue, memObject[1], CL_TRUE,

                                                           0,  8*4*(width-2)*(height-2) * sizeof(float), dest_arr,

                                                           0, NULL, NULL);

                          errNum = clEnqueueReadBuffer(commandQueue, memObject[0], CL_TRUE,

                                                           0,  8*4*(width-2)*(height-2) * sizeof(float), src_arr,

                                                           0, NULL, NULL);*/

                              errNum = clEnqueueReadBuffer(commandQueue, memObject[3], CL_TRUE,

                                                           0,  4*(width-2)*(height-2) * sizeof(float), min_src_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[4], CL_TRUE,

                                                           0,  4*(width-2)*(height-2) * sizeof(float), min_dest_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[5], CL_TRUE,

                                                           0,  4*(width-2)*(height-2) * sizeof(float), min_weight_arr,

                                                           0, NULL, NULL);

                           

                           

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[6], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), r_src_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[7], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), r_dest_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[8], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), r_weight_arr,

                                                           0, NULL, NULL);

                           

                           

                          /* errNum = clEnqueueReadBuffer(commandQueue, memObject[6], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), g_src_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[7], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), g_dest_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[8], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), g_weight_arr,

                                                           0, NULL, NULL);

                           

                           

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[6], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), b_src_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[7], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), b_dest_arr,

                                                           0, NULL, NULL);

                            errNum = clEnqueueReadBuffer(commandQueue, memObject[8], CL_TRUE,

                                                           0,  8*(width-2)*(height-2) * sizeof(float), b_weight_arr,

                                                           0, NULL, NULL);

                          */

                           

                              if (errNum != CL_SUCCESS)

                              {

                                  std::cerr << "Error reading result buffer." << std::endl;

                                  Cleanup(context, commandQueue, program, kernel, imageObjects, sampler,memObject);

                                  return 1;

                              }

                            system("pause");

                              // Output the result buffer

                           

                           

                          /* for ( unsigned int i = 0; i < 64; i)

                                {

                                  printf("%d=%f\n",i,r_weight_arr[i]);

                                }

                            printf("\n\n");

                            */

                             for ( unsigned int i = 0; i < 64; i++)

                                {

                                  printf("%d=%f\n",i,r_src_arr[i]);

                           

                                }

                            printf("\n\n");

                           

                           

                            for ( unsigned int i = 4104; i < 4200; i++)

                                {

                                  printf("%d=%f\n",i,r_dest_arr[i]);

                                }

                            printf("\n\n");

                           

                           

                           

                           

                          /*  for ( unsigned int i = 0; i < 64; i=i+4)

                                {

                                  printf("%d=%0.0f\n",i,min_weight_arr[i]);

                                }

                            printf("\n\n");

                           

                           

                           

                           

                           

                           

                            

                            

                            for ( unsigned int i = 0; i < 16; i=i+4)

                                {

                                  printf("%d=%f\n",i,min_src_arr[i]);

                                }

                            printf("\n\n");

                           

                           

                           

                            

                           

                           

                              // Output the result buffer

                            

                            for ( unsigned int i = 0; i <  16; i+i+4)

                                {

                                  printf("%d=%f\n",i,min_dest_arr[i]);

                                }

                            printf("\n\n");

                           

                           

                           

                           

                                std::cout << std::endl;

                              std::cout << "Executed program succesfully." << std::endl;

                            system("pause");

                          }

                            • Re: Initialize get_global_id(0) from 1 and not from 0
                              himanshu.gautam

                              Hi shreedhar,

                              I tried running your code on my Linux machine and it ran OK without any modifications (except for fixing the unterminated comment at the end of the host code). Shouldn't I be expecting some kind of segmentation fault?

                              Also, I request that you attach code as zip files, especially when it is this long.

                              If the issue is still there, can you try disabling parts of your code to figure out which particular buffer's reads are causing the segmentation fault? I hope you can do some investigation and post the results.