3 Replies Latest reply on Dec 22, 2014 9:49 AM by dipak

    Vector-add program using SVM produces a segmentation fault.

    yy1990cn

      I've written a simple vector-add program using SVM in APP SDK 3.0, but it reports a segmentation fault. The following is the code. I know it is long, but it's a simple OpenCL program. I've tried my best to figure out what's wrong, but to no avail.

       

      #include <stdio.h>
      #include <string.h>
      #include <stdlib.h>
      #include <stdbool.h>
      
      
      // OpenCL includes
      #include <CL/cl.h>
      
      
      // OpenCL C source for the `vecadd` kernel, kept as one string so that
      // main() can hand it to clCreateProgramWithSource().
      //
      // The kernel takes three __global int pointers (A, B, C). Each
      // work-item reads its index from get_global_id(0) and stores
      // A[idx] + B[idx] into C[idx]. The kernel does no bounds check, so
      // the global work size must not exceed the number of elements in
      // the arrays.
      const char* programSource =
      "__kernel                                            \n"
      "void vecadd(__global int *A,                        \n"
      "            __global int *B,                        \n"
      "            __global int *C)                        \n"
      "{                                                   \n"
      "                                                    \n"
      "   // Get the work-item’s unique ID                 \n"
      "   int idx = get_global_id(0);                      \n"
      "                                                    \n"
      "   // Add the corresponding locations of            \n"
      "   // 'A' and 'B', and store the result in 'C'.     \n"
      "   C[idx] = A[idx] + B[idx];                        \n"
      "}                                                   \n"
      ;
      
      
      int main() {
          // Elements in each array
          const int elements = 2048;
      
      
          // Compute the size of the data
          size_t datasize = sizeof(int)*elements;
          // Use this to check the output of each API call
          cl_int status;
      
      
          // Discover and initialize the platforms
          cl_uint numPlatforms = 0;
          cl_platform_id *platforms = NULL;
      
      
          // Use clGetPlatformIDs() to retrieve the number of
          // platforms
          status = clGetPlatformIDs(0, NULL, &numPlatforms);
      
      
          // Allocate enough space for each platform
          platforms =
                  (cl_platform_id*)malloc(
                      numPlatforms*sizeof(cl_platform_id));
      
      
          // Fill in platforms with clGetPlatformIDs()
          status = clGetPlatformIDs(numPlatforms, platforms,
                                    NULL);
      
      
          // STEP 2: Discover and initialize the devices
          cl_uint numDevices = 0;
          cl_device_id *devices = NULL;
      
      
          // Use clGetDeviceIDs() to retrieve the number of
          // devices present
          status = clGetDeviceIDs(
              platforms[0],
              CL_DEVICE_TYPE_ALL,
              0,
              NULL,
              &numDevices);
      
      
          // Allocate enough space for each device
          devices =
                  (cl_device_id*)malloc(
                      numDevices*sizeof(cl_device_id));
      
      
          // Fill in devices with clGetDeviceIDs()
          status = clGetDeviceIDs(
              platforms[0],
              CL_DEVICE_TYPE_ALL,
              numDevices,
              devices,
              NULL);
      
      
          // Create a context
          cl_context context = NULL;
      
      
          // Create a context using clCreateContext() and
          // associate it with the devices
          context = clCreateContext(
              NULL,
              numDevices,
              devices,
              NULL,
              NULL,
              &status);
      
      
          // Create a command queue
          cl_command_queue cmdQueue;
      
      
          // Create a command queue using clCreateCommandQueue(),
          // and associate it with the device you want to execute
          // on
          cl_queue_properties prop[] = {0};
          cmdQueue = clCreateCommandQueueWithProperties(
              context,
              devices[0],
              prop,
              &status);
      
      
          // Create SVM buffers
          void *bufferA = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
          void *bufferB = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
          void *bufferC = clSVMAlloc(context, CL_MEM_READ_WRITE, datasize, 0);
          if (bufferA == NULL || bufferB==NULL || bufferC==NULL) {
              fprintf(stderr, "can't create SVM buffers\n");
              exit(-1);
          }
          /* initialize bufferA and bufferB */
          status = clEnqueueSVMMap(cmdQueue,
                                   CL_TRUE, //blocking call
                                   CL_MAP_WRITE_INVALIDATE_REGION,
                                   bufferA,
                                   datasize,
                                   0,
                                   NULL,
                                   NULL);
          int *A = (int *)(bufferA);
          for (int i = 0; i < elements; i++) {
              A[i] = i;
          }
          status = clEnqueueSVMUnmap(cmdQueue, bufferA, 0, NULL, NULL);
          status = clEnqueueSVMMap(cmdQueue,
                                   CL_TRUE, //blocking call
                                   CL_MAP_WRITE_INVALIDATE_REGION,
                                   bufferB,
                                   datasize,
                                   0,
                                   NULL,
                                   NULL);
          int *B = (int *)(bufferB);
          for (int i = 0; i < elements; i++) {
              B[i] = i;
          }
          status = clEnqueueSVMUnmap(cmdQueue, bufferB, 0, NULL, NULL);
          // Create and compile the program
          cl_program program = clCreateProgramWithSource(
              context,
              1,
              (const char**)&programSource,
              NULL,
              &status);
      
      
          // Build (compile) the program for the devices with
          // clBuildProgram()
          status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
      
      
          // Create the kernel
           cl_kernel kernel = clCreateKernel(program, "vecadd", &status);
      
      
          // Set the kernel arguments
          status = clSetKernelArgSVMPointer(kernel, 0, (void *)(bufferA));
          status |= clSetKernelArgSVMPointer(kernel, 1, (void *)(bufferB));
          status |= clSetKernelArgSVMPointer(kernel, 2, (void *)(bufferC));
      
      
          size_t globalWorkSize[1];
          // There are 'elements' work-items
          globalWorkSize[0] = elements;
      
      
          status = clEnqueueNDRangeKernel(
              cmdQueue,
              kernel,
              1,
              NULL,
              globalWorkSize,
              NULL,
              0,
              NULL,
              NULL);
          clFinish(cmdQueue);
          status = clEnqueueSVMMap(cmdQueue,
                                   CL_TRUE, //blocking call
                                   CL_MAP_WRITE_INVALIDATE_REGION,
                                   bufferC,
                                   datasize,
                                   0,
                                   NULL,
                                   NULL);
          // Verify the output
          bool result = true;
          int *C = (int *)(bufferC);
          for(int i = 0; i < elements; i++) {
              if(C[i] != i+i) {
                  result = false;
                  break;
              }
          }
          if(result) {
              fprintf(stderr, "Output is correct\n");
          } else {
              fprintf(stderr, "Output is incorrect\n");
          }
      
      
          status = clEnqueueSVMUnmap(cmdQueue, bufferC, 0, NULL, NULL);
      
      
          // Release OpenCL resources
          clSVMFree(context, bufferA);
          clSVMFree(context, bufferB);
          clSVMFree(context, bufferC);
      
      
          clReleaseKernel(kernel);
          clReleaseProgram(program);
          clReleaseCommandQueue(cmdQueue);
          clReleaseContext(context);
      
      
          // Free host resources
          free(platforms);
          free(devices);
      }
      

      It is a very simple program: it just adds the elements of bufferA and bufferB and stores the result in bufferC. But when I run this program, I get a segmentation fault. I really don't know why. Can anyone help me? Thanks.