18 Replies Latest reply on Jul 16, 2015 5:43 AM by dipak

    clEnqueueWriteBufferRect does not work when region width is not equal to src pitch: broken again in Catalyst 14.12

    timchist

      I already reported this problem in an earlier post, http://devgurus.amd.com/thread/160312, and the error was fixed in Catalyst 13.4.

      However, the error seems to have been reintroduced in Catalyst 14.12.

      The error can be reproduced on HD 7970 and R9 280X, but not on R9 290.

      It worked correctly in Catalyst 14.9.

       

      I'm reposting the source code of a minimal sample demonstrating the error below:

      ---

       

      #include <stdio.h>

      #include <stdlib.h>

      #include <string.h>

       

      #include <CL/cl.h>

       

      //------------------------------------------------------------------------------

      void checkErr(char *func, cl_int err)

      {

          if(err != CL_SUCCESS)

          {

              fprintf( stderr, "%s(): ", func );

              switch( err )

              {

                  case CL_BUILD_PROGRAM_FAILURE:  fprintf (stderr, "CL_BUILD_PROGRAM_FAILURE"); break;

                  case CL_COMPILER_NOT_AVAILABLE: fprintf (stderr, "CL_COMPILER_NOT_AVAILABLE"); break;

                  case CL_DEVICE_NOT_AVAILABLE:   fprintf (stderr, "CL_DEVICE_NOT_AVAILABLE"); break;

                  case CL_DEVICE_NOT_FOUND:       fprintf (stderr, "CL_DEVICE_NOT_FOUND"); break;

                  case CL_INVALID_BINARY:         fprintf (stderr, "CL_INVALID_BINARY"); break;

                  case CL_INVALID_BUILD_OPTIONS:  fprintf (stderr, "CL_INVALID_BUILD_OPTIONS"); break;

                  case CL_INVALID_CONTEXT:        fprintf (stderr, "CL_INVALID_CONTEXT"); break;

                  case CL_INVALID_DEVICE:         fprintf (stderr, "CL_INVALID_DEVICE"); break;

                  case CL_INVALID_DEVICE_TYPE:    fprintf (stderr, "CL_INVALID_DEVICE_TYPE"); break;

                  case CL_INVALID_OPERATION:      fprintf (stderr, "CL_INVALID_OPERATION"); break;

                  case CL_INVALID_PLATFORM:       fprintf (stderr, "CL_INVALID_PLATFORM"); break;

                  case CL_INVALID_PROGRAM:        fprintf (stderr, "CL_INVALID_PROGRAM"); break;

                  case CL_INVALID_VALUE:          fprintf (stderr, "CL_INVALID_VALUE"); break;

                  case CL_OUT_OF_HOST_MEMORY:     fprintf (stderr, "CL_OUT_OF_HOST_MEMORY"); break;

                  default:                        fprintf (stderr, "Unknown error code: %d", (int)err); break;

              }

              fprintf (stderr, "\n");

              getchar();

              exit( err );

          }

      }

       

      int main(void)

      {

          ///////////////////////////////////////////////////////////////////////////

          // Initialization

          ///////////////////////////////////////////////////////////////////////////

          int i = 0;

          cl_int err = CL_SUCCESS;

       

          cl_uint nPlatforms = 0;

          cl_platform_id *platforms = NULL;

          cl_platform_id platform = (cl_platform_id)NULL;

          cl_context_properties cprops[3];

          size_t nDevices = 0;

          cl_device_id *devices = NULL;

          size_t binary_size = 0;

          char * binary = NULL;

          cl_device_id device_id = 0;

          cl_context context;

          cl_command_queue queue, queue2;

       

          /* figure out the number of platforms on this system. */

          err = clGetPlatformIDs(0, NULL, &nPlatforms);

          checkErr( "clGetPlatformIDs", err );

          printf( "Number of platforms found: %d\n", nPlatforms );

          if( nPlatforms == 0 )

          {

              fprintf( stderr, "Cannot continue without any platforms. Exiting.\n" );

              return( -1 );

          }

          platforms = (cl_platform_id *)malloc( sizeof(cl_platform_id) * nPlatforms );

          err = clGetPlatformIDs( nPlatforms, platforms, NULL );

          checkErr( "clGetPlatformIDs", err );

       

          puts("Platforms:");

          for(cl_uint i = 0; i < nPlatforms; i++ )

          {

              char pbuf[100];

              err = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,

                                       sizeof(pbuf), pbuf, NULL );

              checkErr( "clGetPlatformInfo", err );

              printf("#%d: %s\n", i, pbuf);

          }

       

          /* find the AMD platform. */

          for(cl_uint i = 0; i < nPlatforms; i++ )

          {

              char pbuf[100];

              err = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,

                                       sizeof(pbuf), pbuf, NULL );

              checkErr( "clGetPlatformInfo", err );

              if( strcmp(pbuf, "Advanced Micro Devices, Inc.") == 0 )

              {

                  printf( "Found AMD platform\n\n" );

                  platform = platforms[i];

                  break;

              }

          }

          if( platform == (cl_context_properties)NULL )

          {

              fprintf( stderr, "Could not find an AMD platform. Exiting.\n" );

              return( -1 );

          }

       

          cprops[0] = CL_CONTEXT_PLATFORM;

          cprops[1] = (cl_context_properties)platform;

          cprops[2] = (cl_context_properties)NULL; /* end of options list marker */

       

          /* create a context with all of the available devices. */

          context = clCreateContextFromType( cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &err );

          checkErr( "clCreateContextFromType", err );

       

          /* get a device count for this context. */

          err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &nDevices );

          checkErr( "clGetContextInfo", err );

          nDevices = nDevices / sizeof(cl_device_id); /* need to generate actual device count from size of required buffer. */

          printf( "Number of devices found: %d\n", nDevices );

          devices = (cl_device_id *)malloc( sizeof(cl_device_id) * nDevices );

          if (nDevices == 0) {

              fprintf( stderr, "Could not find GPU devices. Exiting.\n" );

              return( -1 );

          }

       

          /* grab the handles to all of the devices in the context. */

          err = clGetContextInfo( context, CL_CONTEXT_DEVICES, sizeof(cl_device_id)*nDevices, devices, NULL );

          checkErr( "clGetContextInfo", err );

       

          device_id = devices[0];

       

          queue = clCreateCommandQueue(context, device_id, 0, &err);

          checkErr("clCreateCommandQueue", err);

       

       

          ///////////////////////////////////////////////////////////////////////////

          // The actual test

          ///////////////////////////////////////////////////////////////////////////

          const int FullImageWidth = 256;

          const int FullImageHeight = 256;

          const int PartialImageWidth = 16;

          const int PartialImageHeight = 16;

       

          unsigned char* hostFullImage = new unsigned char[FullImageWidth * FullImageHeight];

       

          for(int y = 0; y < FullImageHeight; ++y)

              for(int x = 0; x < FullImageWidth; ++x)

                  hostFullImage[y * FullImageWidth + x] = y * FullImageWidth + x;

       

          cl_mem deviceBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, PartialImageWidth * PartialImageHeight, NULL, &err);

          checkErr("clCreateBuffer", err);

       

          unsigned char pattern = 0;

          err = clEnqueueFillBuffer(queue, deviceBuffer, &pattern, 1, 0, PartialImageWidth * PartialImageHeight, 0, NULL, NULL);

          checkErr("clEnqueueFillBuffer", err);

        

          size_t bufferOrigin[3], hostOrigin[3], region[3];

          bufferOrigin[0] = 0;

          bufferOrigin[1] = 0;

          bufferOrigin[2] = 0;

          hostOrigin[0] = 0;

          hostOrigin[1] = 0;

          hostOrigin[2] = 0;

          region[0] = PartialImageWidth;

          region[1] = PartialImageHeight;

          region[2] = 1;

          err = clEnqueueWriteBufferRect(queue, deviceBuffer, CL_TRUE, bufferOrigin, hostOrigin, region,

                                         PartialImageWidth, 0, FullImageWidth, 0, hostFullImage, 0, NULL, NULL);

          checkErr("clEnqueueWriteBufferRect", err);

       

          unsigned char* hostPartialImage = new unsigned char[PartialImageWidth * PartialImageHeight];

          err = clEnqueueReadBuffer(queue, deviceBuffer, CL_TRUE, 0, PartialImageWidth * PartialImageHeight, hostPartialImage, 0, NULL, NULL);

          checkErr("clEnqueueReadBuffer", err);

       

          bool testPassed = true;

          for(int y = 0; y < PartialImageHeight; ++y)

          {

              for(int x = 0; x < PartialImageWidth; ++x)

                  if(hostFullImage[y * FullImageWidth + x] != hostPartialImage[y * PartialImageWidth + x])

                  {

                      testPassed = false;

                      break;

                  }

              if(!testPassed)

                  break;

          }

       

          if(testPassed)

              puts("Test passed, all OK");

          else

          {

              puts("Test failed.\n");

        

              puts("Expected:");

              for(int y = 0; y < PartialImageHeight; ++y)

              {

                  for(int x = 0; x < PartialImageWidth; ++x)

                      printf("%3d ", (int)hostFullImage[y * FullImageWidth + x]);

                  puts("");

              }

       

              puts("\nActual:");

       

              for(int y = 0; y < PartialImageHeight; ++y)

              {

                  for(int x = 0; x < PartialImageWidth; ++x)

                      printf("%3d ", (int)hostPartialImage[y * PartialImageWidth + x]);

                  puts("");

              }

          }

       

          ///////////////////////////////////////////////////////////////////////////

          // Clean-up

          ///////////////////////////////////////////////////////////////////////////

       

          err = clReleaseMemObject(deviceBuffer);

          checkErr("clReleaseMemObject", err);

       

          err = clReleaseCommandQueue(queue);

          checkErr("clReleaseCommandQueue", err);

       

          err = clReleaseContext(context);

          checkErr("clReleaseContext", err);

       

          delete hostFullImage;

          delete hostPartialImage;

       

          return 0;

      }