6 Replies Latest reply on Aug 16, 2018 6:44 AM by dipak

    OpenCL linker hangs & terminates application on R9 200

    george72

      After shipping our application, some users with AMD R9 200 series cards report that the application hangs and then quits. After studying log files and minidumps, it seems the issue is with the OpenCL linker on those systems, as they show that the application is executing a clLinkProgram command at the time of the "hang".

      As we handle and log both C++ and structured exceptions and the log doesn't show any of these, our conclusion is the linker somehow terminates the entire application (we have seen this behavior before with Intel OpenCL drivers).

       

      The issue occurs on the following system:

       

      Microsoft Windows 10 Home, Version 1803, OS Build 17134, Current Version 6.3

      Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, 8130 MB RAM

      CL Info:

       

       

      (1.0)   CL_PLATFORM_NAME              : AMD Accelerated Parallel Processing
      (1.0)   CL_PLATFORM_VERSION           : OpenCL 2.1 AMD-APP (2580.6)
      (1.0)   CL_PLATFORM_PROFILE           : FULL_PROFILE
      (1.0)   CL_PLATFORM_VENDOR            : Advanced Micro Devices, Inc.
          CL_PLATFORM_EXTENSIONS:
              cl_khr_icd
              cl_khr_d3d10_sharing
              cl_khr_d3d11_sharing
              cl_khr_dx9_media_sharing
              cl_amd_event_callback
              cl_amd_offline_devices
            

      CL_DEVICE[0]

      (1.0)   CL_DEVICE_TYPE                                : GPU
      (1.0)   CL_DEVICE_NAME                                : Tahiti
      (1.0)   CL_DEVICE_VERSION                             : OpenCL 1.2 AMD-APP (2580.6)
      (1.0)   CL_DEVICE_PROFILE                             : FULL_PROFILE
      (1.0)   CL_DEVICE_VENDOR                              : Advanced Micro Devices, Inc.
      (1.0)   CL_DEVICE_VENDOR_ID                           : 4098
      (1.0)   CL_DRIVER_VERSION                             : 2580.6
              CL_DEVICE_EXTENSIONS:
                  cl_khr_fp64
                  cl_amd_fp64
                  cl_khr_global_int32_base_atomics
                  cl_khr_global_int32_extended_atomics
                  cl_khr_local_int32_base_atomics
                  cl_khr_local_int32_extended_atomics
                  cl_khr_int64_base_atomics
                  cl_khr_int64_extended_atomics
                  cl_khr_3d_image_writes
                  cl_khr_byte_addressable_store
                  cl_khr_gl_sharing
                  cl_amd_device_attribute_query
                  cl_amd_vec3
                  cl_amd_printf
                  cl_amd_media_ops
                  cl_amd_media_ops2
                  cl_amd_popcnt
                  cl_khr_d3d10_sharing
                  cl_khr_d3d11_sharing
                  cl_khr_dx9_media_sharing
                  cl_khr_image2d_from_buffer
                  cl_khr_spir
                  cl_khr_gl_event
                  cl_amd_liquid_flash
                
      (AMD)   CL_DEVICE_PROFILING_TIMER_OFFSET_AMD          : 1532807364555193883
      (AMD)   CL_DEVICE_TOPOLOGY_AMD
      (AMD)     type                                        : CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
      (AMD)     bus                                         : 1
      (AMD)     device                                      : 0
      (AMD)     function                                    : 0
      (AMD)   CL_DEVICE_BOARD_NAME_AMD                      : AMD Radeon R9 200 Series
      (AMD)   CL_DEVICE_GLOBAL_FREE_MEMORY_AMD              : 3095289
      (AMD)   CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD           : 4
      (AMD)   CL_DEVICE_SIMD_WIDTH_AMD                      : 16
      (AMD)   CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD          : 1
      (AMD)   CL_DEVICE_WAVEFRONT_WIDTH_AMD                 : 64
      (AMD)   CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD             : 12
      (AMD)   CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD        : 16
      (AMD)   CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD   : 256
      (AMD)   CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD : 65536
      (AMD)   CL_DEVICE_LOCAL_MEM_BANKS_AMD                 : 32
      (AMD)   CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD          : 0
      (AMD)   CL_DEVICE_GFXIP_MAJOR_AMD                     : 6
      (AMD)   CL_DEVICE_GFXIP_MINOR_AMD                     : 0
      (AMD)   CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD          : 2
      (AMD)   CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD       : 256
      (AMD)   CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD             : 1024
      (AMD)   CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD  : 16384
      (1.0)   CL_DEVICE_MAX_COMPUTE_UNITS                   : 28
      (1.0)   CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS            : 3
      (1.0)   CL_DEVICE_MAX_WORK_ITEM_SIZES                 : [1024, 1024, 1024]
      (1.0)   CL_DEVICE_MAX_WORK_GROUP_SIZE                 : 256
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR         : 4
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT        : 2
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT          : 1
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG         : 1
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT        : 1
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE       : 1
      (1.1)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF         : 1
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR            : 4
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT           : 2
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_INT             : 1
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG            : 1
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT           : 1
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE          : 1
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF            : 1
      (1.0)   CL_DEVICE_MAX_CLOCK_FREQUENCY                 : 940 MHz
      (1.0)   CL_DEVICE_ADDRESS_BITS                        : 32
      (1.0)   CL_DEVICE_MAX_MEM_ALLOC_SIZE                  : 2509871513 bytes
      (1.0)   CL_DEVICE_IMAGE_SUPPORT                       : 1
      (1.0)   CL_DEVICE_MAX_READ_IMAGE_ARGS                 : 128
      (1.0)   CL_DEVICE_MAX_WRITE_IMAGE_ARGS                : 8
      (1.0)   CL_DEVICE_IMAGE2D_MAX_WIDTH                   : 16384 pixels
      (1.0)   CL_DEVICE_IMAGE2D_MAX_HEIGHT                  : 16384 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_WIDTH                   : 2048 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_HEIGHT                  : 2048 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_DEPTH                   : 2048 pixels
      (1.2)   CL_DEVICE_IMAGE_MAX_BUFFER_SIZE               : 134217728 pixels
      (1.2)   CL_DEVICE_IMAGE_MAX_ARRAY_SIZE                : 2048
      (1.0)   CL_DEVICE_MAX_SAMPLERS                        : 16
      (1.0)   CL_DEVICE_MAX_PARAMETER_SIZE                  : 1024 bytes
      (1.0)   CL_DEVICE_MEM_BASE_ADDR_ALIGN                 : 2048 bits
      (1.0)   CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE            : 128 bytes
      (1.0)   CL_DEVICE_SINGLE_FP_CONFIG                    : CL_FP_INF_NAN CL_FP_ROUND_TO_NEAREST CL_FP_ROUND_TO_ZERO CL_FP_ROUND_TO_INF CL_FP_FMA CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
      (1.2)   CL_DEVICE_DOUBLE_FP_CONFIG                    : CL_FP_DENORM CL_FP_INF_NAN CL_FP_ROUND_TO_NEAREST CL_FP_ROUND_TO_ZERO CL_FP_ROUND_TO_INF CL_FP_FMA
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHE_TYPE               : CL_READ_WRITE_CACHE
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE           : 64 bytes
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHE_SIZE               : 16384 bytes
      (1.0)   CL_DEVICE_GLOBAL_MEM_SIZE                     : 3221225472 bytes
      (1.0)   CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE            : 65536 bytes
      (1.0)   CL_DEVICE_MAX_CONSTANT_ARGS                   : 8
      (1.0)   CL_DEVICE_LOCAL_MEM_TYPE                      : CL_LOCAL
      (1.0)   CL_DEVICE_LOCAL_MEM_SIZE                      : 32768 bytes
      (1.0)   CL_DEVICE_ERROR_CORRECTION_SUPPORT            : 0
      (1.1)   CL_DEVICE_HOST_UNIFIED_MEMORY                 : 0
      (1.0)   CL_DEVICE_PROFILING_TIMER_RESOLUTION          : 1 ns
      (1.0)   CL_DEVICE_ENDIAN_LITTLE                       : 1
      (1.0)   CL_DEVICE_AVAILABLE                           : 1
      (1.0)   CL_DEVICE_COMPILER_AVAILABLE                  : 1
      (1.2)   CL_DEVICE_LINKER_AVAILABLE                    : 1
      (1.0)   CL_DEVICE_EXECUTION_CAPABILITIES              : CL_EXEC_KERNEL
      (1.0)   CL_DEVICE_QUEUE_PROPERTIES                    : CL_QUEUE_PROFILING_ENABLE
      (1.1)   CL_DEVICE_OPENCL_C_VERSION                    : OpenCL C 1.2
      (1.2)   CL_DEVICE_PRINTF_BUFFER_SIZE                  : 4194304 bytes
      (1.2)   CL_DEVICE_PREFERRED_INTEROP_USER_SYNC         : 1
      (1.2)   CL_DEVICE_PARENT_DEVICE                       : none (device is root)
      (1.2)   CL_DEVICE_PARTITION_MAX_SUB_DEVICES           : 28
      (1.2)   CL_DEVICE_PARTITION_PROPERTIES                :
      (1.2)   CL_DEVICE_PARTITION_AFFINITY_DOMAIN           :
      (1.2)   CL_DEVICE_PARTITION_TYPE                      :
      (1.2)   CL_DEVICE_REFERENCE_COUNT                     : 1

       

      CL_DEVICE[1]

      (1.0)   CL_DEVICE_TYPE                                : CPU
      (1.0)   CL_DEVICE_NAME                                : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
      (1.0)   CL_DEVICE_VERSION                             : OpenCL 1.2 AMD-APP (2580.6)
      (1.0)   CL_DEVICE_PROFILE                             : FULL_PROFILE
      (1.0)   CL_DEVICE_VENDOR                              : GenuineIntel
      (1.0)   CL_DEVICE_VENDOR_ID                           : 4098
      (1.0)   CL_DRIVER_VERSION                             : 2580.6 (sse2,avx)
              CL_DEVICE_EXTENSIONS:
                  cl_khr_fp64
                  cl_amd_fp64
                  cl_khr_global_int32_base_atomics
                  cl_khr_global_int32_extended_atomics
                  cl_khr_local_int32_base_atomics
                  cl_khr_local_int32_extended_atomics
                  cl_khr_int64_base_atomics
                  cl_khr_int64_extended_atomics
                  cl_khr_3d_image_writes
                  cl_khr_byte_addressable_store
                  cl_khr_gl_sharing
                  cl_ext_device_fission
                  cl_amd_device_attribute_query
                  cl_amd_vec3
                  cl_amd_printf
                  cl_amd_media_ops
                  cl_amd_media_ops2
                  cl_amd_popcnt
                  cl_khr_d3d10_sharing
                  cl_khr_spir
                  cl_khr_gl_event
                
      (1.0)   CL_DEVICE_MAX_COMPUTE_UNITS                   : 8
      (1.0)   CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS            : 3
      (1.0)   CL_DEVICE_MAX_WORK_ITEM_SIZES                 : [1024, 1024, 1024]
      (1.0)   CL_DEVICE_MAX_WORK_GROUP_SIZE                 : 1024
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR         : 16
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT        : 8
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT          : 4
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG         : 2
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT        : 8
      (1.0)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE       : 4
      (1.1)   CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF         : 4
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR            : 16
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT           : 8
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_INT             : 4
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG            : 2
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT           : 8
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE          : 4
      (1.1)   CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF            : 4
      (1.0)   CL_DEVICE_MAX_CLOCK_FREQUENCY                 : 3592 MHz
      (1.0)   CL_DEVICE_ADDRESS_BITS                        : 64
      (1.0)   CL_DEVICE_MAX_MEM_ALLOC_SIZE                  : 2147483648 bytes
      (1.0)   CL_DEVICE_IMAGE_SUPPORT                       : 1
      (1.0)   CL_DEVICE_MAX_READ_IMAGE_ARGS                 : 128
      (1.0)   CL_DEVICE_MAX_WRITE_IMAGE_ARGS                : 64
      (1.0)   CL_DEVICE_IMAGE2D_MAX_WIDTH                   : 8192 pixels
      (1.0)   CL_DEVICE_IMAGE2D_MAX_HEIGHT                  : 8192 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_WIDTH                   : 2048 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_HEIGHT                  : 2048 pixels
      (1.0)   CL_DEVICE_IMAGE3D_MAX_DEPTH                   : 2048 pixels
      (1.2)   CL_DEVICE_IMAGE_MAX_BUFFER_SIZE               : 65536 pixels
      (1.2)   CL_DEVICE_IMAGE_MAX_ARRAY_SIZE                : 2048
      (1.0)   CL_DEVICE_MAX_SAMPLERS                        : 16
      (1.0)   CL_DEVICE_MAX_PARAMETER_SIZE                  : 4096 bytes
      (1.0)   CL_DEVICE_MEM_BASE_ADDR_ALIGN                 : 1024 bits
      (1.0)   CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE            : 128 bytes
      (1.0)   CL_DEVICE_SINGLE_FP_CONFIG                    : CL_FP_DENORM CL_FP_INF_NAN CL_FP_ROUND_TO_NEAREST CL_FP_ROUND_TO_ZERO CL_FP_ROUND_TO_INF CL_FP_FMA CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
      (1.2)   CL_DEVICE_DOUBLE_FP_CONFIG                    : CL_FP_DENORM CL_FP_INF_NAN CL_FP_ROUND_TO_NEAREST CL_FP_ROUND_TO_ZERO CL_FP_ROUND_TO_INF CL_FP_FMA
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHE_TYPE               : CL_READ_WRITE_CACHE
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE           : 64 bytes
      (1.0)   CL_DEVICE_GLOBAL_MEM_CACHE_SIZE               : 32768 bytes
      (1.0)   CL_DEVICE_GLOBAL_MEM_SIZE                     : 8525291520 bytes
      (1.0)   CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE            : 65536 bytes
      (1.0)   CL_DEVICE_MAX_CONSTANT_ARGS                   : 8
      (1.0)   CL_DEVICE_LOCAL_MEM_TYPE                      : CL_GLOBAL
      (1.0)   CL_DEVICE_LOCAL_MEM_SIZE                      : 32768 bytes
      (1.0)   CL_DEVICE_ERROR_CORRECTION_SUPPORT            : 0
      (1.1)   CL_DEVICE_HOST_UNIFIED_MEMORY                 : 1
      (1.0)   CL_DEVICE_PROFILING_TIMER_RESOLUTION          : 285 ns
      (1.0)   CL_DEVICE_ENDIAN_LITTLE                       : 1
      (1.0)   CL_DEVICE_AVAILABLE                           : 1
      (1.0)   CL_DEVICE_COMPILER_AVAILABLE                  : 1
      (1.2)   CL_DEVICE_LINKER_AVAILABLE                    : 1
      (1.0)   CL_DEVICE_EXECUTION_CAPABILITIES              : CL_EXEC_KERNEL CL_EXEC_NATIVE_KERNEL
      (1.0)   CL_DEVICE_QUEUE_PROPERTIES                    : CL_QUEUE_PROFILING_ENABLE
      (1.1)   CL_DEVICE_OPENCL_C_VERSION                    : OpenCL C 1.2
      (1.2)   CL_DEVICE_PRINTF_BUFFER_SIZE                  : 65536 bytes
      (1.2)   CL_DEVICE_PREFERRED_INTEROP_USER_SYNC         : 1
      (1.2)   CL_DEVICE_PARENT_DEVICE                       : none (device is root)
      (1.2)   CL_DEVICE_PARTITION_MAX_SUB_DEVICES           : 8
      (1.2)   CL_DEVICE_PARTITION_PROPERTIES                : CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
      (1.2)   CL_DEVICE_PARTITION_AFFINITY_DOMAIN           : CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE
      (1.2)   CL_DEVICE_PARTITION_TYPE                      :
      (1.2)   CL_DEVICE_REFERENCE_COUNT                     : 1