5 Replies Latest reply on Apr 17, 2012 3:07 PM by rouellet

    SKA problem! uav_raw_load_id(11)_cached_aligned




      There seems to be a bug with SKA or something else. I get "Error: IL Text Translator(line=86): parse error near _a " for all kernels that load float4 data from global memory. I've looked up the line that prevents successful compilation and it reads: "uav_raw_load_id(11)_cached_aligned". It seems that "_aligned" part is that causes the error. For example:


      __kernel void main(
                __global const float4 *buf_in,
                __global float *buf_out)
                uint global_id = get_global_id(0);
                buf_out[global_id] = length(buf_in[global_id]);


      produces this:


      Error: IL Text Translator(line=86): parse error near _a  
      mov r0, in0
      mov r1, in1
      div_zeroop(infinity) r0.x___, r0.x, r1.x
      mov out0, r0
      dcl_cb cb0[15] ; Constant buffer that holds ABI data
      dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003
      dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD
      dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC
      dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF
      dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF
      dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C
      dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020
      dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F
      dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000
      call 1024;$
      func 1024 ; __OpenCL_main_kernel
      mov r1013, cb0[8].x
      mov r1019, l1.0000
      dcl_max_thread_per_group 256 
      mov r0.__z_, vThreadGrpIdFlat0.x
      mov r1022.xyz0, vTidInGrp0.xyz
      mov r1023.xyz0, vThreadGrpId0.xyz
      imad r1021.xyz0, r1023.xyzz, cb0[1].xyzz, r1022.xyzz
      iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0
      iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0
      mov r1023.___w, r0.z
      ishl r1023.___w, r1023.w, l0.z
      mov r1018.x___, l0.0000
      udiv r1024.xyz_, r1021.xyzz, cb0[10].xyzz
      imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz
      dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0
      dcl_literal l18, 0x00000002, 0x00000002, 0x00000002, 0x00000002; f32:i32 2
      dcl_literal l11, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4
      dcl_literal l22, 0x00000007, 0x00000007, 0x00000007, 0x00000007; f32:i32 7
      dcl_literal l20, 0x00000008, 0x00000008, 0x00000008, 0x00000008; f32:i32 8
      dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010; f32:i32 16
      dcl_literal l23, 0x00000020, 0x00000020, 0x00000020, 0x00000020; f32:i32 32
      dcl_literal l12, 0x00800000, 0x00800000, 0x00800000, 0x00800000; f32:i32 8388608
      dcl_literal l17, 0x14800000, 0x14800000, 0x14800000, 0x14800000; f32:i32 343932928
      dcl_literal l14, 0x1E800000, 0x1E800000, 0x1E800000, 0x1E800000; f32:i32 511705088
      dcl_literal l15, 0x60800000, 0x60800000, 0x60800000, 0x60800000; f32:i32 1619001344
      dcl_literal l16, 0x6A800000, 0x6A800000, 0x6A800000, 0x6A800000; f32:i32 1786773504
      dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000; f32:i32 2139095040
      dcl_cb cb1[2]
      ; Kernel arg setup: buf_in
      mov r1.x, cb1[0].x
      ; Kernel arg setup: buf_out
      mov r1.y, cb1[1].x
      dcl_cb cb2[3]
      call 1029 ; main
      endfunc ; __OpenCL_main_kernel
      func 1029 ; main                        ; @__OpenCL_main_kernel
      ; BB#0:                                 ; %entry
                mov r65.x___, r1.y
                mov r65._y__, r1.x
                mov r66, r1021.xyz0
                mov r65.__z_, r66.00x0
                mov r65.___w, l11
                ishl r65.___w, r65.z, r65.w
                iadd r65.___w, r65.y, r65.w
                mov r1010.x___, r65.w
                uav_raw_load_id(11)_cached_aligned r1011, r1010
                mov r66, r1011
                dp4 r65.___w, r66, r66
                mov r67.x___, l12
                ge r67._y__, r65.w, r67.x
                ne r67.x___, r67.x, r67.x
                ne r67.__z_, r65.w, r65.w
                ior r67.x___, r67.y, r67.x
                ior r67.x___, r67.z, r67.x
                if_logicalnz r67.x
                mov r65._y__, l13
                eq r65._y__, r65.w, r65.y
                if_logicalnz r65.y
                mov r65.___w, l14
                mov r67, r65.w
                mul_ieee r66, r66, r67
                dp4 r65.___w, r66, r66
                sqrt_vec r65.___w, r65.w
                mov r65._y__, l15
                mul_ieee r65.___w, r65.w, r65.y
                sqrt_vec r65.___w, r65.w
                mov r65.___w, l16
                mov r67, r65.w
                mul_ieee r66, r66, r67
                dp4 r65.___w, r66, r66
                sqrt_vec r65.___w, r65.w
                mov r65._y__, l17
                mul_ieee r65.___w, r65.w, r65.y
                mov r65._y__, l18
                ishl r65._y__, r65.z, r65.y
                iadd r65.x___, r65.x, r65.y
                mov r1011, r65.w
                mov r1010.x___, r65.x
                uav_raw_store_id(11) mem0.x___, r1010.x, r1011.x
      endfunc ; main


      I'm using 1.11 version of SKA. I have 12.4 Catalyst drivers and HD 6850 1GB video card. I've tried 1.10 version of SKA but it didn't help. Note that outside of SKA, everything is fine, kernel compiles with no problems. I've tried to dump IL by adding enviroment variable GPU_DUMP_DEVICE_KERNEL=3. The instruction "uav_raw_load_id(11)_cached_aligned" is still there, but it doesn't cause compilation error, and ISA file is produced as well.


      Does anyone have any advice.