AnsweredAssumed Answered

SKA problem! uav_raw_load_id(11)_cached_aligned

Question asked by aisesal@gmail.com on Apr 9, 2012
Latest reply on Apr 17, 2012 by rouellet

Hello,

 

There seems to be a bug with SKA or something else. I get "Error: IL Text Translator(line=86): parse error near _a " for all kernels that load float4 data from global memory. I've looked up the line that prevents successful compilation and it reads: "uav_raw_load_id(11)_cached_aligned". It seems that "_aligned" part is that causes the error. For example:

 

__kernel void main(
          __global const float4 *buf_in,
          __global float *buf_out)
{
          uint global_id = get_global_id(0);
          buf_out[global_id] = length(buf_in[global_id]);
}

 

produces this:

 

Error: IL Text Translator(line=86): parse error near _a  
mdef(16383)_out(1)_in(2)
mov r0, in0
mov r1, in1
div_zeroop(infinity) r0.x___, r0.x, r1.x
mov out0, r0
mend
il_cs_2_0
dcl_cb cb0[15] ; Constant buffer that holds ABI data
dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003
dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD
dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC
dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF
dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF
dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C
dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020
dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F
dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000
call 1024;$
endmain
func 1024 ; __OpenCL_main_kernel
mov r1013, cb0[8].x
mov r1019, l1.0000
dcl_max_thread_per_group 256 
dcl_raw_uav_id(11)
dcl_arena_uav_id(8)
mov r0.__z_, vThreadGrpIdFlat0.x
mov r1022.xyz0, vTidInGrp0.xyz
mov r1023.xyz0, vThreadGrpId0.xyz
imad r1021.xyz0, r1023.xyzz, cb0[1].xyzz, r1022.xyzz
iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0
iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0
mov r1023.___w, r0.z
ishl r1023.___w, r1023.w, l0.z
mov r1018.x___, l0.0000
udiv r1024.xyz_, r1021.xyzz, cb0[10].xyzz
imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz
dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0
dcl_literal l18, 0x00000002, 0x00000002, 0x00000002, 0x00000002; f32:i32 2
dcl_literal l11, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4
dcl_literal l22, 0x00000007, 0x00000007, 0x00000007, 0x00000007; f32:i32 7
dcl_literal l20, 0x00000008, 0x00000008, 0x00000008, 0x00000008; f32:i32 8
dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010; f32:i32 16
dcl_literal l23, 0x00000020, 0x00000020, 0x00000020, 0x00000020; f32:i32 32
dcl_literal l12, 0x00800000, 0x00800000, 0x00800000, 0x00800000; f32:i32 8388608
dcl_literal l17, 0x14800000, 0x14800000, 0x14800000, 0x14800000; f32:i32 343932928
dcl_literal l14, 0x1E800000, 0x1E800000, 0x1E800000, 0x1E800000; f32:i32 511705088
dcl_literal l15, 0x60800000, 0x60800000, 0x60800000, 0x60800000; f32:i32 1619001344
dcl_literal l16, 0x6A800000, 0x6A800000, 0x6A800000, 0x6A800000; f32:i32 1786773504
dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000; f32:i32 2139095040
dcl_cb cb1[2]
; Kernel arg setup: buf_in
mov r1.x, cb1[0].x
; Kernel arg setup: buf_out
mov r1.y, cb1[1].x
dcl_cb cb2[3]
call 1029 ; main
ret
endfunc ; __OpenCL_main_kernel
;ARGSTART:__OpenCL_main_kernel
;version:3:1:104
;device:barts
;uniqueid:1024
;memory:hwprivate:0
;memory:hwregion:0
;memory:hwlocal:0
;pointer:buf_in:float:1:1:0:uav:11:16:RO:0:0
;constarg:0:buf_in
;pointer:buf_out:float:1:1:16:uav:11:4:RW:0:0
;memory:datareqd
;function:1:1029
;uavid:11
;privateid:1
;reflection:0:float4*
;reflection:1:float*
;ARGEND:__OpenCL_main_kernel
func 1029 ; main                        ; @__OpenCL_main_kernel
; BB#0:                                 ; %entry
          mov r65.x___, r1.y
          mov r65._y__, r1.x
          mov r66, r1021.xyz0
          mov r65.__z_, r66.00x0
          mov r65.___w, l11
          ishl r65.___w, r65.z, r65.w
          iadd r65.___w, r65.y, r65.w
          mov r1010.x___, r65.w
          uav_raw_load_id(11)_cached_aligned r1011, r1010
          mov r66, r1011
          dp4 r65.___w, r66, r66
          mov r67.x___, l12
          ge r67._y__, r65.w, r67.x
          ne r67.x___, r67.x, r67.x
          ne r67.__z_, r65.w, r65.w
          ior r67.x___, r67.y, r67.x
          ior r67.x___, r67.z, r67.x
          if_logicalnz r67.x
          mov r65._y__, l13
          eq r65._y__, r65.w, r65.y
          if_logicalnz r65.y
          mov r65.___w, l14
          mov r67, r65.w
          mul_ieee r66, r66, r67
          dp4 r65.___w, r66, r66
          sqrt_vec r65.___w, r65.w
          mov r65._y__, l15
          mul_ieee r65.___w, r65.w, r65.y
          else
          sqrt_vec r65.___w, r65.w
          endif
          else
          mov r65.___w, l16
          mov r67, r65.w
          mul_ieee r66, r66, r67
          dp4 r65.___w, r66, r66
          sqrt_vec r65.___w, r65.w
          mov r65._y__, l17
          mul_ieee r65.___w, r65.w, r65.y
          endif
          mov r65._y__, l18
          ishl r65._y__, r65.z, r65.y
          iadd r65.x___, r65.x, r65.y
          mov r1011, r65.w
          mov r1010.x___, r65.x
          uav_raw_store_id(11) mem0.x___, r1010.x, r1011.x
          ret
endfunc ; main
;ARGSTART:main
;uniqueid:1029
;memory:datareqd
;ARGEND:main
end

 

I'm using 1.11 version of SKA. I have 12.4 Catalyst drivers and HD 6850 1GB video card. I've tried 1.10 version of SKA but it didn't help. Note that outside of SKA, everything is fine, kernel compiles with no problems. I've tried to dump IL by adding enviroment variable GPU_DUMP_DEVICE_KERNEL=3. The instruction "uav_raw_load_id(11)_cached_aligned" is still there, but it doesn't cause compilation error, and ISA file is produced as well.

 

Does anyone have any advice.

Outcomes