cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

aisesal
Journeyman III

SKA problem! uav_raw_load_id(11)_cached_aligned

Hello,

There seems to be a bug with SKA or something else. I get "Error: IL Text Translator(line=86): parse error near _a " for all kernels that load float4 data from global memory. I've looked up the line that prevents successful compilation and it reads: "uav_raw_load_id(11)_cached_aligned". It seems that "_aligned" part is that causes the error. For example:

__kernel void main(

          __global const float4 *buf_in,

          __global float *buf_out)

{

          uint global_id = get_global_id(0);

          buf_out[global_id] = length(buf_in[global_id]);

}

produces this:

Error: IL Text Translator(line=86): parse error near _a 

mdef(16383)_out(1)_in(2)

mov r0, in0

mov r1, in1

div_zeroop(infinity) r0.x___, r0.x, r1.x

mov out0, r0

mend

il_cs_2_0

dcl_cb cb0[15] ; Constant buffer that holds ABI data

dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003

dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD

dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC

dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF

dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF

dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C

dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020

dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F

dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000

call 1024;$

endmain

func 1024 ; __OpenCL_main_kernel

mov r1013, cb0[8].x

mov r1019, l1.0000

dcl_max_thread_per_group 256

dcl_raw_uav_id(11)

dcl_arena_uav_id(8)

mov r0.__z_, vThreadGrpIdFlat0.x

mov r1022.xyz0, vTidInGrp0.xyz

mov r1023.xyz0, vThreadGrpId0.xyz

imad r1021.xyz0, r1023.xyzz, cb0[1].xyzz, r1022.xyzz

iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0

iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0

mov r1023.___w, r0.z

ishl r1023.___w, r1023.w, l0.z

mov r1018.x___, l0.0000

udiv r1024.xyz_, r1021.xyzz, cb0[10].xyzz

imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz

dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0

dcl_literal l18, 0x00000002, 0x00000002, 0x00000002, 0x00000002; f32:i32 2

dcl_literal l11, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4

dcl_literal l22, 0x00000007, 0x00000007, 0x00000007, 0x00000007; f32:i32 7

dcl_literal l20, 0x00000008, 0x00000008, 0x00000008, 0x00000008; f32:i32 8

dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010; f32:i32 16

dcl_literal l23, 0x00000020, 0x00000020, 0x00000020, 0x00000020; f32:i32 32

dcl_literal l12, 0x00800000, 0x00800000, 0x00800000, 0x00800000; f32:i32 8388608

dcl_literal l17, 0x14800000, 0x14800000, 0x14800000, 0x14800000; f32:i32 343932928

dcl_literal l14, 0x1E800000, 0x1E800000, 0x1E800000, 0x1E800000; f32:i32 511705088

dcl_literal l15, 0x60800000, 0x60800000, 0x60800000, 0x60800000; f32:i32 1619001344

dcl_literal l16, 0x6A800000, 0x6A800000, 0x6A800000, 0x6A800000; f32:i32 1786773504

dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000; f32:i32 2139095040

dcl_cb cb1[2]

; Kernel arg setup: buf_in

mov r1.x, cb1[0].x

; Kernel arg setup: buf_out

mov r1.y, cb1[1].x

dcl_cb cb2[3]

call 1029 ; main

ret

endfunc ; __OpenCL_main_kernel

;ARGSTART:__OpenCL_main_kernel

;version:3:1:104

;device:barts

;uniqueid:1024

;memory:hwprivate:0

;memory:hwregion:0

;memory:hwlocal:0

;pointer:buf_in:float:1:1:0:uav:11:16:RO:0:0

;constarg:0:buf_in

;pointer:buf_out:float:1:1:16:uav:11:4:RW:0:0

;memory:datareqd

;function:1:1029

;uavid:11

;privateid:1

;reflection:0:float4*

;reflection:1:float*

;ARGEND:__OpenCL_main_kernel

func 1029 ; main                        ; @__OpenCL_main_kernel

; BB#0:                                 ; %entry

          mov r65.x___, r1.y

          mov r65._y__, r1.x

          mov r66, r1021.xyz0

          mov r65.__z_, r66.00x0

          mov r65.___w, l11

          ishl r65.___w, r65.z, r65.w

          iadd r65.___w, r65.y, r65.w

          mov r1010.x___, r65.w

          uav_raw_load_id(11)_cached_aligned r1011, r1010

          mov r66, r1011

          dp4 r65.___w, r66, r66

          mov r67.x___, l12

          ge r67._y__, r65.w, r67.x

          ne r67.x___, r67.x, r67.x

          ne r67.__z_, r65.w, r65.w

          ior r67.x___, r67.y, r67.x

          ior r67.x___, r67.z, r67.x

          if_logicalnz r67.x

          mov r65._y__, l13

          eq r65._y__, r65.w, r65.y

          if_logicalnz r65.y

          mov r65.___w, l14

          mov r67, r65.w

          mul_ieee r66, r66, r67

          dp4 r65.___w, r66, r66

          sqrt_vec r65.___w, r65.w

          mov r65._y__, l15

          mul_ieee r65.___w, r65.w, r65.y

          else

          sqrt_vec r65.___w, r65.w

          endif

          else

          mov r65.___w, l16

          mov r67, r65.w

          mul_ieee r66, r66, r67

          dp4 r65.___w, r66, r66

          sqrt_vec r65.___w, r65.w

          mov r65._y__, l17

          mul_ieee r65.___w, r65.w, r65.y

          endif

          mov r65._y__, l18

          ishl r65._y__, r65.z, r65.y

          iadd r65.x___, r65.x, r65.y

          mov r1011, r65.w

          mov r1010.x___, r65.x

          uav_raw_store_id(11) mem0.x___, r1010.x, r1011.x

          ret

endfunc ; main

;ARGSTART:main

;uniqueid:1029

;memory:datareqd

;ARGEND:main

end

I'm using 1.11 version of SKA. I have 12.4 Catalyst drivers and HD 6850 1GB video card. I've tried 1.10 version of SKA but it didn't help. Note that outside of SKA, everything is fine, kernel compiles with no problems. I've tried to dump IL by adding enviroment variable GPU_DUMP_DEVICE_KERNEL=3. The instruction "uav_raw_load_id(11)_cached_aligned" is still there, but it doesn't cause compilation error, and ISA file is produced as well.

Does anyone have any advice.

0 Likes
1 Solution
rouellet
Staff

I have checked your kernel against the soon-to-be-announced V1.12 update to AMD APP KernelAnalyzer.

That version does not have this problem.

It is possible that the CAL run time which comes with the Catalyst drivers is incompatible with the analysis modules used when "use Installed Driver" is not selected.

View solution in original post

0 Likes
5 Replies
clamport
Journeyman III

The parse error is the same as what I am getting.

0 Likes

Can you try whether the problem persists with the live driver mode of the tool?

Here are the steps to use that mode:
1.       Start the KernelAnalyzer tool
2.       Go to Edit->Options
3.       Under the Compiler and Statistics Options tab, change the CAL version drop down menu to “Use Installed Driver”.
4.       Close the Options panel.

0 Likes

I've tried it out, and it does indeed works. But now I don't get statistics of a compiled kernel.

0 Likes

Hi,

I'm getting the same error when I read a double from global memory. When I change it to float and it works fine.

I did the "Use Installed Driver" suggestion, and KernelAnalyzer is able to compile now except with no gpu stats.

The original problem still exists -- my own program still crashes when I try to compile the kernel at runtime.

thanks for your help.

0 Likes
rouellet
Staff

I have checked your kernel against the soon-to-be-announced V1.12 update to AMD APP KernelAnalyzer.

That version does not have this problem.

It is possible that the CAL run time which comes with the Catalyst drivers is incompatible with the analysis modules used when "use Installed Driver" is not selected.

0 Likes