Hello,
There seems to be a bug with SKA or something else. I get "Error: IL Text Translator(line=86): parse error near _a " for all kernels that load float4 data from global memory. I've looked up the line that prevents successful compilation and it reads: "uav_raw_load_id(11)_cached_aligned". It seems that "_aligned" part is that causes the error. For example:
__kernel void main(
__global const float4 *buf_in,
__global float *buf_out)
{
uint global_id = get_global_id(0);
buf_out[global_id] = length(buf_in[global_id]);
}
produces this:
Error: IL Text Translator(line=86): parse error near _a
mdef(16383)_out(1)_in(2)
mov r0, in0
mov r1, in1
div_zeroop(infinity) r0.x___, r0.x, r1.x
mov out0, r0
mend
il_cs_2_0
dcl_cb cb0[15] ; Constant buffer that holds ABI data
dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003
dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD
dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC
dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF
dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF
dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C
dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020
dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F
dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000
call 1024;$
endmain
func 1024 ; __OpenCL_main_kernel
mov r1013, cb0[8].x
mov r1019, l1.0000
dcl_max_thread_per_group 256
dcl_raw_uav_id(11)
dcl_arena_uav_id(8)
mov r0.__z_, vThreadGrpIdFlat0.x
mov r1022.xyz0, vTidInGrp0.xyz
mov r1023.xyz0, vThreadGrpId0.xyz
imad r1021.xyz0, r1023.xyzz, cb0[1].xyzz, r1022.xyzz
iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0
iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0
mov r1023.___w, r0.z
ishl r1023.___w, r1023.w, l0.z
mov r1018.x___, l0.0000
udiv r1024.xyz_, r1021.xyzz, cb0[10].xyzz
imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz
dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0
dcl_literal l18, 0x00000002, 0x00000002, 0x00000002, 0x00000002; f32:i32 2
dcl_literal l11, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4
dcl_literal l22, 0x00000007, 0x00000007, 0x00000007, 0x00000007; f32:i32 7
dcl_literal l20, 0x00000008, 0x00000008, 0x00000008, 0x00000008; f32:i32 8
dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010; f32:i32 16
dcl_literal l23, 0x00000020, 0x00000020, 0x00000020, 0x00000020; f32:i32 32
dcl_literal l12, 0x00800000, 0x00800000, 0x00800000, 0x00800000; f32:i32 8388608
dcl_literal l17, 0x14800000, 0x14800000, 0x14800000, 0x14800000; f32:i32 343932928
dcl_literal l14, 0x1E800000, 0x1E800000, 0x1E800000, 0x1E800000; f32:i32 511705088
dcl_literal l15, 0x60800000, 0x60800000, 0x60800000, 0x60800000; f32:i32 1619001344
dcl_literal l16, 0x6A800000, 0x6A800000, 0x6A800000, 0x6A800000; f32:i32 1786773504
dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000; f32:i32 2139095040
dcl_cb cb1[2]
; Kernel arg setup: buf_in
mov r1.x, cb1[0].x
; Kernel arg setup: buf_out
mov r1.y, cb1[1].x
dcl_cb cb2[3]
call 1029 ; main
ret
endfunc ; __OpenCL_main_kernel
;ARGSTART:__OpenCL_main_kernel
;version:3:1:104
;device:barts
;uniqueid:1024
;memory:hwprivate:0
;memory:hwregion:0
;memory:hwlocal:0
;pointer:buf_in:float:1:1:0:uav:11:16:RO:0:0
;constarg:0:buf_in
;pointer:buf_out:float:1:1:16:uav:11:4:RW:0:0
;memory:datareqd
;function:1:1029
;uavid:11
;privateid:1
;reflection:0:float4*
;reflection:1:float*
;ARGEND:__OpenCL_main_kernel
func 1029 ; main ; @__OpenCL_main_kernel
; BB#0: ; %entry
mov r65.x___, r1.y
mov r65._y__, r1.x
mov r66, r1021.xyz0
mov r65.__z_, r66.00x0
mov r65.___w, l11
ishl r65.___w, r65.z, r65.w
iadd r65.___w, r65.y, r65.w
mov r1010.x___, r65.w
uav_raw_load_id(11)_cached_aligned r1011, r1010
mov r66, r1011
dp4 r65.___w, r66, r66
mov r67.x___, l12
ge r67._y__, r65.w, r67.x
ne r67.x___, r67.x, r67.x
ne r67.__z_, r65.w, r65.w
ior r67.x___, r67.y, r67.x
ior r67.x___, r67.z, r67.x
if_logicalnz r67.x
mov r65._y__, l13
eq r65._y__, r65.w, r65.y
if_logicalnz r65.y
mov r65.___w, l14
mov r67, r65.w
mul_ieee r66, r66, r67
dp4 r65.___w, r66, r66
sqrt_vec r65.___w, r65.w
mov r65._y__, l15
mul_ieee r65.___w, r65.w, r65.y
else
sqrt_vec r65.___w, r65.w
endif
else
mov r65.___w, l16
mov r67, r65.w
mul_ieee r66, r66, r67
dp4 r65.___w, r66, r66
sqrt_vec r65.___w, r65.w
mov r65._y__, l17
mul_ieee r65.___w, r65.w, r65.y
endif
mov r65._y__, l18
ishl r65._y__, r65.z, r65.y
iadd r65.x___, r65.x, r65.y
mov r1011, r65.w
mov r1010.x___, r65.x
uav_raw_store_id(11) mem0.x___, r1010.x, r1011.x
ret
endfunc ; main
;ARGSTART:main
;uniqueid:1029
;memory:datareqd
;ARGEND:main
end
I'm using 1.11 version of SKA. I have 12.4 Catalyst drivers and HD 6850 1GB video card. I've tried 1.10 version of SKA but it didn't help. Note that outside of SKA, everything is fine, kernel compiles with no problems. I've tried to dump IL by adding enviroment variable GPU_DUMP_DEVICE_KERNEL=3. The instruction "uav_raw_load_id(11)_cached_aligned" is still there, but it doesn't cause compilation error, and ISA file is produced as well.
Does anyone have any advice.
Solved! Go to Solution.
I have checked your kernel against the soon-to-be-announced V1.12 update to AMD APP KernelAnalyzer.
That version does not have this problem.
It is possible that the CAL run time which comes with the Catalyst drivers is incompatible with the analysis modules used when "use Installed Driver" is not selected.
The parse error is the same as what I am getting.
Can you try whether the problem persists with the live driver mode of the tool?
Here are the steps to use that mode:
1. Start the KernelAnalyzer tool
2. Go to Edit->Options
3. Under the Compiler and Statistics Options tab, change the CAL version drop down menu to “Use Installed Driver”.
4. Close the Options panel.
I've tried it out, and it does indeed works. But now I don't get statistics of a compiled kernel.
Hi,
I'm getting the same error when I read a double from global memory. When I change it to float and it works fine.
I did the "Use Installed Driver" suggestion, and KernelAnalyzer is able to compile now except with no gpu stats.
The original problem still exists -- my own program still crashes when I try to compile the kernel at runtime.
thanks for your help.
I have checked your kernel against the soon-to-be-announced V1.12 update to AMD APP KernelAnalyzer.
That version does not have this problem.
It is possible that the CAL run time which comes with the Catalyst drivers is incompatible with the analysis modules used when "use Installed Driver" is not selected.