IL compiler bug? (CAL CL 1.1 & 1.2)

Discussion created by on Sep 15, 2008
Latest reply on Sep 16, 2008 by
For the following IL code:

; Repro kernel (AMD IL pixel shader). It samples one 4-component value from
; resource 0, splits every component into its low 16 bits (and) and its
; value shifted right by 16 (ishr), feeds each (low, shifted) pair as the
; .xy operand of a load_id(1), and sums the .x results of the four loads.
dcl_input_interp(linear) v0.xy__
dcl_output_generic o0.x___
; l0 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000
; Clear the unused .zw of the first load operand.
mov r0.__zw, l0
; l1 = (2.000000f 4.000000f 0.000000f 0.000000f)
dcl_literal l1, 0x40000000, 0x40800000, 0x00000000, 0x00000000
; l2 = (-0.500000f -1.500000f 0.000000f 0.000000f)
dcl_literal l2, 0xBF000000, 0xBFC00000, 0x00000000, 0x00000000
; r1.xy = v0.xy * (2, 4) + (-0.5, -1.5): coordinate used for the sample below.
mad_ieee r1.xy__, v0.xyxx, l1, l2
; r1 = value sampled from resource 0; called (x, y, z, w) in the comments below.
sample_resource(0)_sampler(0) r1, r1.xyxx
; l3 = integer mask 0x0000FFFF in every component (as a float it is a
; denormal, which is why the generated comment printed 0.000000f)
dcl_literal l3, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF
; Low halves, permuted: r2 = (z & 0xFFFF, x & 0xFFFF, y & 0xFFFF, w & 0xFFFF).
and r2, r1.zxyw, l3
; l4 = integer shift count 16 (0x10) in every component (also a denormal
; when read as a float, hence the 0.000000f in the generated comment)
dcl_literal l4, 0x00000010, 0x00000010, 0x00000010, 0x00000010
; Shifted halves, permuted: r1 = (x >> 16, w >> 16, y >> 16, z >> 16).
; All four shifts read the ORIGINAL sampled components; none depends on
; the result of another shift. This is the instruction the report is about.
ishr r1, r1.xwyz, l4
; Load 1 operand: r0.xy = (x & 0xFFFF, x >> 16).
mov r0.x___, r2.y
mov r0._y__, r1.x
load_id(1) r0, r0
; Load 2 operand: r3.xy = (y & 0xFFFF, y >> 16), .zw cleared.
mov r3.x___, r2.z
mov r3._y__, r1.z
; l5 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000
mov r3.__zw, l5
load_id(1) r3, r3
add r0.x___, r0.x, r3.x
; Save w & 0xFFFF into r1.x now: r2.w is about to be overwritten, and r1.x
; (x >> 16) has already been consumed by load 1.
mov r1.x___, r2.w
; Load 3 operand: r2.x already holds z & 0xFFFF; r2.y = z >> 16, .zw cleared.
mov r2._y__, r1.w
; l6 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000
mov r2.__zw, l6
load_id(1) r2, r2
add r0.x___, r0.x, r2.x
; l7 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000
; Load 4 operand: r1.xy = (w & 0xFFFF, w >> 16), .zw cleared.
mov r1.__zw, l7
load_id(1) r1, r1
add r0.x___, r0.x, r1.x
; Convert the accumulated float sum to unsigned int and write it to the output.
ftou o0.x___, r0.x

Notice the line `ishr r1, r1.xwyz, l4` (shown in bold in the original post).

The calclCompile() function (both version 1.1 and 1.2) produces the following ASM (for RV670 target, XP64):

;PS; -------- Disassembly --------------------
; Compiler output for the IL kernel in this report (RV670 target). The lines
; starting with ';' below the header are reviewer annotations, not compiler
; output. The IL "ishr r1, r1.xwyz, l4" needs four independent shifts whose
; sources are the sampled R0.x, R0.w, R0.y and R0.z; the four ASHR
; instructions below are annotated against that expectation.
; Clause 00: coordinate scale/bias (the IL mad_ieee) plus zeroing of
; R2.z, R1.w and R3.w, which the LD operand swizzles read later.
00 ALU: ADDR(32) CNT(7)
0 x: MULADD_e R0.x, R0.x, (0x40000000, 2.0f).x, -0.5
y: MULADD_e R0.y, R0.y, (0x40800000, 4.0f).z, (0xBFC00000, -1.5f).y
z: MOV R2.z, 0.0f
w: MOV R1.w, 0.0f
t: MOV R3.w, 0.0f
; R0 = sampled value (the IL sample_resource).
1 SAMPLE R0, R0.xyxx, t0, s0 UNNORM(XYZW)
02 ALU: ADDR(39) CNT(17)
; Group 2: the four AND_INT are the IL "and r2, r1.zxyw, l3".
2 x: AND_INT R3.x, R0.z, (0x0000FFFF, 9.183409486e-41f).x
y: AND_INT ____, R0.x, (0x0000FFFF, 9.183409486e-41f).x
z: AND_INT ____, R0.w, (0x0000FFFF, 9.183409486e-41f).x
w: AND_INT T0.w, R0.y, (0x0000FFFF, 9.183409486e-41f).x
; ASHR #1: R4.y = R0.w >> 16, reads the sampled value directly.
t: ASHR R4.y, R0.w, (0x00000010, 2.242077543e-44f).y
3 x: MOV R1.x, PV2.y
z: MOV R4.z, PV2.z
w: MOV R4.w, 0.0f
; ASHR #2 (suspect): the source is PS2, the t-slot result of group 2, i.e.
; the output of ASHR #1, so an already-shifted value is shifted again.
t: ASHR T1.w, PS2, (0x00000010, 2.242077543e-44f).x
4 x: MOV R2.x, T0.w
; R2.y receives PS3, the result of ASHR #2, and is the .y operand of LD R2.
y: MOV R2.y, PS3
; ASHR #3: R1.y = R0.x >> 16, reads the sampled value directly.
t: ASHR R1.y, R0.x, (0x00000010, 2.242077543e-44f).x
; ASHR #4 (suspect): the source is T1.w, the output of ASHR #2. No ASHR in
; this listing reads R0.y or R0.z, although the IL ishr swizzle needs both.
5 t: ASHR R3.y, T1.w, (0x00000010, 2.242077543e-44f).x
; The four loads from t1 (the IL load_id(1) instructions). The .y operands of
; LD R2 and LD R3 come from the suspect ASHR #2 and ASHR #4.
6 LD R1.x___, R1.xyxw, t1, s0
7 LD R2.x___, R2.xyxz, t1, s0
8 LD R3.x___, R3.xyxw, t1, s0
9 LD R0.x___, R4.zyzw, t1, s0
; Clause 04: sum the four loaded .x values, then convert to unsigned int.
04 ALU: ADDR(56) CNT(4)
10 z: ADD ____, R1.x, R2.x
11 z: ADD ____, PV10.z, R3.x
12 y: ADD ____, PV11.z, R0.x
13 t: F_TO_U R0.x, PV12.y
05 EXP_DONE: PIX0, R0.x___

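Notice the four ASHR instructions in the ASM output (shown in bold in the original post); they do not correspond to the IL instruction ishr r1, r1.xwyz, l4. The error is easy to spot: the 2nd ASHR uses the 1st ASHR's output as its input, and the 4th ASHR uses the 2nd ASHR's output as its input, whereas the IL requires each of the four shifts to operate on a separate component of the sampled value.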