2 Replies Latest reply on Sep 16, 2008 3:19 AM by bs_kl@yahoo.com

    IL compiler bug? (CAL CL 1.1 & 1.2)

    bs_kl@yahoo.com
      For the following IL code:

      il_ps_2_0
      ; Pixel shader. Samples one texel from integer resource 0, splits each of
      ; its four components into a low 16-bit half (and) and a high half (ishr),
      ; uses each (low, high) pair as the x/y coordinate of a fetch from float
      ; resource 1, and writes the sum of the four fetched .x values as a uint.
      ; The per-lane notes below assume destination lane i reads the source lane
      ; named at position i of the swizzle.
      dcl_input_interp(linear) v0.xy__
      dcl_output_generic o0.x___
      dcl_resource_id(0)_type(2d,unnorm)_fmtx(sint)_fmty(sint)_fmtz(sint)_fmtw(sint)
      dcl_resource_id(1)_type(2d)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)
      ; l0 = (0.000000f 0.000000f 0.000000f 0.000000f)
      dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000
      ; Zero the z/w coordinate components used by the first resource-1 fetch.
      mov r0.__zw, l0
      ; l1 = (2.000000f 4.000000f 0.000000f 0.000000f)
      dcl_literal l1, 0x40000000, 0x40800000, 0x00000000, 0x00000000
      ; l2 = (-0.500000f -1.500000f 0.000000f 0.000000f)
      dcl_literal l2, 0xBF000000, 0xBFC00000, 0x00000000, 0x00000000
      ; r1.xy = v0.xy * (2.0, 4.0) + (-0.5, -1.5): coordinate into resource 0.
      mad_ieee r1.xy__, v0.xyxx, l1, l2
      sample_resource(0)_sampler(0) r1, r1.xyxx
      ; l3 = 0x0000FFFF in every component: an integer mask selecting the low
      ; 16 bits (the bit pattern is not meant to be read as a float).
      dcl_literal l3, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF
      ; r2 = low 16 bits of the sampled texel, lanes taken in z,x,y,w order.
      and r2, r1.zxyw, l3
      ; l4 = 0x00000010 in every component: an integer shift count of 16.
      dcl_literal l4, 0x00000010, 0x00000010, 0x00000010, 0x00000010
      ; r1 = sampled texel shifted right by 16, lanes taken in x,w,y,z order.
      ; Source and destination are both r1, so every lane must read the value
      ; r1 held before this instruction.
      ; NOTE(review): this is the instruction the accompanying report says is
      ; miscompiled into chained shifts.
      ishr r1, r1.xwyz, l4
      ; Fetch 1 coordinate: (low, high) halves of the sampled .x component.
      mov r0.x___, r2.y
      mov r0._y__, r1.x
      load_id(1) r0, r0
      ; Fetch 2 coordinate: (low, high) halves of the sampled .y component.
      mov r3.x___, r2.z
      mov r3._y__, r1.z
      ; l5 = (0.000000f 0.000000f 0.000000f 0.000000f)
      dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000
      mov r3.__zw, l5
      load_id(1) r3, r3
      ; Running sum lives in r0.x.
      add r0.x___, r0.x, r3.x
      ; r1.x = low half of the sampled .w component; r1.y already holds its
      ; high half, so r1.xy becomes the coordinate of the last fetch.
      mov r1.x___, r2.w
      ; r2.y = high half of the sampled .z component; r2.x already holds its
      ; low half, so r2.xy becomes the coordinate of the third fetch.
      mov r2._y__, r1.w
      ; l6 = (0.000000f 0.000000f 0.000000f 0.000000f)
      dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000
      mov r2.__zw, l6
      load_id(1) r2, r2
      add r0.x___, r0.x, r2.x
      ; l7 = (0.000000f 0.000000f 0.000000f 0.000000f)
      dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000
      mov r1.__zw, l7
      load_id(1) r1, r1
      add r0.x___, r0.x, r1.x
      ; Convert the float sum to an unsigned integer and write it to the output.
      ftou o0.x___, r0.x
      ret_dyn
      end


      Notice the bold line ishr r1, r1.xwyz, l4

      The calclCompile() function (both version 1.1 and 1.2) produces the following ASM (for RV670 target, XP64):


      ;PS; -------- Disassembly --------------------
      00 ALU: ADDR(32) CNT(7)
      0 x: MULADD_e R0.x, R0.x, (0x40000000, 2.0f).x, -0.5
      y: MULADD_e R0.y, R0.y, (0x40800000, 4.0f).z, (0xBFC00000, -1.5f).y
      z: MOV R2.z, 0.0f
      w: MOV R1.w, 0.0f
      t: MOV R3.w, 0.0f
      01 TEX: ADDR(64) CNT(1) VALID_PIX
      1 SAMPLE R0, R0.xyxx, t0, s0 UNNORM(XYZW)
      02 ALU: ADDR(39) CNT(17)
      2 x: AND_INT R3.x, R0.z, (0x0000FFFF, 9.183409486e-41f).x
      y: AND_INT ____, R0.x, (0x0000FFFF, 9.183409486e-41f).x
      z: AND_INT ____, R0.w, (0x0000FFFF, 9.183409486e-41f).x
      w: AND_INT T0.w, R0.y, (0x0000FFFF, 9.183409486e-41f).x
      t: ASHR R4.y, R0.w, (0x00000010, 2.242077543e-44f).y
      3 x: MOV R1.x, PV2.y
      z: MOV R4.z, PV2.z
      w: MOV R4.w, 0.0f
      t: ASHR T1.w, PS2, (0x00000010, 2.242077543e-44f).x
      4 x: MOV R2.x, T0.w
      y: MOV R2.y, PS3
      t: ASHR R1.y, R0.x, (0x00000010, 2.242077543e-44f).x
      5 t: ASHR R3.y, T1.w, (0x00000010, 2.242077543e-44f).x
      03 TEX: ADDR(66) CNT(4) VALID_PIX
      6 LD R1.x___, R1.xyxw, t1, s0
      7 LD R2.x___, R2.xyxz, t1, s0
      8 LD R3.x___, R3.xyxw, t1, s0
      9 LD R0.x___, R4.zyzw, t1, s0
      04 ALU: ADDR(56) CNT(4)
      10 z: ADD ____, R1.x, R2.x
      11 z: ADD ____, PV10.z, R3.x
      12 y: ADD ____, PV11.z, R0.x
      13 t: F_TO_U R0.x, PV12.y
      05 EXP_DONE: PIX0, R0.x___
      END_OF_PROGRAM


      Notice the four bold ASHR lines in the ASM output, which do not correspond to the IL instruction ishr r1, r1.xwyz, l4. The error is easy to spot: the 2nd ASHR uses the 1st ASHR's output as its input, and the 4th ASHR uses the 2nd ASHR's output as its input.

        • IL compiler bug? (CAL CL 1.1 & 1.2)
          MicahVillmow
          bs_kl,
          Many of the operations in the IL code being generated are not supported by the CAL-specific subset of IL; in particular, the load instruction is not legal. If you can produce a simpler test case that does not use these instructions, I can verify the issue and report it to the correct team so that it gets fixed in a future release.

          Thanks,
          • IL compiler bug? (CAL CL 1.1 & 1.2)
            bs_kl@yahoo.com
            Hi Micah,

            I have modified the IL source code to not use the load instruction:

            il_ps_2_0
            ; Pixel shader. Samples one texel from integer resource 0, splits each
            ; of its four components into a low 16-bit half (and) and a high half
            ; (ishr), uses each (low, high) pair as the x/y coordinate of a
            ; sample_resource fetch from float resource 1, and writes the sum of
            ; the four fetched .x values as a uint.
            ; The per-lane notes below assume destination lane i reads the source
            ; lane named at position i of the swizzle.
            dcl_input_interp(linear) v0.xy__
            dcl_output_generic o0.x___
            dcl_resource_id(0)_type(2d,unnorm)_fmtx(sint)_fmty(sint)_fmtz(sint)_fmtw(sint)
            dcl_resource_id(1)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)
            ; l0 = (2.000000f 4.000000f 0.000000f 0.000000f)
            dcl_literal l0, 0x40000000, 0x40800000, 0x00000000, 0x00000000
            ; l1 = (-0.500000f -1.500000f 0.000000f 0.000000f)
            dcl_literal l1, 0xBF000000, 0xBFC00000, 0x00000000, 0x00000000
            ; r0.xy = v0.xy * (2.0, 4.0) + (-0.5, -1.5): coordinate into resource 0.
            mad_ieee r0.xy__, v0.xyxx, l0, l1
            sample_resource(0)_sampler(0) r0, r0.xyxx
            ; l2 = 0x0000FFFF in every component: an integer mask selecting the
            ; low 16 bits (the bit pattern is not meant to be read as a float).
            dcl_literal l2, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF
            ; r1 = low 16 bits of the sampled texel, lanes taken in y,x,z,w order.
            and r1, r0.yxzw, l2
            ; l3 = 0x00000010 in every component: an integer shift count of 16.
            dcl_literal l3, 0x00000010, 0x00000010, 0x00000010, 0x00000010
            ; r0 = sampled texel shifted right by 16, lanes taken in x,z,y,w order.
            ; Source and destination are both r0, so every lane must read the
            ; value r0 held before this instruction.
            ; NOTE(review): this is the instruction the accompanying report says
            ; is miscompiled into chained shifts.
            ishr r0, r0.xzyw, l3
            ; Fetch 1 coordinate: (low, high) halves of the sampled .x component.
            mov r2.x___, r1.y
            mov r2._y__, r0.x
            sample_resource(1)_sampler(0) r2, r2.xyxx
            ; Fetch 2 coordinate: r0.x = low half of the sampled .z component;
            ; r0.y already holds its high half.
            mov r0.x___, r1.z
            sample_resource(1)_sampler(0) r3, r0.xyxx
            ; Park the fetch-2 result in r2.y next to the fetch-1 result in r2.x.
            mov r2._y__, r3.x
            ; Fetch 3 coordinate: r0.x = low half of the sampled .w component;
            ; r0.w already holds its high half (read below as r0.xw).
            mov r0.x___, r1.w
            ; Fetch 4 coordinate: r1.y = high half of the sampled .y component;
            ; r1.x already holds its low half.
            mov r1._y__, r0.z
            sample_resource(1)_sampler(0) r4097, r0.xwxx
            ; Swizzled copy puts the fetch-3 result (.x of r4097) into r0.y.
            mov r0, r4097.yxzw
            sample_resource(1)_sampler(0) r1, r1.xyxx
            mov r0.x___, r1.x
            ; r0.xy = (fetch 1 + fetch 4, fetch 2 + fetch 3); then fold into r0.x.
            add r0.xy__, r2.xyxx, r0.xyxx
            add r0.x___, r0.y, r0.x
            ; Convert the float sum to an unsigned integer and write it to the output.
            ftou o0.x___, r0.x
            ret_dyn
            end


            And this is the generated ASM (by CAL CL 1.2, x64, RV670 target):

            ;PS; -------- Disassembly --------------------
            00 ALU: ADDR(32) CNT(4)
            0 x: MULADD_e R0.x, R0.x, (0x40000000, 2.0f).x, -0.5
            y: MULADD_e R0.y, R0.y, (0x40800000, 4.0f).z, (0xBFC00000, -1.5f).y
            01 TEX: ADDR(64) CNT(1)
            1 SAMPLE R0, R0.xyxx, t0, s0 UNNORM(XYZW)
            02 ALU: ADDR(36) CNT(15)
            2 x: AND_INT ____, R0.w, (0x0000FFFF, 9.183409486e-41f).x
            z: AND_INT ____, R0.x, (0x0000FFFF, 9.183409486e-41f).x
            w: AND_INT ____, R0.z, (0x0000FFFF, 9.183409486e-41f).x
            t: ASHR R4.y, R0.z, (0x00000010, 2.242077543e-44f).y
            3 x: AND_INT R3.x, R0.y, (0x0000FFFF, 9.183409486e-41f).x
            y: MOV R2.y, PV2.x
            z: MOV R1.z, PV2.z
            w: MOV R4.w, PV2.w
            t: ASHR R2.w, R0.w, (0x00000010, 2.242077543e-44f).y
            4 t: ASHR R1.y, R0.x, (0x00000010, 2.242077543e-44f).x
            5 t: ASHR R3.y, R4.y, (0x00000010, 2.242077543e-44f).x
            03 TEX: ADDR(66) CNT(4) VALID_PIX
            6 SAMPLE R0.x___, R1.zyzz, t1, s0 UNNORM(XYZW)
            7 SAMPLE R2._x__, R2.ywyy, t1, s0 UNNORM(XYZW)
            8 SAMPLE R3.x___, R3.xyxx, t1, s0 UNNORM(XYZW)
            9 SAMPLE R4._x__, R4.wyww, t1, s0 UNNORM(XYZW)
            04 ALU: ADDR(51) CNT(4)
            10 z: ADD ____, R4.y, R2.y
            w: ADD ____, R0.x, R3.x
            11 y: ADD ____, PV10.w, PV10.z
            12 t: F_TO_U R0.x, PV11.y
            05 EXP_DONE: PIX0, R0.x___
            END_OF_PROGRAM

            Notice that the 4th ASHR instruction uses the output of the 1st ASHR instruction as its input. This is wrong: the IL source specifies ishr r0, r0.xzyw, l3, so each component must go through an independent shift-right operation on the original register value.


            Aside from this, if the LOAD instruction cannot be used, what method does CAL provide for loading a value from a buffer using an integer index? Converting the integer index to a floating-point coordinate and then using the SAMPLE instruction seems silly (the ITOF instruction can only be executed by the T unit, an expensive resource that should not be spent on a workaround).


            Thanks.