hazeman

IL compiler/assembler bug?

Discussion created by hazeman on Jan 2, 2011
Latest reply on Jan 27, 2011 by MicahVillmow

I have two IL kernels. The second one differs from the first by two extra dmul instructions. The first one works; the second one gives a wrong value in the .xy component of the output variable.

P.S. My card is an ATI 5850, driver 10.12, Ubuntu 9.04.

 

 

KERNEL 1 ( WORKING )
; ----------------------------------------------------------------------
; Reproducer listing: two il_ps_2_0 pixel-shader kernels.
; Both instruction streams are identical up to and including
; `mov r98,l6`. Kernel 2 then inserts two extra dmul instructions, and
; every temporary it writes after that point is numbered 5 higher than
; in kernel 1 (r105/r112/r114/r121 instead of r100/r107/r109/r116).
; Instructions are reproduced exactly as posted; only line breaks and
; these comments were added.
; ----------------------------------------------------------------------
il_ps_2_0

; --- declarations ---
dcl_input_position_interp(linear_noperspective) vWinCoord0.xy__
dcl_cb cb0[1]
dcl_output_generic o0
dcl_resource_id(0)_type(2d,unnorm)_fmtx(unknown)_fmty(unknown)_fmtz(unknown)_fmtw(unknown)
dcl_literal l0, 0x0, 0x0, 0x0, 0x0
dcl_literal l4, 0x0, 0x1, 0x0, 0x0
; l6: if each xy / zw pair is read as one double with the high dword in
; .y / .w, 0x3ff80000:0x00000000 would be 1.5 -- TODO confirm layout.
dcl_literal l6, 0x0, 0x3ff80000, 0x0, 0x3ff80000
dcl_literal l2, 0x1, 0x0, 0x0, 0x0
dcl_literal l3, 0x8, 0x0, 0x0, 0x0
; l5.x / l5.y are used as the last operand of the dldexp instructions.
; 0xffffffff is -1 as a signed 32-bit value, so presumably it halves
; the input -- confirm.
dcl_literal l5, 0xffffffff, 0xffffffff, 0x0, 0x0
; l1 is declared but not referenced by any instruction in this kernel.
dcl_literal l1, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff

; --- element index from the window coordinate ---
; r1.x = ((uint(y) << 8) + uint(x)) << 1   (shift counts: l3.x = 8, l2.x = 1)
mov r0.xy,vWinCoord0.xy
ftou r6.xy,r0.xy
mov r9.xy,r6.xy
mov r10.x,l2.x
mov r15.x,l3.x
ishl r20.x,r9.y,r15.x
iadd r24.x,r20.x,r9.x
ishl r28.x,r24.x,r10.x
mov r1.x,r28.x

; --- bounds mask ---
; NOTE(review): the buffer is declared as cb0[1] but referenced here as
; cb[0]; kept exactly as posted -- confirm against the original IL.
mov r3,cb[0]
mov r38.xy,l4.xy
mov r41.x,r1.x
mov r41._y__,r1.xxxx
mov r42.xy,r41.xy
mov r40.xy,r42.xy
; r47.xy = (r1.x + 0, r1.x + 1); r2.xy = unsigned (r47.xy < r3.xy)
iadd r47.xy,r40.xy,r38.xy
ult r52.xy,r47.xy,r3.xy
mov r2.xy,r52.xy

; --- fetch input; xy and zw are each processed as one double ---
sample_resource(0)_sampler(0) r57,r0.xy
mov r56,r57
mov r60,r56
; r63 is written here and never read afterwards.
mov r63,r60

; r61 = dldexp(r60, l5) with the _neg(yw) modifier applied.
; Presumably -(input / 2) for both doubles -- confirm.
dldexp r65.xy__,r60.xyxy,l5.x
dldexp r65.__zw,r60.zwzw,l5.y
mov r68,r65_neg(yw)
mov r70,r68
mov r61,r70
; r71 is written here and never read afterwards.
mov r71,r61

; r62 = drsq(r60): initial reciprocal-square-root value
drsq r72.xy__,r60.xyxy
drsq r72.__zw,r60.zwzw
mov r75,r72
mov r62,r75
; r76 is written here and never read afterwards.
mov r76,r62

; --- first refinement: r62 = r62 * ((r62 * r62) * r61 + l6) ---
; (has the shape of a Newton-Raphson rsqrt step -- reviewer's reading)
mov r77,l6
dmul r79.xy__,r62.xyxy,r62.xyxy
dmul r79.__zw,r62.zwzw,r62.zwzw
dmad r84.xy__,r79.xyxy,r61.xyxy,r77.xyxy
dmad r84.__zw,r79.zwzw,r61.zwzw,r77.zwzw
dmul r91.xy__,r62.xyxy,r84.xyxy
dmul r91.__zw,r62.zwzw,r84.zwzw
mov r62,r91

; --- kernel 1 tail: r62 = r62 * r61 + l6 (no squaring before the dmad) ---
mov r98,l6
dmad r100.xy__,r62.xyxy,r61.xyxy,r98.xyxy
dmad r100.__zw,r62.zwzw,r61.zwzw,r98.zwzw
mov r62,r100

; --- output: per component, r62 or l0 (all zero), chosen by r2.xxyy ---
mov r107,l0
cmov_logical r109,r2.xxyy,r62,r107
mov r116,r109
mov o0.xyzw,r116
end

KERNEL 2 ( invalid result on the .xy component of output )
; Same as kernel 1 up to `mov r98,l6`; see the comments there.
il_ps_2_0

; --- declarations (identical to kernel 1) ---
dcl_input_position_interp(linear_noperspective) vWinCoord0.xy__
dcl_cb cb0[1]
dcl_output_generic o0
dcl_resource_id(0)_type(2d,unnorm)_fmtx(unknown)_fmty(unknown)_fmtz(unknown)_fmtw(unknown)
dcl_literal l0, 0x0, 0x0, 0x0, 0x0
dcl_literal l4, 0x0, 0x1, 0x0, 0x0
dcl_literal l6, 0x0, 0x3ff80000, 0x0, 0x3ff80000
dcl_literal l2, 0x1, 0x0, 0x0, 0x0
dcl_literal l3, 0x8, 0x0, 0x0, 0x0
dcl_literal l5, 0xffffffff, 0xffffffff, 0x0, 0x0
dcl_literal l1, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff

; --- element index from the window coordinate (identical to kernel 1) ---
mov r0.xy,vWinCoord0.xy
ftou r6.xy,r0.xy
mov r9.xy,r6.xy
mov r10.x,l2.x
mov r15.x,l3.x
ishl r20.x,r9.y,r15.x
iadd r24.x,r20.x,r9.x
ishl r28.x,r24.x,r10.x
mov r1.x,r28.x

; --- bounds mask (identical to kernel 1, including the cb[0] spelling) ---
mov r3,cb[0]
mov r38.xy,l4.xy
mov r41.x,r1.x
mov r41._y__,r1.xxxx
mov r42.xy,r41.xy
mov r40.xy,r42.xy
iadd r47.xy,r40.xy,r38.xy
ult r52.xy,r47.xy,r3.xy
mov r2.xy,r52.xy

; --- fetch input, build r61 and the initial r62 (identical to kernel 1) ---
sample_resource(0)_sampler(0) r57,r0.xy
mov r56,r57
mov r60,r56
mov r63,r60
dldexp r65.xy__,r60.xyxy,l5.x
dldexp r65.__zw,r60.zwzw,l5.y
mov r68,r65_neg(yw)
mov r70,r68
mov r61,r70
mov r71,r61
drsq r72.xy__,r60.xyxy
drsq r72.__zw,r60.zwzw
mov r75,r72
mov r62,r75
mov r76,r62

; --- first refinement (identical to kernel 1) ---
mov r77,l6
dmul r79.xy__,r62.xyxy,r62.xyxy
dmul r79.__zw,r62.zwzw,r62.zwzw
dmad r84.xy__,r79.xyxy,r61.xyxy,r77.xyxy
dmad r84.__zw,r79.zwzw,r61.zwzw,r77.zwzw
dmul r91.xy__,r62.xyxy,r84.xyxy
dmul r91.__zw,r62.zwzw,r84.zwzw
mov r62,r91

; --- kernel 2 tail: r62 = (r62 * r62) * r61 + l6 ---
; The two dmul instructions below are the only instructions kernel 1
; does not have; the dmad then consumes r100 instead of r62.
mov r98,l6
dmul r100.xy__,r62.xyxy,r62.xyxy <---- EXTRA INSTRUCTION
dmul r100.__zw,r62.zwzw,r62.zwzw <---- EXTRA INSTRUCTION
dmad r105.xy__,r100.xyxy,r61.xyxy,r98.xyxy
dmad r105.__zw,r100.zwzw,r61.zwzw,r98.zwzw
mov r62,r105

; --- output: per component, r62 or l0 (all zero), chosen by r2.xxyy ---
mov r112,l0
cmov_logical r114,r2.xxyy,r62,r112
mov r121,r114
mov o0.xyzw,r121
end

Outcomes