6 Replies Latest reply on Aug 8, 2009 6:34 PM by Raistmer

    Brook+ to IL translation - why such big differences?

    Raistmer
      A small modification in a Brook+ kernel causes big changes in the IL code

      The two kernels differ only by one additional float4 value load and one add operation.
      But their IL translations differ much more.
      Why were so many operations added?
      Code:

      Kernel1:
      // Brook+ kernel 1.
      // Each instance uses instance().y as the first index into src/dest,
      // reads one float4 from src[threadID][0] and stores
      //   o11.x = i1.x + i1.y
      //   o11.y = i1.z + i1.w
      // o11.z and o11.w are never assigned before o11 is written to dest.
      kernel void GPU_coadd_kernel6_size4_5_o2(float4 src[][],out float4 dest[][])
      {//R: size in function name is size of input array
      //R:will perform last coadd level
          int threadID=instance().y;
          float4 i1;
          float4 o11;
          i1=src[threadID][0];
          o11.xy=i1.xz+i1.yw;   // pairwise sums: (x+y, z+w)
          dest[threadID][0]=o11;
      }

      kernel2:
      // Brook+ kernel 2.
      // Same as kernel 1 plus one extra float4 load (src[threadID][1]) and one
      // extra add that fills o11.z with i2.x + i2.y.
      // o11.w is never assigned before o11 is written to dest.
      kernel void GPU_coadd_kernel6_size6_7_o3(float4 src[][],out float4 dest[][])
      {//R: size in function name is size of input array
      //R:will perform last coadd level
          int threadID=instance().y;
          float4 i1,i2;
          float4 o11;
          i1=src[threadID][0];
          i2=src[threadID][1];  // the additional load
          o11.xy=i1.xz+i1.yw;   // pairwise sums: (x+y, z+w)
          o11.z=i2.x+i2.y;      // the additional add
          dest[threadID][0]=o11;
      }

      IL for kernel 1:
      // Generated IL text for kernel 1, stored as one C string built from
      // adjacent literals (one IL statement per literal).
      const char __GPU_coadd_kernel6_size4_5_o2_cal_desc_tech0_pass0[] =
          "il_ps_2_0\n"
          // Literals l0-l11: the same twelve declarations appear in the kernel 2 listing.
          "dcl_literal l0,0x00000000,0x00000000,0x00000000,0x00000000\n"
          "dcl_literal l1,0x00000001,0x00000001,0x00000001,0x00000001\n"
          "dcl_literal l2,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF\n"
          "dcl_literal l3,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF\n"
          "dcl_literal l4,0x7F800000,0x7F800000,0x7F800000,0x7F800000\n"
          "dcl_literal l5,0x80000000,0x80000000,0x80000000,0x80000000\n"
          "dcl_literal l6,0x3E9A209B,0x3E9A209B,0x3E9A209B,0x3E9A209B\n"
          "dcl_literal l7,0x3F317218,0x3F317218,0x3F317218,0x3F317218\n"
          "dcl_literal l8,0x40490FDB,0x40490FDB,0x40490FDB,0x40490FDB\n"
          "dcl_literal l9,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB\n"
          "dcl_literal l10,0x00000003,0x00000003,0x00000003,0x00000003\n"
          "dcl_literal l11,0x00000002,0x00000002,0x00000002,0x00000002\n"
          ";global (g) declared, size = 4096\n"
          // Literals l12-l21: this listing declares ten here; the kernel 2 listing
          // declares only seven (l12-l18), so later literal numbers differ.
          "dcl_literal l12,0x00000000,0x00000000,0x00000000,0x00000000\n"
          "dcl_literal l13,0x00000001,0x00000001,0x00000001,0x00000001\n"
          "dcl_literal l14,0x00000000,0x3FE00000,0x00000000,0x3FE00000\n"
          "dcl_literal l15,0x3F800000,0x3F800000,0x3F800000,0x3F800000\n"
          "dcl_literal l16,0x00000002,0x00000002,0x00000002,0x00000002\n"
          "dcl_literal l17,0x00000002,0x00000002,0x00000002,0x00000002\n"
          "dcl_literal l18,0x00000003,0x00000003,0x00000003,0x00000003\n"
          "dcl_literal l19,0x00000001,0x00000001,0x00000001,0x00000001\n"
          "dcl_literal l20,0x00000004,0x00000004,0x00000004,0x00000004\n"
          "dcl_literal l21,0x00000003,0x00000003,0x00000003,0x00000003\n"
          "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
          "dcl_input_position_interp(linear_noperspective) v0.xy__\n"
          // l22/l23 hold 0x3F000000 (IEEE-754 single-precision 0.5); func 49 subtracts
          // them from the input position.
          "dcl_literal l22,0x3F000000,0x3F000000,0x3F000000,0x3F000000\n"
          "dcl_literal l23,0x3F000000,0x3F000000,0x3F000000,0x3F000000\n"
          "dcl_cb cb0[1]\n"
          // Main: copy the input position into r454 and cb0[0] into r269/r270,
          // then call func 49 followed by the empty func 0.
          "mov r454.xy__,v0.xyzw\n"
          "mov r269.x___,cb0[l0.x + 0].x000\n"
          "mov r270.xyz_,cb0[l0.x + 0].yzw0\n"
          "call 49 \n"
          "call 0 \n"
          "endmain\n"
          "\n"
          // func 0: empty.
          "func 0\n"
          "ret\n"
          "\n"
          // func 2: conditional sample of resource 0 returning only .x in r16.
          // Not called in this listing (call targets are 49, 0, 46, 11 and 36 only).
          "func 2\n"
          "ieq r0.x___,r17.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r19.xyzw,r18.xy00\n"
          "endif\n"
          "mov r16.x___,r19.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 11: the float4 fetch used by the kernel body.
          // If r62.x == l0.x, sample resource 0 at r63.xy into r64; return r64 in r61.
          "func 11\n"
          "ieq r0.x___,r62.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r64.xyzw,r63.xy00\n"
          "endif\n"
          "mov r61.xyzw,r64.xyzw\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 14: conditional sample returning .xy in r79. Not called in this listing.
          "func 14\n"
          "ieq r0.x___,r80.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r82.xyzw,r81.xy00\n"
          "endif\n"
          "mov r83.xy__,r82.xy00\n"
          "mov r84.xy__,r82.xy00\n"
          "mov r79.xy__,r83.xy00\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 20: conditional sample returning .x in r143. Not called in this listing.
          "func 20\n"
          "ieq r0.x___,r144.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r146.xyzw,r145.xy00\n"
          "endif\n"
          "mov r147.x___,r146.x000\n"
          "mov r143.x___,r147.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 36: integer index computation,
          //   r260.x = r261.z*r262.x*r262.y + r261.y*r262.x + r261.x
          // NOTE(review): presumably linearises the output index - confirm.
          "func 36\n"
          "mov r458.x___,r261.z000\n"
          "imul r263.x___,r458.x000,r262.x000\n"
          "mov r459.x___,r262.y000\n"
          "imul r264.x___,r263.x000,r459.x000\n"
          "mov r460.x___,r261.y000\n"
          "imul r265.x___,r460.x000,r262.x000\n"
          "iadd r266.x___,r264.x000,r265.x000\n"
          "iadd r267.x___,r266.x000,r261.x000\n"
          "mov r260.x___,r267.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 46: the kernel body.
          "func 46\n"
          // r367.x = r366.y (the integer position's y, i.e. threadID in the kernel).
          "mov r461.x___,r366.y000\n"
          "mov r367.x___,r461.x000\n"
          // Build the float fetch coordinate r370.xy = (float(l12.x), float(r367.x)); l12 is 0.
          "itof r462.x___,l12.x000\n"
          "mov r370.x___,r462.x000\n"
          "itof r463.x___,r367.x000\n"
          "mov r370._y__,r463.0x00\n"
          // Fetch through func 11; the result (i1 in the kernel) ends up in r368.
          "mov r62.x___,r365.x000\n"
          "mov r63.xy__,r370.xy00\n"
          "call 11 \n"
          "mov r464.xyzw,r61.xyzw\n"
          "mov r368.xyzw,r464.xyzw\n"
          // r369.xy = r368.xz + r368.yw  (o11.xy = i1.xz + i1.yw).
          "mov r465.xy__,r368.xz00\n"
          "mov r466.xy__,r368.yw00\n"
          "add r371.xy__,r465.xy00,r466.xy00\n"
          "mov r369.xy__,r371.xy00\n"
          // Output index: r261 = (l12.x, r367.x, 0, 0), r262 = r269.xxxx, then func 36.
          "mov r372.x___,l12.x000\n"
          "mov r372._y__,r367.0x00\n"
          "mov r467.xyzw,r372.xy00\n"
          "mov r261.xyzw,r467.xyzw\n"
          "mov r468.xyzw,r269.xxxx\n"
          "mov r262.xyzw,r468.xyzw\n"
          "call 36 \n"
          // Scale the index by l19.x (1), add l0.x (0), and store r369 to g[].
          "mov r469.x___,r260.x000\n"
          "umul r373.x___,l19.x000,r469.x000\n"
          "iadd r373.x___,r373.x000,l0.x000\n"
          "mov g[r373.x + 0].xyzw,r369.xyzw\n"
          "ret\n"
          "\n"
          // func 49: subtract (l22.x, l23.x) from the input position in r454,
          // convert the result to integer into r366, set r365.x = l0.x, call func 46.
          "func 49\n"
          "mov r456.x___,l22.x000\n"
          "mov r456._y__,l23.0x00\n"
          "sub r457.xy__,r454.xy00,r456.xy00\n"
          "mov r470.xyzw,r457.xy00\n"
          "mov r455.xyzw,r470.xyzw\n"
          "mov r365.x___,l0.x000\n"
          "ftoi r471.xyzw,r455.xyzw\n"
          "mov r366.xyzw,r471.xyzw\n"
          "call 46 \n"
          "ret\n"
          "\n"
          "end\n"
          "";

      IL for kernel2:
      // Generated IL text for kernel 2. Compared with the kernel 1 listing:
      //  - fewer literals are declared after l11 (l12-l20 instead of l12-l23),
      //    so literal numbers differ (e.g. the 0x3F000000 pair is l19/l20 here);
      //  - temporary register numbers are shifted (e.g. r454 -> r457, r458 -> r461);
      //  - func 46 contains one extra fetch sequence (second "call 11") and one
      //    extra add that writes the z component.
      // func 0, 2, 11, 14 and 20 are textually identical to the kernel 1 listing.
      const char __GPU_coadd_kernel6_size6_7_o3_cal_desc_tech0_pass0[] =
          "il_ps_2_0\n"
          // Literals l0-l11: same declarations as in the kernel 1 listing.
          "dcl_literal l0,0x00000000,0x00000000,0x00000000,0x00000000\n"
          "dcl_literal l1,0x00000001,0x00000001,0x00000001,0x00000001\n"
          "dcl_literal l2,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF\n"
          "dcl_literal l3,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF\n"
          "dcl_literal l4,0x7F800000,0x7F800000,0x7F800000,0x7F800000\n"
          "dcl_literal l5,0x80000000,0x80000000,0x80000000,0x80000000\n"
          "dcl_literal l6,0x3E9A209B,0x3E9A209B,0x3E9A209B,0x3E9A209B\n"
          "dcl_literal l7,0x3F317218,0x3F317218,0x3F317218,0x3F317218\n"
          "dcl_literal l8,0x40490FDB,0x40490FDB,0x40490FDB,0x40490FDB\n"
          "dcl_literal l9,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB\n"
          "dcl_literal l10,0x00000003,0x00000003,0x00000003,0x00000003\n"
          "dcl_literal l11,0x00000002,0x00000002,0x00000002,0x00000002\n"
          ";global (g) declared, size = 4096\n"
          // Literals l12-l18: seven declarations here versus ten in the kernel 1 listing.
          "dcl_literal l12,0x00000000,0x00000000,0x00000000,0x00000000\n"
          "dcl_literal l13,0x00000001,0x00000001,0x00000001,0x00000001\n"
          "dcl_literal l14,0x00000000,0x3FE00000,0x00000000,0x3FE00000\n"
          "dcl_literal l15,0x3F800000,0x3F800000,0x3F800000,0x3F800000\n"
          "dcl_literal l16,0x00000002,0x00000002,0x00000002,0x00000002\n"
          "dcl_literal l17,0x00000003,0x00000003,0x00000003,0x00000003\n"
          "dcl_literal l18,0x00000004,0x00000004,0x00000004,0x00000004\n"
          "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
          "dcl_input_position_interp(linear_noperspective) v0.xy__\n"
          // l19/l20 hold 0x3F000000 (IEEE-754 single-precision 0.5); func 49 subtracts
          // them from the input position.
          "dcl_literal l19,0x3F000000,0x3F000000,0x3F000000,0x3F000000\n"
          "dcl_literal l20,0x3F000000,0x3F000000,0x3F000000,0x3F000000\n"
          "dcl_cb cb0[1]\n"
          // Main: copy the input position into r457 and cb0[0] into r269/r270,
          // then call func 49 followed by the empty func 0.
          "mov r457.xy__,v0.xyzw\n"
          "mov r269.x___,cb0[l0.x + 0].x000\n"
          "mov r270.xyz_,cb0[l0.x + 0].yzw0\n"
          "call 49 \n"
          "call 0 \n"
          "endmain\n"
          "\n"
          // func 0: empty.
          "func 0\n"
          "ret\n"
          "\n"
          // func 2: conditional sample of resource 0 returning only .x in r16.
          // Not called in this listing (call targets are 49, 0, 46, 11 and 36 only).
          "func 2\n"
          "ieq r0.x___,r17.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r19.xyzw,r18.xy00\n"
          "endif\n"
          "mov r16.x___,r19.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 11: the float4 fetch used by the kernel body (called twice in func 46).
          // If r62.x == l0.x, sample resource 0 at r63.xy into r64; return r64 in r61.
          "func 11\n"
          "ieq r0.x___,r62.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r64.xyzw,r63.xy00\n"
          "endif\n"
          "mov r61.xyzw,r64.xyzw\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 14: conditional sample returning .xy in r79. Not called in this listing.
          "func 14\n"
          "ieq r0.x___,r80.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r82.xyzw,r81.xy00\n"
          "endif\n"
          "mov r83.xy__,r82.xy00\n"
          "mov r84.xy__,r82.xy00\n"
          "mov r79.xy__,r83.xy00\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 20: conditional sample returning .x in r143. Not called in this listing.
          "func 20\n"
          "ieq r0.x___,r144.x000,l0.x000\n"
          "if_logicalnz r0.x000\n"
          "sample_resource(0)_sampler(0) r146.xyzw,r145.xy00\n"
          "endif\n"
          "mov r147.x___,r146.x000\n"
          "mov r143.x___,r147.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 36: same integer index computation as in the kernel 1 listing,
          //   r260.x = r261.z*r262.x*r262.y + r261.y*r262.x + r261.x
          // only the temporaries are renumbered (r461-r463 instead of r458-r460).
          "func 36\n"
          "mov r461.x___,r261.z000\n"
          "imul r263.x___,r461.x000,r262.x000\n"
          "mov r462.x___,r262.y000\n"
          "imul r264.x___,r263.x000,r462.x000\n"
          "mov r463.x___,r261.y000\n"
          "imul r265.x___,r463.x000,r262.x000\n"
          "iadd r266.x___,r264.x000,r265.x000\n"
          "iadd r267.x___,r266.x000,r261.x000\n"
          "mov r260.x___,r267.x000\n"
          "ret_dyn\n"
          "ret\n"
          "\n"
          // func 46: the kernel body.
          "func 46\n"
          // r367.x = r366.y (the integer position's y, i.e. threadID in the kernel).
          "mov r464.x___,r366.y000\n"
          "mov r367.x___,r464.x000\n"
          // First fetch: coordinate r371.xy = (float(l12.x), float(r367.x)); l12 is 0.
          // The result (i1 in the kernel) ends up in r369.
          "itof r465.x___,l12.x000\n"
          "mov r371.x___,r465.x000\n"
          "itof r466.x___,r367.x000\n"
          "mov r371._y__,r466.0x00\n"
          "mov r62.x___,r365.x000\n"
          "mov r63.xy__,r371.xy00\n"
          "call 11 \n"
          "mov r467.xyzw,r61.xyzw\n"
          "mov r369.xyzw,r467.xyzw\n"
          // Second fetch (absent from kernel 1): coordinate
          // r372.xy = (float(l13.x), float(r367.x)); l13 is 1.
          // The result (i2 in the kernel) ends up in r368.
          "itof r468.x___,l13.x000\n"
          "mov r372.x___,r468.x000\n"
          "itof r469.x___,r367.x000\n"
          "mov r372._y__,r469.0x00\n"
          "mov r62.x___,r365.x000\n"
          "mov r63.xy__,r372.xy00\n"
          "call 11 \n"
          "mov r470.xyzw,r61.xyzw\n"
          "mov r368.xyzw,r470.xyzw\n"
          // r370.xy = r369.xz + r369.yw  (o11.xy = i1.xz + i1.yw).
          "mov r471.xy__,r369.xz00\n"
          "mov r472.xy__,r369.yw00\n"
          "add r373.xy__,r471.xy00,r472.xy00\n"
          "mov r370.xy__,r373.xy00\n"
          // Extra add (absent from kernel 1): r370.z = r368.x + r368.y  (o11.z = i2.x + i2.y).
          "mov r473.x___,r368.y000\n"
          "add r374.x___,r368.x000,r473.x000\n"
          "mov r370.__z_,r374.00x0\n"
          // Output index: r261 = (l12.x, r367.x, 0, 0), r262 = r269.xxxx, then func 36.
          "mov r375.x___,l12.x000\n"
          "mov r375._y__,r367.0x00\n"
          "mov r474.xyzw,r375.xy00\n"
          "mov r261.xyzw,r474.xyzw\n"
          "mov r475.xyzw,r269.xxxx\n"
          "mov r262.xyzw,r475.xyzw\n"
          "call 36 \n"
          // Scale the index by l13.x (1), add l0.x (0), and store r370 to g[].
          "mov r476.x___,r260.x000\n"
          "umul r376.x___,l13.x000,r476.x000\n"
          "iadd r376.x___,r376.x000,l0.x000\n"
          "mov g[r376.x + 0].xyzw,r370.xyzw\n"
          "ret\n"
          "\n"
          // func 49: subtract (l19.x, l20.x) from the input position in r457,
          // convert the result to integer into r366, set r365.x = l0.x, call func 46.
          "func 49\n"
          "mov r459.x___,l19.x000\n"
          "mov r459._y__,l20.0x00\n"
          "sub r460.xy__,r457.xy00,r459.xy00\n"
          "mov r477.xyzw,r460.xy00\n"
          "mov r458.xyzw,r477.xyzw\n"
          "mov r365.x___,l0.x000\n"
          "ftoi r478.xyzw,r458.xyzw\n"
          "mov r366.xyzw,r478.xyzw\n"
          "call 46 \n"
          "ret\n"
          "\n"
          "end\n"
          "";