cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

jfkong
Journeyman III

instruction cos and sin

According to IL documentation:

Cosine (cos), Sine (sin)
Instructions COS, SIN
Syntax:
Function | Opcode    | Syntax        | Description         | Range
COS      | IL_OP_COS | cos dst, src0 | cosine, cos(src0.w) | [-pi, pi]
SIN      | IL_OP_SIN | sin dst, src0 | sine, sin(src0.w)   | [-pi, pi]
Description: Computes the trigonometric function of src0.w, where w is in radians. src0.w must be within the specified range for each function; otherwise, the results are undefined. The 32-bit floating-point result is placed in all elements of dst. The maximum absolute error is 0.002.

 

Instructions COS_VEC, SIN_VEC
Syntax:
Function | Opcode        | Syntax            | Description
COS_VEC  | IL_OP_COS_VEC | cos_vec dst, src0 | cosine, cos(src0.xyzw)
SIN_VEC  | IL_OP_SIN_VEC | sin_vec dst, src0 | sine, sin(src0.xyzw)
Description: Computes the trigonometric function of each element of src0. Each element of src0 must be in the range [-100*pi, 100*pi]. The 32-bit floating-point results are placed in the corresponding elements of dst. The maximum absolute error is 0.0008.

However, when I put the following Brook+ kernel into GSA:

// Brook+ kernel: for every stream element, writes c = cos(a) + b.
//   a, b : input streams of float
//   c    : output stream of float
// The argument of cos() is passed through as-is; the kernel itself does
// no range reduction on a.
kernel void sum(float a<>, float b<>, out float c<>)
{
    c = cos(a) + b;
}

The generated IL is shown below. (Basically, there is no range adjustment before the cosine instruction.)

; Program header and declarations for the compiler-generated shader.
; The auto-generated "lN = (...)" comments below render each 32-bit literal
; as a float. For l0 and l1 that rendering is misleading: the functions in
; this listing use them as the integers 0 and 1 (operands of ieq and as
; resource selectors). Literals l2..l11 are declared but are not referenced
; by any instruction in this listing.
il_ps_2_0
; l0 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000
; l1 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001
; l2 = (-1.#QNAN0f -1.#QNAN0f -1.#QNAN0f -1.#QNAN0f)
dcl_literal l2, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
; l3 = (1.#QNAN0f 1.#QNAN0f 1.#QNAN0f 1.#QNAN0f)
dcl_literal l3, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF
; l4 = (1.#INF00f 1.#INF00f 1.#INF00f 1.#INF00f)
dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000
; l5 = (-0.000000f -0.000000f -0.000000f -0.000000f)
dcl_literal l5, 0x80000000, 0x80000000, 0x80000000, 0x80000000
; l6 = (0.301030f 0.301030f 0.301030f 0.301030f)
dcl_literal l6, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B
; l7 = (0.693147f 0.693147f 0.693147f 0.693147f)
dcl_literal l7, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218
; l8 = (3.141593f 3.141593f 3.141593f 3.141593f)
dcl_literal l8, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB
; l9 = (1.570796f 1.570796f 1.570796f 1.570796f)
dcl_literal l9, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB
; l10 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l10, 0x00000003, 0x00000003, 0x00000003, 0x00000003
; l11 = (0.000000f 0.000000f 0.000000f 0.000000f)
dcl_literal l11, 0x00000002, 0x00000002, 0x00000002, 0x00000002
; Resource 0 and input v0: sampled by func 2 when the selector is l0 (0).
dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)
dcl_input_generic v0
; Resource 1 and input v1: sampled by func 2 when the selector is l1 (1).
dcl_resource_id(1)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)
dcl_input_generic v1
; o0 is the only declared output; func 0 writes it.
dcl_output_color o0
; Main body: save the xy components of the two generic inputs, run the
; kernel wrapper (func 36), then copy its result to the output (func 0).
; r270.xy and r271.xy are later used by func 36 as sample coordinates for
; resource 0 and resource 1 respectively.
mov r270.xy__, v0
mov r271.xy__, v1
call 36
call 0
endmain
; func 0: write the final result register r272 (filled by func 36) to the
; declared colour output o0.
func 0
    mov o0, r272
ret
; func 2: fetch one element from one of the two declared resources.
;   In : r17.x  = resource selector, compared as an integer against
;                 l0.x (0) and l1.x (1)
;        r18.xy = sample coordinates
;   Out: r16.x  = x component of the sampled texel
;   Scratch: r0.x (comparison result), r19 (sampled texel)
; If r17.x matches neither literal, no sample is issued and r16.x receives
; whatever r19.x already held.
func 2
    ; selector == 0 -> sample resource 0
    ieq r0.x___, r17.x000, l0.x000
    if_logicalnz r0.x000
        sample_l_resource(0)_sampler(0) r19, r18.xy00, r18.0000
    endif
    ; selector == 1 -> sample resource 1
    ieq r0.x___, r17.x000, l1.x000
    if_logicalnz r0.x000
        sample_l_resource(1)_sampler(0) r19, r18.xy00, r18.0000
    endif
    ; return only the x component of the fetched texel
    mov r16.x___, r19.x000
    ret_dyn
ret
; func 35: the kernel body, c = cos(a) + b.
;   In : r265.x = a, r266.x = b   (set up by func 36)
;   Out: r267.x = c
;   Scratch: r268.x (cosine), r269.x (sum)
; cos_vec is applied directly to r265.x: no instruction in this function
; clamps or reduces the argument beforehand. The IL documentation quoted
; with this listing requires each cos_vec source element to lie in
; [-100*pi, 100*pi].
func 35
    cos_vec r268.x___, r265.x000
    add r269.x___, r268.x000, r266.x000
    mov r267.x___, r269.x000
ret
; func 36: kernel wrapper. Fetches both inputs, runs the kernel body
; (func 35) and packs the scalar result into a 4-component register.
;   In : r270.xy, r271.xy = sample coordinates saved by the main body
;   Out: r272 = (result, 0, 0, 0)
func 36
    ; fetch first input: selector l0 (0) -> resource 0, coords r270.xy
    mov r17.x___, l0.x000
    mov r18.xy__, r270.xy00
    call 2
    mov r277.x___, r16.x000
    mov r273.x___, r277.x000
    ; fetch second input: selector l1 (1) -> resource 1, coords r271.xy
    mov r17.x___, l1.x000
    mov r18.xy__, r271.xy00
    call 2
    mov r278.x___, r16.x000
    mov r274.x___, r278.x000
    ; pass the two fetched values to the kernel body and run it
    mov r265.x___, r273.x000
    mov r266.x___, r274.x000
    call 35
    ; take the scalar result and fill y, z, w from l0.x (all-zero bits)
    mov r275.x___, r267.x000
    mov r276.x___, r275.x000
    mov r276._y__, l0.0x00
    mov r276.__z_, l0.00x0
    mov r276.___w, l0.000x
    ; r272 is what func 0 copies to the output
    mov r272, r276
ret
end

Any explanation?

0 Likes
0 Replies