;*****************************************************************************
;* trellis-64.asm: x86_64 trellis quantization
;*****************************************************************************
;*****************************************************************************
;* trellis-64.asm: x86_64 trellis quantization
;*****************************************************************************
;-----------------------------------------------------------------------------
; int trellis_cabac_4x4_psy(
; const int *unquant_mf, const uint8_t *zigzag, int lambda2,
;-----------------------------------------------------------------------------
; int trellis_cabac_4x4_psy(
; const int *unquant_mf, const uint8_t *zigzag, int lambda2,
; NOTE(review): this span reads like extracted diff context -- every hunk
; appears twice and the %if that pairs with the %else lines below is not
; visible here. Confirm against the full file before relying on the order.
;
; Reserve the local stack area: a fixed 96 bytes, the level tree, and 16 node
; records, plus the term 16-gprsize-(stack_offset&15)
; (presumably an alignment correction for rsp -- TODO confirm).
%assign pad 96 + level_tree_size + 16*SIZEOF_NODE + 16-gprsize-(stack_offset&15)
SUB rsp, pad
; Give the argument slots symbolic names (unquant_mf, zigzag, lambda2, ...);
; the q/d/m suffixed forms used further down derive from these names.
DEFINE_ARGS unquant_mf, zigzag, lambda2, ii, orig_coefs, quant_coefs, dct, cabac_state_sig, cabac_state_last
; Repeat of the three lines above (see the review note at the top of the span).
%assign pad 96 + level_tree_size + 16*SIZEOF_NODE + 16-gprsize-(stack_offset&15)
SUB rsp, pad
DEFINE_ARGS unquant_mf, zigzag, lambda2, ii, orig_coefs, quant_coefs, dct, cabac_state_sig, cabac_state_last
; level_statem is an address expression (no brackets), so it can be indexed
; like an array. Two variants exist, differing only in the displacement
; (80 vs 32); the condition selecting between them is outside this view
; (presumably the calling convention -- TODO confirm).
%define level_statem rsp+stack_offset+80 ; r9m, except that we need to index into it (and r10m) as an array
%else
%define level_statem rsp+stack_offset+32
; Repeat of the level_statem definitions.
%define level_statem rsp+stack_offset+80 ; r9m, except that we need to index into it (and r10m) as an array
%else
%define level_statem rsp+stack_offset+32
; Memory homes for the two coefficient pointers, at offsets 16 and 24 from
; `stack` (defined outside this view).
%define orig_coefsm [stack+16]
%define quant_coefsm [stack+24]
; Store the orig_coefs register into its memory slot.
mov orig_coefsm, orig_coefsq
; Repeat of the slot definitions and the store.
%define orig_coefsm [stack+16]
%define quant_coefsm [stack+24]
mov orig_coefsm, orig_coefsq
; NOTE(review): fragmentary span -- each hunk appears twice and the %if that
; pairs with each %else is not visible. In every %else pair below, the first
; variant accesses the coefficient as a dword and the second as a word;
; presumably the choice follows SIZEOF_DCTCOEF (4 vs 2) -- TODO confirm.
;
; Replace r0 with the zero-extended 16-bit entry at level_tree + r0*4.
movzx r0, word [level_tree + r0*4]
; Logical right shift of each dword lane of m0 by 16.
psrld m0, 16
; Load 32 bits starting at dct[r2] into the low lane of m1.
movd m1, [dctq + r2*SIZEOF_DCTCOEF]
; Repeat of the three lines above.
movzx r0, word [level_tree + r0*4]
psrld m0, 16
movd m1, [dctq + r2*SIZEOF_DCTCOEF]
; r4d = dct[r2]: full dword load, or sign-extended word load.
mov r4d, dword [dctq + r2*SIZEOF_DCTCOEF]
%else
movsx r4d, word [dctq + r2*SIZEOF_DCTCOEF]
; Repeat of the dct[r2] load pair.
mov r4d, dword [dctq + r2*SIZEOF_DCTCOEF]
%else
movsx r4d, word [dctq + r2*SIZEOF_DCTCOEF]
; dct[r2] = r5: dword store, or word store of the low 16 bits.
mov [dctq + r2*SIZEOF_DCTCOEF], r5d
%else
mov [dctq + r2*SIZEOF_DCTCOEF], r5w
; Repeat of the dct[r2] store pair.
mov [dctq + r2*SIZEOF_DCTCOEF], r5d
%else
mov [dctq + r2*SIZEOF_DCTCOEF], r5w
; abs_level = element ii of the array at r6: dword load, or sign-extended word.
mov abs_leveld, dword [r6 + iiq*SIZEOF_DCTCOEF]
%else
movsx abs_leveld, word [r6 + iiq*SIZEOF_DCTCOEF]
; Repeat of the abs_level load pair.
mov abs_leveld, dword [r6 + iiq*SIZEOF_DCTCOEF]
%else
movsx abs_leveld, word [r6 + iiq*SIZEOF_DCTCOEF]
; NOTE(review): the leading "-" / "+" on the next four lines look like
; leftover unified-diff markers (old movd load vs. new LOAD_DUP load); as
; written they are not valid instructions and need resolving by hand.
; Both versions read from 2 bytes before element zigzagi and then shift
; arithmetically right by 16, which leaves the 16-bit value stored at the
; element address sign-extended in the dword (assumes 2-byte coefficients).
- movd m1, [r6 + zigzagiq*SIZEOF_DCTCOEF - 2]
- psrad m1, 16
+ LOAD_DUP m1, [r6 + zigzagiq*SIZEOF_DCTCOEF - 2]
+ psrad m1, 16 ; sign_coef
; NOTE(review): fragmentary span -- each hunk appears twice, the %endif lines
; close conditionals opened outside this view, and the trailing %if j == 7 is
; not closed within it. node_ctx and j are assembly-time symbols defined
; elsewhere.
;
; int psy_weight = dct_weight_tab[zigzag[i]] * h->mb.i_psy_trellis;
; ssd1[k] -= psy_weight * psy_value;
; Load fenc_dctm into r6 (presumably the memory copy of the fenc_dct
; pointer -- TODO confirm).
mov r6, fenc_dctm
; int psy_weight = dct_weight_tab[zigzag[i]] * h->mb.i_psy_trellis;
; ssd1[k] -= psy_weight * psy_value;
; Repeat of the load above.
mov r6, fenc_dctm
; Store the low byte of r10 into the current node's cabac_state array at
; index coeff_abs_levelgt1_offs-6.
mov [nodes_curq + node_cabac_state(node_ctx) + coeff_abs_levelgt1_offs-6], r10b
%endif
; r6d = level_idx field of previous-node entry j.
mov r6d, [nodes_prevq + node_level_idx(j)]
; Repeat of the store / %endif / load above.
mov [nodes_curq + node_cabac_state(node_ctx) + coeff_abs_levelgt1_offs-6], r10b
%endif
mov r6d, [nodes_prevq + node_level_idx(j)]
; Accumulate r5d into the running bit cost.
add bitsd, r5d ; bs_size_ue_big from COEFN_SUFFIX
; n.cabac_state[levelgt1_ctx]
; Assembly-time special case for j == 7; its body lies outside this view.
%if j == 7 ; && compiling support for 4:2:2
; Repeat of the three lines above.
add bitsd, r5d ; bs_size_ue_big from COEFN_SUFFIX
; n.cabac_state[levelgt1_ctx]
%if j == 7 ; && compiling support for 4:2:2