; IDCT_ADD_10: declares the function h264_idct_add_10, whose body is the IDCT4_ADD_10 expansion.
; NOTE(review): the arguments are presumably (dst, block, stride) in r0-r2 - confirm against the C prototype.
%macro IDCT_ADD_10 0
cglobal h264_idct_add_10, 3,3 ; x86inc: 3 arguments loaded into registers, 3 GPRs used
+ movsxdifnidn r2, r2d ; sign-extend the 32-bit r2d into r2 when the two names differ (64-bit builds)
IDCT4_ADD_10 r0, r1, r2 ; helper macro defined outside this excerpt; receives all three arguments
RET
%endmacro
%macro IDCT_ADD16_10 0
cglobal h264_idct_add16_10, 5,6
+ movsxdifnidn r3, r3d
ADD16_OP 0, 4+1*8
ADD16_OP 1, 5+1*8
ADD16_OP 2, 4+2*8
INIT_MMX mmxext
cglobal h264_idct_dc_add_10,3,3
+ movsxdifnidn r2, r2d
movd m0, [r1]
mov dword [r1], 0
paddd m0, [pd_32]
;-----------------------------------------------------------------------------
%macro IDCT8_DC_ADD 0
cglobal h264_idct8_dc_add_10,3,4,7
+ movsxdifnidn r2, r2d
movd m0, [r1]
mov dword[r1], 0
paddd m0, [pd_32]
ret
cglobal h264_idct_add16intra_10,5,7,8
+ movsxdifnidn r3, r3d
ADD16_OP_INTRA 0, 4+1*8
ADD16_OP_INTRA 2, 4+2*8
ADD16_OP_INTRA 4, 6+1*8
;-----------------------------------------------------------------------------
%macro IDCT_ADD8 0
cglobal h264_idct_add8_10,5,8,7
+ movsxdifnidn r3, r3d
%if ARCH_X86_64
mov r7, r0
%endif
IDCT_ADD8
%endif
+;-----------------------------------------------------------------------------
+; void ff_h264_idct_add8_422_10(pixel **dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+%assign last_block 44
+
+%macro IDCT_ADD8_422 0
+; Emits h264_idct_add8_422_10: four ADD16_OP_INTRA steps on the plane at dst[0], then four on the plane at dst[1].
+cglobal h264_idct_add8_422_10, 5, 8, 7 ; x86inc: 5 arguments, 8 GPRs, 7 vector registers
+ movsxdifnidn r3, r3d ; sign-extend the 32-bit stride (4th argument) where r3 and r3d differ
+%if ARCH_X86_64
+ mov r7, r0 ; keep the dst pointer array in r7; r0 is overwritten with dst[0] below
+%endif
+; r2 is the block (coefficient) pointer; NOTE(review): the 1024-byte skip presumably steps over the preceding blocks - confirm.
+ add r2, 1024
+ mov r0, [r0] ; r0 = dst[0], the first destination pointer
+ ADD16_OP_INTRA 16, 4+ 6*8 ; NOTE(review): if the 2nd argument is an nnzc offset, 4+14*8 below exceeds the nnzc[6*8] in the prototype comment - confirm
+ ADD16_OP_INTRA 18, 4+ 7*8
+ ADD16_OP_INTRA 24, 4+ 8*8 ; i+4
+ ADD16_OP_INTRA 26, 4+ 9*8 ; i+4
+ add r2, 1024-128*4 ; advance block by 512 bytes; NOTE(review): presumably each ADD16_OP_INTRA already added 128 - confirm
+; Switch r0 to the second destination pointer, dst[1].
+%if ARCH_X86_64
+ mov r0, [r7+gprsize] ; dst[1] through the copy of dst saved in r7
+%else
+ mov r0, r0m ; no saved copy on x86-32: reload dst from its argument slot
+ mov r0, [r0+gprsize] ; r0 = dst[1]
+%endif
+
+ ADD16_OP_INTRA 32, 4+11*8
+ ADD16_OP_INTRA 34, 4+12*8
+ ADD16_OP_INTRA 40, 4+13*8 ; i+4
+ ADD16_OP_INTRA 42, 4+14*8 ; i+4
+REP_RET
+ AC 16 ; NOTE(review): the AC expansions sit after the return; presumably out-of-line targets for ADD16_OP_INTRA, one per block index - confirm
+ AC 18
+ AC 24 ; i+4
+ AC 26 ; i+4
+ AC 32
+ AC 34
+ AC 40 ; i+4
+ AC 42 ; i+4
+
+%endmacro
+
+INIT_XMM sse2 ; select XMM registers with the sse2 instruction set for the next expansion
+IDCT_ADD8_422 ; emit the sse2 build of h264_idct_add8_422_10
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx ; same macro again, this time targeting avx
+IDCT_ADD8_422 ; emit the avx build; only assembled when HAVE_AVX_EXTERNAL is non-zero
+%endif
+
;-----------------------------------------------------------------------------
; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro IDCT8_ADD 0
cglobal h264_idct8_add_10, 3,4,16
+ movsxdifnidn r2, r2d
%if UNIX64 == 0
%assign pad 16-gprsize-(stack_offset&15)
sub rsp, pad
%macro IDCT8_ADD4 0
cglobal h264_idct8_add4_10, 0,7,16
+ movsxdifnidn r3, r3d
%assign pad 16-gprsize-(stack_offset&15)
SUB rsp, pad
mov r5, r0mp