;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86util.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
SECTION .text
;-----------------------------------------------------------------------------
-; void h264_idct_add(pixel *dst, dctcoef *block, int stride)
+; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro STORE_DIFFx2 6
psrad %1, 6
paddd m0, [pd_32]
IDCT4_1D d,0,1,2,3,4,5
pxor m5, m5
+ mova [%2+ 0], m5
+ mova [%2+16], m5
+ mova [%2+32], m5
+ mova [%2+48], m5
STORE_DIFFx2 m0, m1, m4, m5, %1, %3
lea %1, [%1+%3*2]
STORE_DIFFx2 m2, m3, m4, m5, %1, %3
INIT_XMM sse2
IDCT_ADD_10
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD_10
-%endif
;-----------------------------------------------------------------------------
-; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
;;;;;;; NO FATE SAMPLES TRIGGER THIS
%macro ADD4x4IDCT 0
paddd m0, [pd_32]
IDCT4_1D d,0,1,2,3,4,5
pxor m5, m5
+ mova [r2+ 0], m5
+ mova [r2+16], m5
+ mova [r2+32], m5
+ mova [r2+48], m5
STORE_DIFFx2 m0, m1, m4, m5, r5, r3
lea r5, [r5+r3*2]
STORE_DIFFx2 m2, m3, m4, m5, r5, r3
INIT_XMM sse2
ALIGN 16
ADD4x4IDCT
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
ALIGN 16
ADD4x4IDCT
-%endif
%macro ADD16_OP 2
cmp byte [r4+%2], 0
INIT_XMM sse2
IDCT_ADD16_10
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16_10
-%endif
;-----------------------------------------------------------------------------
-; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride)
+; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro IDCT_DC_ADD_OP_10 3
pxor m5, m5
mova [%1+%3 ], m4
%endmacro
-INIT_MMX mmx2
+INIT_MMX mmxext
cglobal h264_idct_dc_add_10,3,3
movd m0, [r1]
+ mov dword [r1], 0
paddd m0, [pd_32]
psrad m0, 6
lea r1, [r2*3]
RET
;-----------------------------------------------------------------------------
-; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride)
+; void ff_h264_idct8_dc_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro IDCT8_DC_ADD 0
-cglobal h264_idct8_dc_add_10,3,3,7
- mov r1d, [r1]
- add r1, 32
- sar r1, 6
- movd m0, r1d
+cglobal h264_idct8_dc_add_10,3,4,7
+ movd m0, [r1]
+ mov dword[r1], 0
+ paddd m0, [pd_32]
+ psrad m0, 6
lea r1, [r2*3]
SPLATW m0, m0, 0
mova m6, [pw_pixel_max]
INIT_XMM sse2
IDCT8_DC_ADD
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_DC_ADD
-%endif
;-----------------------------------------------------------------------------
-; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
%macro AC 1
.ac%1:
add r5, r0
movq m0, [r2+ 0]
movhps m0, [r2+64]
+ mov dword [r2+ 0], 0
+ mov dword [r2+64], 0
paddd m0, [pd_32]
psrad m0, 6
pshufhw m0, m0, 0
INIT_XMM sse2
IDCT_ADD16INTRA_10
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16INTRA_10
-%endif
%assign last_block 36
;-----------------------------------------------------------------------------
-; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add8_10(pixel **dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
%macro IDCT_ADD8 0
cglobal h264_idct_add8_10,5,8,7
INIT_XMM sse2
IDCT_ADD8
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD8
-%endif
;-----------------------------------------------------------------------------
-; void h264_idct8_add(pixel *dst, dctcoef *block, int stride)
+; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro IDCT8_1D 2
SWAP 0, 1
packssdw m8, m0
paddsw m8, [r0]
pxor m0, m0
+ mova [r1+ 0], m0
+ mova [r1+ 16], m0
+ mova [r1+ 32], m0
+ mova [r1+ 48], m0
+ mova [r1+ 64], m0
+ mova [r1+ 80], m0
+ mova [r1+ 96], m0
+ mova [r1+112], m0
+ mova [r1+128], m0
+ mova [r1+144], m0
+ mova [r1+160], m0
+ mova [r1+176], m0
+ mova [r1+192], m0
+ mova [r1+208], m0
+ mova [r1+224], m0
+ mova [r1+240], m0
CLIPW m8, m0, [pw_pixel_max]
mova [r0], m8
mova m8, [pw_pixel_max]
lea r3, [r0+8]
IDCT8_ADD_SSE_END r0, rsp, r2
IDCT8_ADD_SSE_END r3, rsp+16, r2
+ mova [r1+ 0], m7
+ mova [r1+ 16], m7
+ mova [r1+ 32], m7
+ mova [r1+ 48], m7
+ mova [r1+ 64], m7
+ mova [r1+ 80], m7
+ mova [r1+ 96], m7
+ mova [r1+112], m7
+ mova [r1+128], m7
+ mova [r1+144], m7
+ mova [r1+160], m7
+ mova [r1+176], m7
+ mova [r1+192], m7
+ mova [r1+208], m7
+ mova [r1+224], m7
+ mova [r1+240], m7
%endif ; ARCH_X86_64
add rsp, pad
INIT_XMM sse2
IDCT8_ADD
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD
-%endif
;-----------------------------------------------------------------------------
-; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
;;;;;;; NO FATE SAMPLES TRIGGER THIS
%macro IDCT8_ADD4_OP 2
INIT_XMM sse2
IDCT8_ADD4
-%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD4
-%endif