;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
cextern pw_3
cextern pw_4
cextern pw_8
-cextern pw_28
+pw_28: times 8 dw 28
cextern pw_32
cextern pw_64
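; pw_28 becomes a local constant rather than a shared cextern, presumably
; because no other file references it; the remaining pw_* stay shared.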
%macro mv0_pixels_mc8 0
lea r4, [r2*2 ]
-.next4rows
+.next4rows:
movq mm0, [r1 ]
movq mm1, [r1+r2]
add r1, r4
jne .next4rows
%endmacro
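; mv0_pixels_mc8 is the mx == my == 0 fast path: no interpolation is needed,
; so the 8-wide block is copied (or, in the avg variants, blended through
; CHROMAMC_AVG) four rows per iteration.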
-%macro chroma_mc8_mmx_func 3
+%macro chroma_mc8_mmx_func 2-3
%ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r8
%else
%define extra_regs 0
%endif ; rv40
-; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
-; int stride, int h, int mx, int my)
-cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
+; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
+; int stride, int h, int mx, int my)
+cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
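; Under the cpuflags scheme, cglobal appends the cpu suffix (_mmx, _mmxext,
; _3dnow, ...) from the active INIT_MMX/INIT_XMM by itself; %3 only carries
; the optional _rnd/_nornd tail, hence the 2-3 argument range above.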
%if ARCH_X86_64
movsxd r2, r2d
%endif
mv0_pixels_mc8
REP_RET
-.at_least_one_non_zero
+.at_least_one_non_zero:
%ifidn %2, rv40
%if ARCH_X86_64
mov r7, r5
test r4d, r4d
mov r6, r2 ; dxy = x ? 1 : stride
jne .both_non_zero
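; Dispatch: when exactly one of mx/my is non-zero, a 2-tap 1-D filter along
; that axis suffices, with r6 holding the sample step (1 = horizontal,
; stride = vertical); only if both are non-zero do we take the 2-D path.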
-.my_is_zero
+.my_is_zero:
; mx == 0 XOR my == 0: 1-dimensional filter only
or r4d, r5d ; x + y
pxor m7, m7
psubw m4, m5 ; mm4 = A = 8-x
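; 1-D filter weights: A = 8-x and B = x (or y), so each output pixel is
; (A*src[i] + B*src[i+r6] + rnd) >> 3; rnd is 4 for h264 (pw_4), while rv40
; takes it from a rounding table and vc1's no-rnd variant presumably uses 3.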
-.next1drow
+.next1drow:
movq m0, [r1 ] ; mm0 = src[0..7]
movq m2, [r1+r6] ; mm2 = src[1..8]
jne .next1drow
REP_RET
-.both_non_zero ; general case, bilinear
+.both_non_zero: ; general case, bilinear
movd m4, r4d ; x
movd m6, r5d ; y
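; 2-D bilinear case: with A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y and
; D = x*y (A+B+C+D = 64), h264 computes per pixel
;   dst[i] = (A*s[i] + B*s[i+1] + C*s[i+stride] + D*s[i+stride+1] + 32) >> 6
; rv40 swaps the +32 bias for its 2-D rounding table, and vc1's no-rnd
; variant presumably biases by 28 instead (hence pw_28 above).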
%ifidn %2, rv40
movq m0, [r1 ] ; mm0 = src[0..7]
movq m1, [r1+1] ; mm1 = src[1..8]
-.next2drow
+.next2drow:
add r1, r2
movq m2, m0
RET
%endmacro
-%macro chroma_mc4_mmx_func 3
+%macro chroma_mc4_mmx_func 2
%define extra_regs 0
%ifidn %2, rv40
%ifdef PIC
%define extra_regs 1
%endif ; PIC
%endif ; rv40
-cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
+cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
%if ARCH_X86_64
movsxd r2, r2d
%endif
pmullw m6, m2
paddw m6, m0
-.next2rows
+.next2rows:
movd m0, [r1 ]
movd m1, [r1+1]
add r1, r2
REP_RET
%endmacro
-%macro chroma_mc2_mmx_func 3
-cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
+%macro chroma_mc2_mmx_func 2
+cglobal %1_%2_chroma_mc2, 6, 7, 0
%if ARCH_X86_64
movsxd r2, r2d
%endif
punpcklbw m2, m7
pshufw m2, m2, 0x94 ; mm2 = src[0,1,1,2]
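; 0x94 = 10_01_01_00b selects words [0,1,1,2]; duplicating the middle sample
; lets the single pmaddwd below produce both output pixels' horizontal taps.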
-.nextrow
+.nextrow:
add r1, r2
movq m1, m2
pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
%macro NOTHING 2-3
%endmacro
%macro DIRECT_AVG 2
- PAVG %1, %2
+ PAVGB %1, %2
%endmacro
%macro COPY_AVG 3
movd %2, %3
- PAVG %1, %2
+ PAVGB %1, %2
%endmacro
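; PAVGB (from x86util) replaces the old %define PAVG indirection: it expands
; to pavgb under mmxext and to pavgusb under 3dnow, keyed off cpuflags.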
-INIT_MMX
+INIT_MMX mmx
%define CHROMAMC_AVG NOTHING
%define CHROMAMC_AVG4 NOTHING
-chroma_mc8_mmx_func put, h264, mmx_rnd
-chroma_mc8_mmx_func put, vc1, mmx_nornd
-chroma_mc8_mmx_func put, rv40, mmx
-chroma_mc4_mmx_func put, h264, mmx
-chroma_mc4_mmx_func put, rv40, mmx
-chroma_mc2_mmx_func put, h264, mmx2
+chroma_mc8_mmx_func put, h264, _rnd
+chroma_mc8_mmx_func put, vc1, _nornd
+chroma_mc8_mmx_func put, rv40
+chroma_mc4_mmx_func put, h264
+chroma_mc4_mmx_func put, rv40
+
+INIT_MMX mmxext
+chroma_mc2_mmx_func put, h264
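; chroma_mc2 needs mmxext rather than plain mmx because it uses pshufw.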
%define CHROMAMC_AVG DIRECT_AVG
%define CHROMAMC_AVG4 COPY_AVG
-%define PAVG pavgb
-chroma_mc8_mmx_func avg, h264, mmx2_rnd
-chroma_mc8_mmx_func avg, vc1, mmx2_nornd
-chroma_mc8_mmx_func avg, rv40, mmx2
-chroma_mc4_mmx_func avg, h264, mmx2
-chroma_mc4_mmx_func avg, rv40, mmx2
-chroma_mc2_mmx_func avg, h264, mmx2
-
-%define PAVG pavgusb
-chroma_mc8_mmx_func avg, h264, 3dnow_rnd
-chroma_mc8_mmx_func avg, vc1, 3dnow_nornd
-chroma_mc8_mmx_func avg, rv40, 3dnow
-chroma_mc4_mmx_func avg, h264, 3dnow
-chroma_mc4_mmx_func avg, rv40, 3dnow
-
-%macro chroma_mc8_ssse3_func 3
-cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
+chroma_mc8_mmx_func avg, h264, _rnd
+chroma_mc8_mmx_func avg, vc1, _nornd
+chroma_mc8_mmx_func avg, rv40
+chroma_mc4_mmx_func avg, h264
+chroma_mc4_mmx_func avg, rv40
+chroma_mc2_mmx_func avg, h264
+
+INIT_MMX 3dnow
+chroma_mc8_mmx_func avg, h264, _rnd
+chroma_mc8_mmx_func avg, vc1, _nornd
+chroma_mc8_mmx_func avg, rv40
+chroma_mc4_mmx_func avg, h264
+chroma_mc4_mmx_func avg, rv40
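; 3dnow only gets avg variants: put never averages, so the plain-mmx put
; versions presumably already cover 3dnow-class CPUs.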
+
+%macro chroma_mc8_ssse3_func 2-3
+cglobal %1_%2_chroma_mc8%3, 6, 7, 8
%if ARCH_X86_64
movsxd r2, r2d
%endif
mv0_pixels_mc8
REP_RET
-.at_least_one_non_zero
+.at_least_one_non_zero:
test r5d, r5d
je .my_is_zero
test r4d, r4d
movlhps m7, m7
movlhps m6, m6
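; The packed weights are broadcast across the full xmm register (movlhps
; mirrors the low half into the high half) so the loop below can filter two
; rows per iteration.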
-.next2rows
+.next2rows:
movq m1, [r1+r2*1 ]
movq m2, [r1+r2*1+1]
movq m3, [r1+r2*2 ]
jg .next2rows
REP_RET
-.my_is_zero
+.my_is_zero:
mov r5d, r4d
shl r4d, 8
add r4, 8
pshuflw m7, m7, 0
movlhps m7, m7
-.next2xrows
+.next2xrows:
movq m0, [r1 ]
movq m1, [r1 +1]
movq m2, [r1+r2 ]
jg .next2xrows
REP_RET
-.mx_is_zero
+.mx_is_zero:
mov r4d, r5d
shl r5d, 8
add r5, 8
pshuflw m7, m7, 0
movlhps m7, m7
-.next2yrows
+.next2yrows:
movq m0, [r1 ]
movq m1, [r1+r2 ]
movdqa m2, m1
REP_RET
%endmacro
-%macro chroma_mc4_ssse3_func 3
-cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
+%macro chroma_mc4_ssse3_func 2
+cglobal %1_%2_chroma_mc4, 6, 7, 0
%if ARCH_X86_64
movsxd r2, r2d
%endif
punpcklbw m0, [r1+1]
pshufw m6, m6, 0
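; Interleaving src[i] with src[i+1] byte-wise lets one packed multiply-add
; (pmaddubsw in this ssse3 path) apply both horizontal taps at once.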
-.next2rows
+.next2rows:
movd m1, [r1+r2*1 ]
movd m3, [r1+r2*2 ]
punpcklbw m1, [r1+r2*1+1]
%endmacro
%define CHROMAMC_AVG NOTHING
-INIT_XMM
-chroma_mc8_ssse3_func put, h264, ssse3_rnd
-chroma_mc8_ssse3_func put, vc1, ssse3_nornd
-INIT_MMX
-chroma_mc4_ssse3_func put, h264, ssse3
+INIT_XMM ssse3
+chroma_mc8_ssse3_func put, h264, _rnd
+chroma_mc8_ssse3_func put, vc1, _nornd
+INIT_MMX ssse3
+chroma_mc4_ssse3_func put, h264
%define CHROMAMC_AVG DIRECT_AVG
-%define PAVG pavgb
-INIT_XMM
-chroma_mc8_ssse3_func avg, h264, ssse3_rnd
-chroma_mc8_ssse3_func avg, vc1, ssse3_nornd
-INIT_MMX
-chroma_mc4_ssse3_func avg, h264, ssse3
+INIT_XMM ssse3
+chroma_mc8_ssse3_func avg, h264, _rnd
+chroma_mc8_ssse3_func avg, vc1, _nornd
+INIT_MMX ssse3
+chroma_mc4_ssse3_func avg, h264
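; The C side resolves these as ff_put_*/ff_avg_* (cglobal applies the ff_
; name prefix) and presumably selects them at init time from the
; AV_CPU_FLAG_* bits reported by av_get_cpu_flags().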