;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "libavutil/x86/x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
; MV0_PIXELS_MC8: fast path taken when mx|my == 0 (see the `or r6d, r4d`
; test in CHROMA_MC8 below) -- rows are copied with no interpolation,
; with CHROMAMC_AVG mixed in for the avg_* variants.
; r4/r5 are preloaded with stride*3 / stride*4 so several rows can be
; addressed per iteration; r1 = src, r2 = stride per the mc8 prototype.
; NOTE(review): the macro body is truncated in this hunk; %endmacro lies
; outside the visible context.
%macro MV0_PIXELS_MC8 0
lea r4, [r2*3 ]
lea r5, [r2*4 ]
; diff: newer nasm/x86inc style wants an explicit ':' on local labels
-.next4rows
+.next4rows:
movu m0, [r1 ]
movu m1, [r1+r2 ]
CHROMAMC_AVG m0, [r0 ]
;-----------------------------------------------------------------------------
; void put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, int mx, int my)
;-----------------------------------------------------------------------------
; diff: CHROMA_MC8 loses its explicit cpu-suffix parameter -- with the newer
; x86inc "INIT_XMM sse2" cpuflags convention, cglobal appends the cpu
; suffix to the symbol name itself.
-%macro CHROMA_MC8 2
+%macro CHROMA_MC8 1
; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
; int stride, int h, int mx, int my)
-cglobal %1_h264_chroma_mc8_10_%2, 6,7,8
+cglobal %1_h264_chroma_mc8_10, 6,7,8
movsxdifnidn r2, r2d ; sign-extend the int stride arg on 64-bit ABIs
mov r6d, r5d
or r6d, r4d ; r6d = mx|my; zero => no interpolation needed
MV0_PIXELS_MC8
REP_RET
; diff: local labels below gain the ':' required by the newer style
-.at_least_one_non_zero
+.at_least_one_non_zero:
mov r6d, 2
test r5d, r5d
je .x_interpolation
mov r6, r2 ; dxy = x ? 1 : stride
test r4d, r4d
jne .xy_interpolation
-.x_interpolation
+.x_interpolation:
; mx == 0 XOR my == 0 - 1 dimensional filter only
or r4d, r5d ; x + y
movd m5, r4d
SPLATW m5, m5 ; mm5 = B = x
psubw m4, m5 ; mm4 = A = 8-x
-.next1drow
+.next1drow:
movu m0, [r1 ] ; mm0 = src[0..7]
movu m2, [r1+r6] ; mm2 = src[1..8]
jne .next1drow
REP_RET
-.xy_interpolation ; general case, bilinear
+.xy_interpolation:               ; general case, bilinear
movd m4, r4m ; x
movd m6, r5m ; y
movu m0, [r1 ] ; mm0 = src[0..7]
movu m1, [r1+2] ; mm1 = src[1..8]
-.next2drow
+.next2drow:
add r1, r2
pmullw m2, m0, m4
add r0, r2
; NOTE(review): large parts of the 1-D and 2-D filter bodies are elided in
; this hunk (the jne/REP_RET above reference code not shown here).
%endmacro
; diff: same one-parameter conversion as CHROMA_MC8 -- the cpu suffix now
; comes from INIT_MMX/INIT_XMM cpuflags via cglobal.
-%macro CHROMA_MC4 2
-cglobal %1_h264_chroma_mc4_10_%2, 6,6,7
+%macro CHROMA_MC4 1
+cglobal %1_h264_chroma_mc4_10, 6,6,7
movsxdifnidn r2, r2d
movd m2, r4m ; x
movd m3, r5m ; y
pmullw m6, m2
paddw m6, m0
-.next2rows
+.next2rows:
MC4_OP m0, m6
MC4_OP m6, m0
sub r3d, 2 ; two output rows per iteration (MC4_OP twice above)
; NOTE(review): macro body truncated here; %endmacro is outside this hunk.
;-----------------------------------------------------------------------------
; void put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, int mx, int my)
;-----------------------------------------------------------------------------
; diff: one-parameter conversion, matching CHROMA_MC8/CHROMA_MC4.
-%macro CHROMA_MC2 2
-cglobal %1_h264_chroma_mc2_10_%2, 6,7
+%macro CHROMA_MC2 1
+cglobal %1_h264_chroma_mc2_10, 6,7
movsxdifnidn r2, r2d
mov r6d, r4d
shl r4d, 16
pxor m7, m7 ; m7 = 0 (zeroing idiom)
pshufw m2, [r1], 0x94 ; mm0 = src[0,1,1,2]
-.nextrow
+.nextrow:
add r1, r2
movq m1, m2
pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
; NOTE(review): the lines below are the tail of a separate averaging helper
; macro whose opening %macro line is outside this hunk.
%if %0==3
movq %2, %3
%endif
; diff: the PAVG define indirection is replaced by pavgw directly --
; 10-bit samples are 16-bit words, so packed-word average is always right.
- PAVG %1, %2
+ pavgw %1, %2
%endmacro
; Instantiate the "put" variants. diff: migrate from the old
; INIT_XMM/INIT_AVX + per-call cpu-suffix scheme to the newer x86inc
; cpuflags style ("INIT_XMM sse2"), which makes cglobal generate the
; _sse2/_avx/_mmxext symbol suffixes automatically.
%define CHROMAMC_AVG NOTHING
-INIT_XMM
-CHROMA_MC8 put, sse2
; NOTE(review): the HAVE_AVX guard is dropped -- presumably the updated
; x86inc always allows assembling the AVX variant, with runtime CPU
; detection elsewhere; confirm against the matching x86inc.asm version.
-%ifdef HAVE_AVX
-INIT_AVX
-CHROMA_MC8 put, avx
-%endif
-INIT_MMX
-CHROMA_MC4 put, mmxext
-CHROMA_MC2 put, mmxext
+INIT_XMM sse2
+CHROMA_MC8 put
+INIT_XMM avx
+CHROMA_MC8 put
+INIT_MMX mmxext
+CHROMA_MC4 put
+CHROMA_MC2 put
; Same set for the "avg" variants; redefining CHROMAMC_AVG switches the
; macros onto the averaging code path. diff: the PAVG define is gone
; because pavgw is now used directly in the averaging helper above.
%define CHROMAMC_AVG AVG
-%define PAVG pavgw
-INIT_XMM
-CHROMA_MC8 avg, sse2
-%ifdef HAVE_AVX
-INIT_AVX
-CHROMA_MC8 avg, avx
-%endif
-INIT_MMX
-CHROMA_MC4 avg, mmxext
-CHROMA_MC2 avg, mmxext
+INIT_XMM sse2
+CHROMA_MC8 avg
+INIT_XMM avx
+CHROMA_MC8 avg
+INIT_MMX mmxext
+CHROMA_MC4 avg
+CHROMA_MC2 avg