%else
%define p16 [pw_16]
%endif
-.nextrow
+.nextrow:
%if %0 == 4
movu m2, [r1-4]
movu m3, [r1-2]
; MC10: multi-line macro accepting 3 to 4 parameters (%macro MC10 3-4).
; It declares its entry point through the project macro cglobal_mc, passing
; the name mc10 together with macro parameters %1, %2 and %3.
; NOTE(review): the trailing "3,5,9" looks like x86inc-style argument / GPR /
; vector-register counts -- confirm against the definition of cglobal_mc.
%macro MC10 3-4
cglobal_mc %1, %2, mc10, %3, 3,5,9
; r4 = copy of r1, taken before the .body label.
mov r4, r1
; Patch line pair: the local label .body gains its trailing colon.
-.body
+.body:
; r3d = macro parameter %3 (presumably a row counter -- verify against the loop).
mov r3d, %3
; m1 = constant loaded from pw_pixel_max.
mova m1, [pw_pixel_max]
%if num_mmregs > 8
%else
%define p16 [pw_16]
%endif
-.nextrow
+.nextrow:
%if %0 == 4
movu m2, [r1-4]
movu m3, [r1-2]
; MC01: multi-line macro taking exactly 3 parameters.
; It declares its entry point through the project macro cglobal_mc, passing
; the name mc01 together with macro parameters %1, %2 and %3.
; NOTE(review): the trailing "3,5,8" looks like x86inc-style argument / GPR /
; vector-register counts -- confirm against the definition of cglobal_mc.
%macro MC01 3
cglobal_mc %1, %2, mc01, %3, 3,5,8
; r4 = copy of r1, taken before the .body label.
mov r4, r1
; Patch line pair: the local label .body gains its trailing colon.
-.body
+.body:
; PRELOAD_V is a macro defined elsewhere; its effect is not visible in this hunk.
PRELOAD_V
; r4 = r4 - r2 (presumably steps the copied pointer back by one stride -- TODO confirm).
sub r4, r2
; this REALLY needs x86_64
; mc11 entry point, declared through the project macro cglobal_mc with macro
; parameters %1, %2 and %3.
; NOTE(review): the trailing "3,6,8" looks like x86inc-style argument / GPR /
; vector-register counts -- confirm against the definition of cglobal_mc.
cglobal_mc %1, %2, mc11, %3, 3,6,8
; r4 = copy of r1, taken before the .body label.
mov r4, r1
; Patch line pair: the local label .body gains its trailing colon.
-.body
+.body:
; PRELOAD_V is a macro defined elsewhere; its effect is not visible in this hunk.
PRELOAD_V
; r0 = r0 - r2 before the helper call (presumably a one-stride adjustment -- TODO confirm).
sub r0, r2
; Calls a helper whose symbol name is assembled from macro parameters %3 and %1.
call put_hv%3_10_%1
xor r4d, r4d
-.body
+.body:
mov r3d, %3
pxor m0, m0
mova m7, [pw_pixel_max]
mov r3d, %2
xor r4d, r4d
mova m6, [pad20]
-.nextrow
+.nextrow:
movu m2, [r5-4]
movu m3, [r5-2]
movu m4, [r5+0]
%macro MC21 3
cglobal_mc %1, %2, mc21, %3, 3,7,12
mov r5, r1
-.body
+.body:
%define PAD mmsize*8*3*2 ; SIZE*16*4*sizeof(pixel)
mov r6, rsp ; backup stack pointer
and rsp, ~(mmsize-1) ; align stack