br x10
.endm
-.macro smull1 a b c
+.macro smull1 a, b, c
smull \a, \b, \c
.endm
-.macro smlal1 a b c
+.macro smlal1 a, b, c
smlal \a, \b, \c
.endm
-.macro smlsl1 a b c
+.macro smlsl1 a, b, c
smlsl \a, \b, \c
.endm
-.macro idct_col4_top y1 y2 y3 y4 i l
- smull\i v7.4S, \y3\().\l, z2
- smull\i v16.4S, \y3\().\l, z6
- smull\i v17.4S, \y2\().\l, z1
+.macro idct_col4_top y1, y2, y3, y4, i, l
+ smull\i v7.4S, \y3\l, z2
+ smull\i v16.4S, \y3\l, z6
+ smull\i v17.4S, \y2\l, z1
add v19.4S, v23.4S, v7.4S
- smull\i v18.4S, \y2\().\l, z3
+ smull\i v18.4S, \y2\l, z3
add v20.4S, v23.4S, v16.4S
- smull\i v5.4S, \y2\().\l, z5
+ smull\i v5.4S, \y2\l, z5
sub v21.4S, v23.4S, v16.4S
- smull\i v6.4S, \y2\().\l, z7
+ smull\i v6.4S, \y2\l, z7
sub v22.4S, v23.4S, v7.4S
- smlal\i v17.4S, \y4\().\l, z3
- smlsl\i v18.4S, \y4\().\l, z7
- smlsl\i v5.4S, \y4\().\l, z1
- smlsl\i v6.4S, \y4\().\l, z5
+ smlal\i v17.4S, \y4\l, z3
+ smlsl\i v18.4S, \y4\l, z7
+ smlsl\i v5.4S, \y4\l, z1
+ smlsl\i v6.4S, \y4\l, z5
.endm
-.macro idct_row4_neon y1 y2 y3 y4 pass
+.macro idct_row4_neon y1, y2, y3, y4, pass
ld1 {\y1\().2D-\y2\().2D}, [x2], #32
movi v23.4S, #1<<2, lsl #8
orr v5.16B, \y1\().16B, \y2\().16B
mov x3, v5.D[1]
smlal v23.4S, \y1\().4H, z4
- idct_col4_top \y1 \y2 \y3 \y4 1 4H
+ idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
cmp x3, #0
beq \pass\()f
trn2 \y4\().4S, v17.4S, v19.4S
.endm
-.macro declare_idct_col4_neon i l
+.macro declare_idct_col4_neon i, l
function idct_col4_neon\i
dup v23.4H, z4c
.if \i == 1
.endif
smull v23.4S, v23.4H, z4
- idct_col4_top v24 v25 v26 v27 \i \l
+ idct_col4_top v24, v25, v26, v27, \i, \l
mov x4, v28.D[\i - 1]
mov x5, v29.D[\i - 1]
cmp x4, #0
beq 1f
- smull\i v7.4S, v28.\l, z4
+ smull\i v7.4S, v28\l, z4
add v19.4S, v19.4S, v7.4S
sub v20.4S, v20.4S, v7.4S
sub v21.4S, v21.4S, v7.4S
cmp x5, #0
beq 2f
- smlal\i v17.4S, v29.\l, z5
- smlsl\i v18.4S, v29.\l, z1
- smlal\i v5.4S, v29.\l, z7
- smlal\i v6.4S, v29.\l, z3
+ smlal\i v17.4S, v29\l, z5
+ smlsl\i v18.4S, v29\l, z1
+ smlal\i v5.4S, v29\l, z7
+ smlal\i v6.4S, v29\l, z3
2: mov x5, v31.D[\i - 1]
cmp x4, #0
beq 3f
- smull\i v7.4S, v30.\l, z6
- smull\i v16.4S, v30.\l, z2
+ smull\i v7.4S, v30\l, z6
+ smull\i v16.4S, v30\l, z2
add v19.4S, v19.4S, v7.4S
sub v22.4S, v22.4S, v7.4S
sub v20.4S, v20.4S, v16.4S
3: cmp x5, #0
beq 4f
- smlal\i v17.4S, v31.\l, z7
- smlsl\i v18.4S, v31.\l, z5
- smlal\i v5.4S, v31.\l, z3
- smlsl\i v6.4S, v31.\l, z1
+ smlal\i v17.4S, v31\l, z7
+ smlsl\i v18.4S, v31\l, z5
+ smlal\i v5.4S, v31\l, z3
+ smlsl\i v6.4S, v31\l, z1
4: addhn v7.4H, v19.4S, v17.4S
addhn2 v7.8H, v20.4S, v18.4S
endfunc
.endm
-declare_idct_col4_neon 1 4H
-declare_idct_col4_neon 2 8H
+declare_idct_col4_neon 1, .4H
+declare_idct_col4_neon 2, .8H
function ff_simple_idct_put_neon, export=1
idct_start x2
- idct_row4_neon v24 v25 v26 v27 1
- idct_row4_neon v28 v29 v30 v31 2
+ idct_row4_neon v24, v25, v26, v27, 1
+ idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
function ff_simple_idct_add_neon, export=1
idct_start x2
- idct_row4_neon v24 v25 v26 v27 1
- idct_row4_neon v28 v29 v30 v31 2
+ idct_row4_neon v24, v25, v26, v27, 1
+ idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sshr v1.8H, V7.8H, #COL_SHIFT-16
idct_start x0
mov x2, x0
- idct_row4_neon v24 v25 v26 v27 1
- idct_row4_neon v28 v29 v30 v31 2
+ idct_row4_neon v24, v25, v26, v27, 1
+ idct_row4_neon v28, v29, v30, v31, 2
add x2, x2, #-128
bl idct_col4_neon1