#include "neon.S"
function ff_h264_idct_add_neon, export=1
+.L_ff_h264_idct_add_neon:
ld1 {v0.4H, v1.4H, v2.4H, v3.4H}, [x1]
sxtw x2, w2
movi v30.8H, #0
endfunc
function ff_h264_idct_dc_add_neon, export=1
+.L_ff_h264_idct_dc_add_neon:
sxtw x2, w2
mov w3, #0
ld1r {v2.8H}, [x1]
mov w9, w3 // stride
movrel x7, scan8
mov x10, #16
- movrel x13, X(ff_h264_idct_dc_add_neon)
- movrel x14, X(ff_h264_idct_add_neon)
+ movrel x13, .L_ff_h264_idct_dc_add_neon
+ movrel x14, .L_ff_h264_idct_add_neon
1: mov w2, w9
ldrb w3, [x7], #1
ldrsw x0, [x5], #4
mov w9, w3 // stride
movrel x7, scan8
mov x10, #16
- movrel x13, X(ff_h264_idct_dc_add_neon)
- movrel x14, X(ff_h264_idct_add_neon)
+ movrel x13, .L_ff_h264_idct_dc_add_neon
+ movrel x14, .L_ff_h264_idct_add_neon
1: mov w2, w9
ldrb w3, [x7], #1
ldrsw x0, [x5], #4
add x5, x1, #16*4 // block_offset
add x9, x2, #16*32 // block
mov w19, w3 // stride
- movrel x13, X(ff_h264_idct_dc_add_neon)
- movrel x14, X(ff_h264_idct_add_neon)
+ movrel x13, .L_ff_h264_idct_dc_add_neon
+ movrel x14, .L_ff_h264_idct_add_neon
movrel x7, scan8, 16
mov x10, #0
mov x11, #16
.endm
function ff_h264_idct8_add_neon, export=1
+.L_ff_h264_idct8_add_neon:
movi v19.8H, #0
sxtw x2, w2
ld1 {v24.8H, v25.8H}, [x1]
endfunc
function ff_h264_idct8_dc_add_neon, export=1
+.L_ff_h264_idct8_dc_add_neon:
mov w3, #0
sxtw x2, w2
ld1r {v31.8H}, [x1]
mov w2, w3
movrel x7, scan8
mov w10, #16
- movrel x13, X(ff_h264_idct8_dc_add_neon)
- movrel x14, X(ff_h264_idct8_add_neon)
+ movrel x13, .L_ff_h264_idct8_dc_add_neon
+ movrel x14, .L_ff_h264_idct8_add_neon
1: ldrb w9, [x7], #4
ldrsw x0, [x5], #16
ldrb w9, [x4, w9, UXTW]