#include "asm.S"
-.fpu neon
-
.section .rodata
.align 4
pmovmskb_byte:
vsub.s16 d3, d3, d0
vst1.64 {d3}, [r0,:64]
QUANT_END d3
-.endfunc
+endfunc
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
function x264_quant_4x4_dc_neon
QUANT_TWO q0, q0, d4, d5, d4, d5, q0
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4_neon
QUANT_TWO q0, q1, d4, d5, d6, d7, q0
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4x4_neon
orrne r0, #8
vpop {d8-d15}
bx lr
-.endfunc
+endfunc
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
function x264_quant_8x8_neon
.endr
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
.macro DEQUANT_START mf_size offset dc=no
mov r3, #0x2b
bgt dequant_\size\()_rshift_loop
.endif
bx lr
-.endfunc
+endfunc
.endm
DEQUANT 4x4, 4
vmovn.s32 d3, q13
vst1.16 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
// int coeff_last( int16_t *l )
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
-.endfunc
+endfunc
function x264_coeff_last8_arm
ldrd r2, r3, [r0, #8]
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
-.endfunc
+endfunc
.macro COEFF_LAST_1x size
function x264_coeff_last\size\()_neon
subslt r0, r3, r0, lsr #2
movlt r0, #0
bx lr
-.endfunc
+endfunc
.endm
COEFF_LAST_1x 15
subslt r0, ip, r0
movlt r0, #0
bx lr
-.endfunc
+endfunc