ELF .eabi_attribute 25, \val
.endm
-.macro function name
- .global EXTERN_ASM\name
+.macro function name, export=1
.align 2
+.if \export == 1
+ .global EXTERN_ASM\name
+ELF .hidden EXTERN_ASM\name
+ELF .type EXTERN_ASM\name, %function
+ .func EXTERN_ASM\name
EXTERN_ASM\name:
+.else
ELF .hidden \name
ELF .type \name, %function
.func \name
\name:
+.endif
.endm
.macro movrel rd, val
#endif
.endm
+/* Two-step token paste: JOIN expands its arguments first, so that the
+ * EXTERN_ASM prefix macro is resolved before GLUE concatenates the raw
+ * tokens (a single-level "a ## b" would paste the literal word EXTERN_ASM).
+ * X(sym) therefore yields the prefixed external name of sym, as used by the
+ * "bl X(...)" / "b X(...)" call sites below. */
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+#define X(s) JOIN(EXTERN_ASM, s)
+
// Row strides (in bytes) used for pointer stepping in the DCT/IDCT code
// below (e.g. "sub r1, r1, #FENC_STRIDE*8 - 8").
// NOTE(review): presumably the strides of x264's fenc (source) and fdec
// (reconstructed) pixel buffers — confirm against the C-side layout.
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
// return: 0 on success
// 1 if counters were already enabled
// 9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter
+function x264_cpu_enable_armv7_counter, export=0
mrc p15, 0, r2, c9, c12, 0 // read PMNC
ands r0, r2, #1
andne r0, r2, #9
bx lr
.endfunc
-function x264_cpu_disable_armv7_counter
+function x264_cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
bx lr
.endfunc
-function x264_sub8x4_dct_neon
+function x264_sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
vld1.64 {d1}, [r2,:64], ip
vsubl.u8 q8, d0, d1
function x264_sub16x16_dct8_neon
push {lr}
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
sub r1, r1, #8
sub r2, r2, #8
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
pop {lr}
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- b x264_sub8x8_dct8_neon
+ b X(x264_sub8x8_dct8_neon)
.endfunc
bx lr
.endfunc
-function x264_add8x4_idct_neon
+function x264_add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3
vld1.64 {d4-d7}, [r1,:128]!
function x264_add16x16_idct8_neon
mov ip, lr
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
- b x264_add8x8_idct8_neon
+ b X(x264_add8x8_idct8_neon)
.endfunc
.endfunc
.macro MEMCPY_ALIGNED srcalign dstalign
-function memcpy_aligned_\dstalign\()_\srcalign\()_neon
+function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
mov r3, r0
.if \srcalign == 8 && \dstalign == 8
sub r2, #16
.endm
.macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon
+function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w8_\ext\()_neon
+function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #4
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w16_\ext\()_neon
+function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
AVG_WEIGHT add_sub
AVG_WEIGHT sub_add
-function x264_pixel_avg_w4_neon
+function x264_pixel_avg_w4_neon, export=0
subs lr, lr, #2
vld1.32 {d0[]}, [r2], r3
vld1.32 {d2[]}, [r4], r5
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w8_neon
+function x264_pixel_avg_w8_neon, export=0
subs lr, lr, #4
vld1.64 {d0}, [r2], r3
vld1.64 {d2}, [r4], r5
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w16_neon
+function x264_pixel_avg_w16_neon, export=0
subs lr, lr, #4
vld1.64 {d0-d1}, [r2], r3
vld1.64 {d2-d3}, [r4], r5
bgt var16_loop
.endfunc
-function x264_var_end
+function x264_var_end, export=0
vpaddl.u16 q8, q14
vpaddl.u16 q9, q15
vadd.u32 q1, q1, q8
SUMSUB_AB q10, q11, q2, q3
.endfunc
-function x264_satd_4x8_8x4_end_neon
+function x264_satd_4x8_8x4_end_neon, export=0
vadd.s16 q0, q8, q10
vadd.s16 q1, q9, q11
vsub.s16 q2, q8, q10
bx lr
.endfunc
-function x264_satd_8x8_neon
+function x264_satd_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
.endfunc
// one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon
+function x264_satd_8x4v_8x8h_neon, export=0
SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15
vtrn.16 q8, q9
SUMSUB_AB q12, q14, q0, q2
bx lr
.endfunc
-function x264_satd_16x4_neon
+function x264_satd_16x4_neon, export=0
vld1.64 {d2-d3}, [r2], r3
vld1.64 {d0-d1}, [r0,:128], r1
vsubl.u8 q8, d0, d2
SUMSUB_ABCD \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4
.endm
-function x264_sa8d_8x8_neon
+function x264_sa8d_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
HADAMARD_AC 16, 16
// q4: satd q5: sa8d q6: mask_ac4 q7: mask_ac8
-function x264_hadamard_ac_8x8_neon
+function x264_hadamard_ac_8x8_neon, export=0
vld1.64 {d2}, [r0,:64], r1
vld1.64 {d3}, [r0,:64], r1
vaddl.u8 q0, d2, d3