The integrated assembler in llvm trunk (to be released as 3.5) does not support the '.func' and '.endfunc' directives, but is otherwise capable enough to assemble the arm asm correctly. Guard those directives behind a new HAVE_AS_FUNC configure check so they are only emitted when the assembler accepts them.
+#if HAVE_AS_FUNC
+# define FUNC
+#else
+# define FUNC @
+#endif
+
.macro require8, val=1
ELF .eabi_attribute 24, \val
.endm
.macro function name, export=1
+ .macro endfunc
+ELF .size \name, . - \name
+FUNC .endfunc
+ .purgem endfunc
+ .endm
.align 2
.if \export == 1
.global EXTERN_ASM\name
ELF .hidden EXTERN_ASM\name
ELF .type EXTERN_ASM\name, %function
+FUNC .func EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF .hidden \name
ELF .type \name, %function
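The mechanism is simple: with HAVE_AS_FUNC set, FUNC expands to nothing, so a line such as

FUNC .func EXTERN_ASMx264_foo

(x264_foo is a hypothetical name, for illustration only) reaches the assembler as a plain .func directive. Without it, FUNC expands to '@', the ARM assembler's comment character, and the whole line is discarded as a comment once cpp has run over the .S file. The existing ELF prefix in these files suppresses ELF-only directives on other platforms in the same way.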
function x264_cpu_neon_test
vadd.i16 q0, q0, q0
bx lr
// return: 0 on success
// 1 if counters were already enabled
mov r2, #1 << 31 // enable cycle counter
mcr p15, 0, r2, c9, c12, 1 // write CNTENS
bx lr
function x264_cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
bx lr
cmp r0, #10
movgt r0, #0
pop {r4-r6,pc}
vrhadd.s16 d3, d6, d7
vst1.64 {d0-d3}, [r0,:128]
bx lr
function x264_idct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
HADAMARD 2, sumsub, d3, d2, d6, d7
vst1.64 {d0-d3}, [r0,:128]
bx lr
.macro DCT_1D d0 d1 d2 d3 d4 d5 d6 d7
DCT_1D d4, d5, d6, d7, d0, d1, d2, d3
vst1.64 {d4-d7}, [r0,:128]
bx lr
function x264_sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
vst1.64 {d4-d5}, [r0,:128]!
vst1.64 {d6-d7}, [r0,:128]!
bx lr
function x264_sub8x8_dct_neon
push {lr}
bl x264_sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
function x264_sub16x16_dct_neon
push {lr}
bl x264_sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
vst1.64 {d24-d27}, [r0,:128]!
vst1.64 {d28-d31}, [r0,:128]!
bx lr
function x264_sub16x16_dct8_neon
push {lr}
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
b X(x264_sub8x8_dct8_neon)
// First part of IDCT (minus final SUMSUB_BA)
vst1.32 {d2[1]}, [r0,:32], r2
vst1.32 {d2[0]}, [r0,:32], r2
bx lr
function x264_add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
vst1.32 {d2}, [r0,:64], r2
vst1.32 {d3}, [r0,:64], r2
bx lr
function x264_add8x8_idct_neon
mov r2, #FDEC_STRIDE
bl x264_add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
function x264_add16x16_idct_neon
mov r2, #FDEC_STRIDE
bl x264_add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
vst1.64 {d6}, [r0,:64], r2
vst1.64 {d7}, [r0,:64], r2
bx lr
function x264_add16x16_idct8_neon
mov ip, lr
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
b X(x264_add8x8_idct8_neon)
function x264_add8x8_idct_dc_neon
vst1.64 {d6}, [r0,:64], r2
vst1.64 {d7}, [r0,:64], r2
bx lr
.macro ADD16x4_IDCT_DC dc
vld1.64 {d16-d17}, [r0,:128], r3
ADD16x4_IDCT_DC d2
ADD16x4_IDCT_DC d3
bx lr
function x264_sub8x8_dct_dc_neon
mov r3, #FENC_STRIDE
vpadd.s16 d0, d0, d1
vst1.64 {d0}, [r0,:64]
bx lr
function x264_zigzag_scan_4x4_frame_neon
vtbl.8 d7, {d2-d3}, d19
vst1.64 {d4-d7}, [r0,:128]
bx lr
function x264_deblock_h_luma_neon
h264_loop_filter_start
.macro h264_loop_filter_chroma
vdup.8 q11, r2 // alpha
vst2.8 {d0, d1}, [r0,:128], r1
bx lr
function x264_deblock_h_chroma_neon
h264_loop_filter_start
vst1.8 {d3}, [r0], r1
bx lr
function x264_deblock_strength_neon
ldr ip, [sp]
vst1.8 {q8}, [r3,:128] @ bs[0]
bx lr
pld [r3, r1, lsl #1]
pld [r3, r2]
bx lr
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
pld [ip]
pld [ip, r3]
pop {pc}
// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
movrel ip, memcpy_table
and r3, r3, #0xc
ldr pc, [ip, r3]
.macro MEMCPY_ALIGNED srcalign dstalign
function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
vst1.64 {d0}, [r3,:64]!
.endif
bx lr
.endm
MEMCPY_ALIGNED 16, 16
.endr
bgt memzero_loop
bx lr
// void pixel_avg( uint8_t *dst, intptr_t dst_stride,
cmp ip, #0
bge x264_pixel_avg_weight_w\w\()_add_add_neon
b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
vst1.32 {d1[0]}, [r0,:32], r1
bgt 1b
pop {r4-r6,pc}
function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
vst1.64 {d3}, [r0,:64], r1
bgt 1b
pop {r4-r6,pc}
function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
vst1.64 {d2-d3}, [r0,:128], r1
bgt 1b
pop {r4-r6,pc}
vst1.32 {d1[0]}, [r0,:32], r1
bgt x264_pixel_avg_w4_neon
pop {r4-r6,pc}
function x264_pixel_avg_w8_neon, export=0
subs lr, lr, #4
vst1.64 {d3}, [r0,:64], r1
bgt x264_pixel_avg_w8_neon
pop {r4-r6,pc}
function x264_pixel_avg_w16_neon, export=0
subs lr, lr, #4
vst1.64 {d6-d7}, [r0,:128], r1
bgt x264_pixel_avg_w16_neon
pop {r4-r6,pc}
function x264_pixel_avg2_w4_neon
vst1.32 {d1[0]}, [r0,:32], r1
bgt avg2_w4_loop
pop {pc}
function x264_pixel_avg2_w8_neon
ldr ip, [sp, #4]
vst1.64 {d1}, [r0,:64], r1
bgt avg2_w8_loop
pop {pc}
function x264_pixel_avg2_w16_neon
ldr ip, [sp, #4]
vst1.64 {d4-d5}, [r0,:128], r1
bgt avg2_w16_loop
pop {pc}
function x264_pixel_avg2_w20_neon
ldr ip, [sp, #4]
vst1.32 {d6[0]}, [r0,:32], r1
bgt avg2_w20_loop
pop {pc}
.macro weight_prologue type
vst1.32 {d20[1]}, [r0,:32], r1
bgt weight20_loop
pop {r4-r5,pc}
function x264_mc_weight_w16_neon
weight_prologue full
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_loop
pop {r4-r5,pc}
function x264_mc_weight_w8_neon
weight_prologue full
vst1.8 {d18}, [r0,:64], r1
bgt weight8_loop
pop {r4-r5,pc}
function x264_mc_weight_w4_neon
weight_prologue full
vst1.32 {d16[1]}, [r0,:32], r1
bgt weight4_loop
pop {r4-r5,pc}
function x264_mc_weight_w20_nodenom_neon
weight_prologue nodenom
vst1.32 {d20[1]}, [r0,:32], r1
bgt weight20_nodenom_loop
pop {r4-r5,pc}
function x264_mc_weight_w16_nodenom_neon
weight_prologue nodenom
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_nodenom_loop
pop {r4-r5,pc}
function x264_mc_weight_w8_nodenom_neon
weight_prologue nodenom
vst1.8 {d17}, [r0,:64], r1
bgt weight8_nodenom_loop
pop {r4-r5,pc}
function x264_mc_weight_w4_nodenom_neon
weight_prologue nodenom
vst1.32 {d16[1]}, [r0,:32], r1
bgt weight4_nodenom_loop
pop {r4-r5,pc}
.macro weight_simple_prologue
push {lr}
vst1.8 {d19-d21}, [r0,:64], r1
bgt weight20_\name\()_loop
pop {pc}
function x264_mc_weight_w16_\name\()_neon
weight_simple_prologue
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_\name\()_loop
pop {pc}
function x264_mc_weight_w8_\name\()_neon
weight_simple_prologue
vst1.8 {d17}, [r0,:64], r1
bgt weight8_\name\()_loop
pop {pc}
function x264_mc_weight_w4_\name\()_neon
weight_simple_prologue
vst1.32 {d17[0]}, [r0,:32], r1
bgt weight4_\name\()_loop
pop {pc}
.endm
weight_simple offsetadd, vqadd.u8
vst1.32 {d3[0]}, [r0,:32], r1
bgt copy_w4_loop
bx lr
function x264_mc_copy_w8_neon
ldr ip, [sp]
vst1.32 {d3}, [r0,:64], r1
bgt copy_w8_loop
bx lr
function x264_mc_copy_w16_neon
ldr ip, [sp]
vst1.32 {d6-d7}, [r0,:128], r1
bgt copy_w16_loop
bx lr
function x264_mc_copy_w16_aligned_neon
ldr ip, [sp]
vst1.32 {d6-d7}, [r0,:128], r1
bgt copy_w16_aligned_loop
bx lr
// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
vpop {d8-d11}
pop {r4-r8, pc}
// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
vst1.64 {d0-d1}, [r0,:128]!
bgt filter_v_loop
pop {pc}
// hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
function x264_hpel_filter_c_neon
vst1.64 {d30-d31}, [r0,:128]!
bgt filter_c_loop
bx lr
// hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
function x264_hpel_filter_h_neon
vst1.64 {d6-d7}, [r0,:128]!
bgt filter_h_loop
bx lr
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
vpop {d8-d15}
pop {r4-r10,pc}
function x264_load_deinterleave_chroma_fdec_neon
mov ip, #FDEC_STRIDE/2
function x264_load_deinterleave_chroma_fenc_neon
mov ip, #FENC_STRIDE/2
function x264_plane_copy_deinterleave_neon
push {r4-r7, lr}
bgt block
pop {r4-r7, pc}
function x264_plane_copy_deinterleave_rgb_neon
push {r4-r8, r10, r11, lr}
bgt block4
pop {r4-r8, r10, r11, pc}
function x264_plane_copy_interleave_neon
push {r4-r7, lr}
bgt blocki
pop {r4-r7, pc}
function x264_store_interleave_chroma_neon
push {lr}
.endr
usada8 r0, r6, lr, ip
pop {r4-r6,pc}
vpaddl.u16 d0, d0
vmov.u32 r0, d0[0]
bx lr
vpaddl.u16 d0, d0
vmov.u32 r0, d0[0]
bx lr
vst1.32 {d0-d1}, [r7]
.endif
pop {r6-r7,pc}
vpadd.s32 d0, d0, d0
vmov.32 r0, d0[0]
bx lr
vld1.64 {d26}, [r0,:64], r1
VAR_SQR_SUM q2, q10, q15, d26
b x264_var_end
function x264_pixel_var_8x16_neon
vld1.64 {d16}, [r0,:64], r1
2:
VAR_SQR_SUM q2, q13, q15, d22
b x264_var_end
function x264_pixel_var_16x16_neon
vld1.64 {d16-d17}, [r0,:128], r1
VAR_SQR_SUM q1, q12, q14, d18
VAR_SQR_SUM q2, q13, q15, d19
bgt var16_loop
function x264_var_end, export=0
vpaddl.u16 q8, q14
.macro DIFF_SUM diff da db lastdiff
vld1.64 {\da}, [r0,:64], r1
mul r0, r0, r0
sub r0, r1, r0, lsr #6
bx lr
function x264_pixel_var2_8x16_neon
vld1.64 {d16}, [r0,:64], r1
mul r0, r0, r0
sub r0, r1, r0, lsr #7
bx lr
.macro LOAD_DIFF_8x4 q0 q1 q2 q3
vld1.32 {d1}, [r2], r3
HORIZ_ADD d0, d0, d1
vmov.32 r0, d0[0]
bx lr
function x264_pixel_satd_4x8_neon
vld1.32 {d1[]}, [r2], r3
vsubl.u8 q3, d6, d7
SUMSUB_AB q10, q11, q2, q3
b x264_satd_4x8_8x4_end_neon
function x264_pixel_satd_8x4_neon
vld1.64 {d1}, [r2], r3
vld1.64 {d6}, [r0,:64], r1
vsubl.u8 q3, d6, d7
SUMSUB_AB q10, q11, q2, q3
function x264_satd_4x8_8x4_end_neon, export=0
vadd.s16 q0, q8, q10
HORIZ_ADD d0, d0, d1
vmov.32 r0, d0[0]
bx lr
function x264_pixel_satd_8x8_neon
mov ip, lr
mov lr, ip
vmov.32 r0, d0[0]
bx lr
function x264_pixel_satd_8x16_neon
vpush {d8-d11}
mov lr, ip
vmov.32 r0, d0[0]
bx lr
function x264_satd_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
SUMSUB_AB q9, q11, q1, q3
vld1.64 {d0}, [r0,:64], r1
vsubl.u8 q15, d0, d1
// one vertical hadamard pass and two horizontal
function x264_satd_8x4v_8x8h_neon, export=0
vmax.s16 q14, q8, q10
vmax.s16 q15, q9, q11
bx lr
function x264_pixel_satd_16x8_neon
vpush {d8-d11}
mov lr, ip
vmov.32 r0, d0[0]
bx lr
function x264_pixel_satd_16x16_neon
vpush {d8-d11}
mov lr, ip
vmov.32 r0, d0[0]
bx lr
function x264_satd_16x4_neon, export=0
vld1.64 {d2-d3}, [r2], r3
SUMSUB_AB q2, q3, q10, q11
SUMSUB_ABCD q8, q10, q9, q11, q0, q2, q1, q3
b x264_satd_8x4v_8x8h_neon
function x264_pixel_sa8d_8x8_neon
add r0, r0, #1
lsr r0, r0, #1
bx lr
function x264_pixel_sa8d_16x16_neon
vpush {d8-d11}
add r0, r0, #1
lsr r0, r0, #1
bx lr
.macro HADAMARD4_V r1, r2, r3, r4, t1, t2, t3, t4
SUMSUB_ABCD \t1, \t2, \t3, \t4, \r1, \r2, \r3, \r4
vadd.i16 q8, q8, q9
vadd.i16 q9, q10, q11
bx lr
lsr r0, r0, #1
lsr r1, r1, #2
bx lr
vadd.s16 q2, q2, q14
vpadal.u16 q5, q2
bx lr
.macro SSIM_ITER n ssa s12 ssb lastssa lasts12 lastssb da db dnext
vst4.32 {d0-d3}, [ip]
bx lr
// FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
function x264_pixel_ssim_end4_neon
vpadd.f32 d0, d0, d0
vmov.32 r0, d0[0]
bx lr
add ip, ip, ip, lsl #16
str ip, [r0, #3*FDEC_STRIDE]
bx lr
function x264_predict_4x4_v_armv6
ldr r1, [r0, #0 - 1 * FDEC_STRIDE]
str r1, [r0, #0 + 2 * FDEC_STRIDE]
str r1, [r0, #0 + 3 * FDEC_STRIDE]
bx lr
function x264_predict_4x4_dc_armv6
mov ip, #0
str r1, [r0, #2*FDEC_STRIDE]
str r1, [r0, #3*FDEC_STRIDE]
bx lr
function x264_predict_4x4_dc_top_neon
mov r12, #FDEC_STRIDE
vst1.32 d1[0], [r0,:32], r12
vst1.32 d1[0], [r0,:32], r12
bx lr
// return a1 = (a1+2*b1+c1+2)>>2 a2 = (a2+2*b2+c2+2)>>2
.macro PRED4x4_LOWPASS a1 b1 c1 a2 b2 c2 pb_1
add r5, r5, r4, lsr #8
str r5, [r0, #3*FDEC_STRIDE]
pop {r4-r6,pc}
function x264_predict_4x4_ddl_neon
sub r0, #FDEC_STRIDE
vst1.32 {d2[0]}, [r0,:32], ip
vst1.32 {d3[0]}, [r0,:32], ip
bx lr
function x264_predict_8x8_dc_neon
mov ip, #0
vst1.64 {d0}, [r0,:64], ip
.endr
pop {r4-r5,pc}
function x264_predict_8x8_h_neon
add r1, r1, #7
vst1.64 {d6}, [r0,:64], ip
vst1.64 {d7}, [r0,:64], ip
bx lr
function x264_predict_8x8_v_neon
add r1, r1, #16
vst1.8 {d0}, [r0,:64], r12
.endr
bx lr
function x264_predict_8x8_ddl_neon
add r1, #16
vst1.8 d2, [r0,:64], r12
vst1.8 d1, [r0,:64], r12
bx lr
function x264_predict_8x8_ddr_neon
vld1.8 {d0-d3}, [r1,:128]
vst1.8 {d4}, [r0,:64], r12
vst1.8 {d5}, [r0,:64], r12
bx lr
function x264_predict_8x8_vl_neon
add r1, #16
vst1.8 {d3}, [r0,:64], r12
vst1.8 {d2}, [r0,:64], r12
bx lr
function x264_predict_8x8_vr_neon
add r1, #8
vst1.8 {d6}, [r0,:64], r12
vst1.8 {d3}, [r0,:64], r12
bx lr
function x264_predict_8x8_hd_neon
mov r12, #FDEC_STRIDE
vst1.8 {d16}, [r0,:64], r12
bx lr
function x264_predict_8x8_hu_neon
mov r12, #FDEC_STRIDE
vst1.8 {d7}, [r0,:64], r12
vst1.8 {d17}, [r0,:64]
bx lr
function x264_predict_8x8c_dc_top_neon
sub r2, r0, #FDEC_STRIDE
vdup.8 d0, d0[0]
vtrn.32 d0, d1
b pred8x8_dc_end
function x264_predict_8x8c_dc_left_neon
mov r1, #FDEC_STRIDE
vdup.8 d1, d0[1]
vdup.8 d0, d0[0]
b pred8x8_dc_end
function x264_predict_8x8c_dc_neon
sub r2, r0, #FDEC_STRIDE
vst1.8 {d1}, [r2,:64], r1
.endr
bx lr
function x264_predict_8x8c_h_neon
sub r1, r0, #1
vst1.64 {d2}, [r0,:64], ip
.endr
bx lr
function x264_predict_8x8c_v_neon
sub r0, r0, #FDEC_STRIDE
vst1.64 {d0}, [r0,:64], ip
.endr
bx lr
function x264_predict_8x8c_p_neon
sub r3, r0, #FDEC_STRIDE
subs r3, r3, #1
bne 1b
bx lr
function x264_predict_16x16_dc_top_neon
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b pred16x16_dc_end
function x264_predict_16x16_dc_left_neon
mov r1, #FDEC_STRIDE
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b pred16x16_dc_end
function x264_predict_16x16_dc_neon
sub r3, r0, #FDEC_STRIDE
vst1.64 {d0-d1}, [r0,:128], r1
.endr
bx lr
function x264_predict_16x16_h_neon
sub r1, r0, #1
vst1.64 {d2-d3}, [r0,:128], ip
.endr
bx lr
function x264_predict_16x16_v_neon
sub r0, r0, #FDEC_STRIDE
vst1.64 {d0-d1}, [r0,:128], ip
.endr
bx lr
function x264_predict_16x16_p_neon
sub r3, r0, #FDEC_STRIDE
subs r3, r3, #1
bne 1b
bx lr
vsub.s16 d3, d3, d0
vst1.64 {d3}, [r0,:64]
QUANT_END d3
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
function x264_quant_4x4_dc_neon
QUANT_TWO q0, q0, d4, d5, d4, d5, q0
vorr d0, d0, d1
QUANT_END d0
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4_neon
QUANT_TWO q0, q1, d4, d5, d6, d7, q0
vorr d0, d0, d1
QUANT_END d0
// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4x4_neon
orrne r0, #8
vpop {d8-d15}
bx lr
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
function x264_quant_8x8_neon
.endr
vorr d0, d0, d1
QUANT_END d0
.macro DEQUANT_START mf_size offset dc=no
mov r3, #0x2b
bgt dequant_\size\()_rshift_loop
.endif
bx lr
vmovn.s32 d3, q13
vst1.16 {d0-d3}, [r0,:128]
bx lr
// int coeff_last( int16_t *l )
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
function x264_coeff_last8_arm
ldrd r2, r3, [r0, #8]
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
.macro COEFF_LAST_1x size
function x264_coeff_last\size\()_neon
subslt r0, r3, r0, lsr #2
movlt r0, #0
bx lr
subslt r0, ip, r0
movlt r0, #0
bx lr
as_check() {
log_check "whether $AS supports $1"
- echo "$1" > conftest.asm
- if $AS conftest.asm $ASFLAGS $2 -o conftest.o >conftest.log 2>&1; then
+ echo "$1" > conftest$AS_EXT
+ as_cmd="$AS conftest$AS_EXT $ASFLAGS $2 -o conftest.o"
+ if $as_cmd >conftest.log 2>&1; then
log_fail
log_msg "Failed commandline was:"
log_msg "--------------------------------------------------"
- log_msg "$AS conftest.asm $ASFLAGS $2 -o conftest.o"
+ log_msg "$as_cmd"
cat conftest.log >> config.log
log_msg "--------------------------------------------------"
log_msg "Failed program was:"
log_msg "--------------------------------------------------"
- cat conftest.asm >> config.log
+ cat conftest$AS_EXT >> config.log
log_msg "--------------------------------------------------"
fi
return $res
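as_check writes the probe text to conftest$AS_EXT, tries to assemble it, and logs the saved $as_cmd on failure, so the logged command can no longer drift from the one actually run. For the ARM '.func' probe added below, the generated conftest file would contain just these two lines (a sketch, assuming $NL expands to a newline as elsewhere in this script):

.func test
.endfunc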
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
- LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM"
+ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC"
i*86)
ARCH="X86"
AS="yasm"
ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
if [ $compiler = GNU ]; then
if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
x86_64)
ARCH="X86_64"
AS="yasm"
ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
[ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
if [ "$SYS" = MACOSX ]; then
if [ $asm = auto ] ; then
define HAVE_ALTIVEC
AS="${AS-${CC}}"
if [ $SYS = MACOSX ] ; then
CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4"
else
+if [ $asm = auto -a $ARCH = ARM ] ; then
+ # check if the assembler supports '.func' (llvm's integrated assembler does not)
+ as_check ".func test${NL}.endfunc" && define HAVE_AS_FUNC 1
+fi
+
[ $asm = no ] && AS=""
[ "x$AS" = x ] && asm="no" || asm="yes"