Fix comment for mc_copy_neon.
Fix memzero_aligned_neon prototype.
Update NEON (i)dct_dc prototypes.
Duplicate x86 behavior for global+hidden functions.
%.o: %.S
$(AS) $(ASFLAGS) -o $@ $<
+ -@ $(STRIP) -x $@ # delete local/anonymous symbols, so they don't show up in oprofile
.depend: config.mak
rm -f .depend
#include "config.h"
+#ifdef __ELF__
+# define ELF
+#else
+# define ELF @
+#endif
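
On ARM, gas uses '@' as the line-comment character, so the non-ELF definition above turns any ELF-prefixed directive into a comment once cpp has run. A minimal sketch of both expansions, with a hypothetical symbol:

    ELF .hidden x264_foo    @ as written in the source
    .hidden x264_foo        @ after cpp when __ELF__ is defined (ELF expands to nothing)
    @ .hidden x264_foo      @ after cpp otherwise (the whole line becomes a comment)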
+
.macro require8, val=1
- .eabi_attribute 24, \val
+ELF .eabi_attribute 24, \val
.endm
.macro preserve8, val=1
- .eabi_attribute 25, \val
+ELF .eabi_attribute 25, \val
.endm
- .macro function name, export=0
-.if \export
+ .macro function name
.global \name
-.endif
- .type \name, %function
+ELF .hidden \name
+ELF .type \name, %function
.func \name
\name:
.endm
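
This duplicates the x86 behavior: every asm symbol is global, so profilers and debuggers can resolve it, but on ELF it is also marked hidden so it is not exported from a shared libx264. After the change, a declaration expands roughly as follows (hypothetical symbol name):

    function x264_foo
    @ is equivalent on ELF to:
    .global x264_foo
    .hidden x264_foo
    .type   x264_foo, %function
    .func   x264_foo
    x264_foo: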
// done in gas because .fpu neon overrides the refusal to assemble
// instructions the selected -march/-mcpu doesn't support
-function x264_cpu_neon_test, export=1
+function x264_cpu_neon_test
vadd.i16 q0, q0, q0
bx lr
.endfunc
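
The point of the comment above: gas's .fpu directive takes precedence over the -mfpu/-mcpu given on the command line, so a NEON instruction can be assembled into a binary whose baseline target lacks NEON, and is only executed after the runtime check passes. A sketch of the idea, assuming a build targeting a pre-NEON core:

    .fpu neon               @ overrides e.g. -mfpu=vfp from the command line
    vadd.i16 q0, q0, q0     @ assembles fine; would be rejected without the directive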
// return: 0 if neon -> arm transfers take more than 10 cycles
// nonzero otherwise
-function x264_cpu_fast_neon_mrc_test, export=1
+function x264_cpu_fast_neon_mrc_test
// check for user access to performance counters
mrc p15, 0, r0, c9, c14, 0
cmp r0, #0
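
The rest of the function (elided here) times a NEON-to-ARM register transfer with the cycle counter; the mrc above only checks that user space may read the counters at all. For reference, the two coprocessor reads involved, per the ARMv7 manual:

    mrc p15, 0, r0, c9, c14, 0   @ PMUSERENR: nonzero if user access is enabled
    mrc p15, 0, r0, c9, c13, 0   @ PMCCNTR: the cycle counter itself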
.endm
-function x264_dct4x4dc_neon, export=1
+function x264_dct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
bx lr
.endfunc
-function x264_idct4x4dc_neon, export=1
+function x264_idct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
vsub.s16 \d3, \d7, \d5
.endm
-function x264_sub4x4_dct_neon, export=1
+function x264_sub4x4_dct_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.32 {d0[]}, [r1,:32], r3
bx lr
.endfunc
-function x264_sub8x4_dct_neon, export=1
+function x264_sub8x4_dct_neon
vld1.64 {d0}, [r1,:64], r3
vld1.64 {d1}, [r2,:64], ip
vsubl.u8 q8, d0, d1
bx lr
.endfunc
-function x264_sub8x8_dct_neon, export=1
+function x264_sub8x8_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
b x264_sub8x4_dct_neon
.endfunc
-function x264_sub16x16_dct_neon, export=1
+function x264_sub16x16_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
SUMSUB_SHR2 2, q11, q13, q3, q13, q0, q1
.endm
-function x264_sub8x8_dct8_neon, export=1
+function x264_sub8x8_dct8_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
bx lr
.endfunc
-function x264_sub16x16_dct8_neon, export=1
+function x264_sub16x16_dct8_neon
push {lr}
bl x264_sub8x8_dct8_neon
sub r1, r1, #FENC_STRIDE*8 - 8
vadd.s16 \d6, \d6, \d1
.endm
-function x264_add4x4_idct_neon, export=1
+function x264_add4x4_idct_neon
mov r2, #FDEC_STRIDE
vld1.64 {d0-d3}, [r1,:128]
bx lr
.endfunc
-function x264_add8x4_idct_neon, export=1
+function x264_add8x4_idct_neon
vld1.64 {d0-d3}, [r1,:128]!
IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3
vld1.64 {d4-d7}, [r1,:128]!
bx lr
.endfunc
-function x264_add8x8_idct_neon, export=1
+function x264_add8x8_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
bl x264_add8x4_idct_neon
b x264_add8x4_idct_neon
.endfunc
-function x264_add16x16_idct_neon, export=1
+function x264_add16x16_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
bl x264_add8x4_idct_neon
SUMSUB_AB q11, q12, q2, q12
.endm
-function x264_add8x8_idct8_neon, export=1
+function x264_add8x8_idct8_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16-d19}, [r1,:128]!
vld1.64 {d20-d23}, [r1,:128]!
bx lr
.endfunc
-function x264_add16x16_idct8_neon, export=1
+function x264_add16x16_idct8_neon
mov ip, lr
bl x264_add8x8_idct8_neon
sub r0, r0, #8*FDEC_STRIDE-8
.endfunc
-function x264_add8x8_idct_dc_neon, export=1
+function x264_add8x8_idct_dc_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64]
vrshr.s16 d16, d16, #6
vst1.64 {d22-d23}, [r2,:128], r3
.endm
-function x264_add16x16_idct_dc_neon, export=1
+function x264_add16x16_idct_dc_neon
mov r2, r0
mov r3, #FDEC_STRIDE
vmov.i16 q15, #0
bx lr
.endfunc
-function x264_sub8x8_dct_dc_neon, export=1
+function x264_sub8x8_dct_dc_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
.endfunc
-function x264_zigzag_scan_4x4_frame_neon, export=1
+function x264_zigzag_scan_4x4_frame_neon
movrel r2, scan4x4_frame
vld1.64 {d0-d3}, [r1,:128]
vld1.64 {d16-d19}, [r2,:128]
void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
-void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[2][2] );
+void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] );
void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] );
-void x264_sub8x8_dct_dc_neon( int16_t dct[2][2], uint8_t *pix1, uint8_t *pix2 );
+void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
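
int16_t dct[2][2] and int16_t dct[4] name the same eight bytes, but the flat form matches the generic prototypes and is what the NEON code actually does with the argument, moving all four DC coefficients as a single 64-bit load/store:

    vld1.64 {d16}, [r1,:64]   @ the whole int16_t dct[4] in one d register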
vqmovun.s16 d1, q12
.endm
-function x264_deblock_v_luma_neon, export=1
+function x264_deblock_v_luma_neon
h264_loop_filter_start
vld1.64 {d0, d1}, [r0,:128], r1
bx lr
.endfunc
-function x264_deblock_h_luma_neon, export=1
+function x264_deblock_h_luma_neon
h264_loop_filter_start
sub r0, r0, #4
vqmovun.s16 d0, q11
.endm
-function x264_deblock_v_chroma_neon, export=1
+function x264_deblock_v_chroma_neon
h264_loop_filter_start
sub r0, r0, r1, lsl #1
bx lr
.endfunc
-function x264_deblock_h_chroma_neon, export=1
+function x264_deblock_h_chroma_neon
h264_loop_filter_start
sub r0, r0, #2
// The prefetch functions also use nothing above armv5te, but we don't care about pre-armv6
// void prefetch_ref( uint8_t *pix, int stride, int parity )
-function x264_prefetch_ref_arm, export=1
+function x264_prefetch_ref_arm
sub r2, r2, #1
add r0, r0, #64
and r2, r2, r1
// void prefetch_fenc( uint8_t *pix_y, int stride_y,
// uint8_t *pix_uv, int stride_uv, int mb_x )
-function x264_prefetch_fenc_arm, export=1
+function x264_prefetch_fenc_arm
ldr ip, [sp]
push {lr}
and lr, ip, #3
// void *x264_memcpy_aligned( void * dst, const void * src, size_t n )
-function x264_memcpy_aligned_neon, export=1
+function x264_memcpy_aligned_neon
orr r3, r0, r1, lsr #1
movrel ip, memcpy_table
and r3, r3, #0xc
.ltorg
// void x264_memzero_aligned( void *dst, int n )
-function x264_memzero_aligned_neon, export=1
+function x264_memzero_aligned_neon
vmov.i8 q0, #0
vmov.i8 q1, #0
memzero_loop:
// uint8_t *src1, int src1_stride,
// uint8_t *src2, int src2_stride, int weight );
.macro AVGH w h
-function x264_pixel_avg_\w\()x\h\()_neon, export=1
+function x264_pixel_avg_\w\()x\h\()_neon
ldr ip, [sp, #8]
push {r4-r6,lr}
cmp ip, #32
.endm
.macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon, export=1
+function x264_pixel_avg_weight_w4_\ext\()_neon
load_weights_\ext
1: // height loop
subs lr, lr, #2
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w8_\ext\()_neon, export=1
+function x264_pixel_avg_weight_w8_\ext\()_neon
load_weights_\ext
1: // height loop
subs lr, lr, #4
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w16_\ext\()_neon, export=1
+function x264_pixel_avg_weight_w16_\ext\()_neon
load_weights_\ext
1: // height loop
subs lr, lr, #2
AVG_WEIGHT add_sub
AVG_WEIGHT sub_add
-function x264_pixel_avg_w4_neon, export=1
+function x264_pixel_avg_w4_neon
subs lr, lr, #2
vld1.32 {d0[]}, [r2], r3
vld1.32 {d2[]}, [r4], r5
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w8_neon, export=1
+function x264_pixel_avg_w8_neon
subs lr, lr, #4
vld1.64 {d0}, [r2], r3
vld1.64 {d2}, [r4], r5
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w16_neon, export=1
+function x264_pixel_avg_w16_neon
subs lr, lr, #4
vld1.64 {d0-d1}, [r2], r3
vld1.64 {d2-d3}, [r4], r5
.endfunc
-function x264_pixel_avg2_w4_neon, export=1
+function x264_pixel_avg2_w4_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
pop {pc}
.endfunc
-function x264_pixel_avg2_w8_neon, export=1
+function x264_pixel_avg2_w8_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
pop {pc}
.endfunc
-function x264_pixel_avg2_w16_neon, export=1
+function x264_pixel_avg2_w16_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
pop {pc}
.endfunc
-function x264_pixel_avg2_w20_neon, export=1
+function x264_pixel_avg2_w20_neon
ldr ip, [sp, #4]
push {lr}
sub r1, r1, #16
.endfunc
-// void mc_copy( uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int height )
-function x264_mc_copy_w4_neon, export=1
+// void mc_copy( uint8_t *dst, int dst_stride, uint8_t *src, int src_stride, int height )
+function x264_mc_copy_w4_neon
ldr ip, [sp]
copy_w4_loop:
subs ip, ip, #4
bx lr
.endfunc
-function x264_mc_copy_w8_neon, export=1
+function x264_mc_copy_w8_neon
ldr ip, [sp]
copy_w8_loop:
subs ip, ip, #4
bx lr
.endfunc
-function x264_mc_copy_w16_neon, export=1
+function x264_mc_copy_w16_neon
ldr ip, [sp]
copy_w16_loop:
subs ip, ip, #4
bx lr
.endfunc
-function x264_mc_copy_w16_aligned_neon, export=1
+function x264_mc_copy_w16_aligned_neon
ldr ip, [sp]
copy_w16_aligned_loop:
subs ip, ip, #4
// void x264_mc_chroma_neon( uint8_t *dst, int i_dst_stride,
// uint8_t *src, int i_src_stride,
// int dx, int dy, int i_width, int i_height );
-function x264_mc_chroma_neon, export=1
+function x264_mc_chroma_neon
push {r4-r6, lr}
ldrd r4, [sp, #16]
ldr r6, [sp, #24]
// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width)
-function x264_hpel_filter_v_neon, export=1
+function x264_hpel_filter_v_neon
ldr ip, [sp]
sub r1, r1, r3, lsl #1
push {lr}
.endfunc
// hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
-function x264_hpel_filter_c_neon, export=1
+function x264_hpel_filter_c_neon
sub r1, #16
vld1.64 {d0-d3}, [r1,:128]!
.endfunc
// hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
-function x264_hpel_filter_h_neon, export=1
+function x264_hpel_filter_h_neon
sub r1, #16
vmov.u8 d30, #5
vld1.64 {d0-d3}, [r1,:128]!
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
// uint8_t *dstc, int src_stride, int dst_stride, int width,
// int height )
-function x264_frame_init_lowres_core_neon, export=1
+function x264_frame_init_lowres_core_neon
push {r4-r10,lr}
vpush {d8-d15}
ldrd r4, [sp, #96]
void x264_prefetch_fenc_arm( uint8_t *, int, uint8_t *, int, int );
void *x264_memcpy_aligned_neon( void * dst, const void * src, size_t n );
-void x264_memzero_aligned_neon( void *dst, size_t n );
+void x264_memzero_aligned_neon( void *dst, int n );
void x264_pixel_avg_16x16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
void x264_pixel_avg_16x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
.text
.macro SAD4_ARMV6 h
-function x264_pixel_sad_4x\h\()_armv6, export=1
+function x264_pixel_sad_4x\h\()_armv6
push {r4-r6,lr}
ldr r4, [r2], r3
ldr r5, [r0], r1
.endm
.macro SAD_FUNC w, h, name, align:vararg
-function x264_pixel_sad\name\()_\w\()x\h\()_neon, export=1
+function x264_pixel_sad\name\()_\w\()x\h\()_neon
.if \w == 16
.set r, \h / 2 - 1
.else
.endm
.macro SAD_FUNC_DUAL w, h
-function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual, export=1
+function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual
SAD_DUAL_START_\w
.rept \h / 2 - \w / 8
SAD_DUAL_\w
.endm
.macro SAD_X_FUNC x, w, h
-function x264_pixel_sad_x\x\()_\w\()x\h\()_neon, export=1
+function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
push {r6-r7,lr}
.if \x == 3
ldrd r6, [sp, #12]
.endm
.macro SSD_FUNC w h
-function x264_pixel_ssd_\w\()x\h\()_neon, export=1
+function x264_pixel_ssd_\w\()x\h\()_neon
SSD_START_\w
.rept \h-2
SSD_\w
\vpadal \qsqr_sum, \qsqr_last
.endm
-function x264_pixel_var_8x8_neon, export=1
+function x264_pixel_var_8x8_neon
vld1.64 {d16}, [r0,:64], r1
vmull.u8 q1, d16, d16
vmovl.u8 q0, d16
b x264_var_end
.endfunc
-function x264_pixel_var_16x16_neon, export=1
+function x264_pixel_var_16x16_neon
vld1.64 {d16-d17}, [r0,:128], r1
vmull.u8 q12, d16, d16
vmovl.u8 q0, d16
vmlal.s16 \acc, \d1, \d1
.endm
-function x264_pixel_var2_8x8_neon, export=1
+function x264_pixel_var2_8x8_neon
DIFF_SUM q0, d0, d1
DIFF_SUM q8, d16, d17
SQR_ACC q1, d0, d1, vmull.s16
vsubl.u8 \q3, d6, d7
.endm
-function x264_pixel_satd_4x4_neon, export=1
+function x264_pixel_satd_4x4_neon
vld1.32 {d1[]}, [r2], r3
vld1.32 {d0[]}, [r0,:32], r1
vld1.32 {d3[]}, [r2], r3
bx lr
.endfunc
-function x264_pixel_satd_4x8_neon, export=1
+function x264_pixel_satd_4x8_neon
vld1.32 {d1[]}, [r2], r3
vld1.32 {d0[]}, [r0,:32], r1
vld1.32 {d3[]}, [r2], r3
b x264_satd_4x8_8x4_end_neon
.endfunc
-function x264_pixel_satd_8x4_neon, export=1
+function x264_pixel_satd_8x4_neon
vld1.64 {d1}, [r2], r3
vld1.64 {d0}, [r0,:64], r1
vsubl.u8 q0, d0, d1
bx lr
.endfunc
-function x264_pixel_satd_8x8_neon, export=1
+function x264_pixel_satd_8x8_neon
mov ip, lr
bl x264_satd_8x8_neon
bx lr
.endfunc
-function x264_pixel_satd_8x16_neon, export=1
+function x264_pixel_satd_8x16_neon
vpush {d8-d11}
mov ip, lr
bx lr
.endfunc
-function x264_pixel_satd_16x8_neon, export=1
+function x264_pixel_satd_16x8_neon
vpush {d8-d11}
mov ip, lr
bx lr
.endfunc
-function x264_pixel_satd_16x16_neon, export=1
+function x264_pixel_satd_16x16_neon
vpush {d8-d11}
mov ip, lr
.endfunc
-function x264_pixel_sa8d_8x8_neon, export=1
+function x264_pixel_sa8d_8x8_neon
mov ip, lr
bl x264_sa8d_8x8_neon
vadd.u16 q0, q8, q9
bx lr
.endfunc
-function x264_pixel_sa8d_16x16_neon, export=1
+function x264_pixel_sa8d_16x16_neon
vpush {d8-d11}
mov ip, lr
bl x264_sa8d_8x8_neon
.macro HADAMARD_AC w h
-function x264_pixel_hadamard_ac_\w\()x\h\()_neon, export=1
+function x264_pixel_hadamard_ac_\w\()x\h\()_neon
vpush {d8-d15}
movrel ip, mask_ac4
vmov.i8 q4, #0
vmull.u8 \ssb, \db, \db
.endm
-function x264_pixel_ssim_4x4x2_core_neon, export=1
+function x264_pixel_ssim_4x4x2_core_neon
ldr ip, [sp]
vld1.64 {d0}, [r0], r1
vld1.64 {d2}, [r2], r3
.endfunc
// FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
-function x264_pixel_ssim_end4_neon, export=1
+function x264_pixel_ssim_end4_neon
vld1.32 {d16-d19}, [r0,:128]!
vld1.32 {d20-d23}, [r1,:128]!
vadd.s32 q0, q8, q10
.text
// because gcc doesn't believe in using the free shift in add
-function x264_predict_4x4_h_armv6, export=1
+function x264_predict_4x4_h_armv6
ldrb r1, [r0, #0*FDEC_STRIDE-1]
ldrb r2, [r0, #1*FDEC_STRIDE-1]
ldrb r3, [r0, #2*FDEC_STRIDE-1]
bx lr
.endfunc
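
The "free shift" is ARM's barrel shifter on the second ALU operand: the left-edge byte of each row can be splatted across a 32-bit word with two adds, which is what the armv6 version does by hand. A minimal sketch of the trick for one row:

    ldrb r1, [r0, #-1]          @ load the left neighbour, r1 = 0x000000bb
    add  r1, r1, r1, lsl #8     @ r1 = 0x0000bbbb
    add  r1, r1, r1, lsl #16    @ r1 = 0xbbbbbbbb
    str  r1, [r0]               @ one 4-pixel predicted row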
-function x264_predict_4x4_dc_armv6, export=1
+function x264_predict_4x4_dc_armv6
mov ip, #0
ldr r1, [r0, #-FDEC_STRIDE]
ldrb r2, [r0, #0*FDEC_STRIDE-1]
uadd8 \a2, \a2, \c2
.endm
-function x264_predict_4x4_ddr_armv6, export=1
+function x264_predict_4x4_ddr_armv6
ldr r1, [r0, # -FDEC_STRIDE]
ldrb r2, [r0, # -FDEC_STRIDE-1]
ldrb r3, [r0, #0*FDEC_STRIDE-1]
pop {r4-r6,pc}
.endfunc
-function x264_predict_4x4_ddl_neon, export=1
+function x264_predict_4x4_ddl_neon
sub r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0}, [r0], ip
bx lr
.endfunc
-function x264_predict_8x8_dc_neon, export=1
+function x264_predict_8x8_dc_neon
mov ip, #0
ldrd r2, [r1, #8]
push {r4-r5,lr}
.endfunc
-function x264_predict_8x8_h_neon, export=1
+function x264_predict_8x8_h_neon
add r1, r1, #7
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1]
bx lr
.endfunc
-function x264_predict_8x8c_h_neon, export=1
+function x264_predict_8x8c_h_neon
sub r1, r0, #1
mov ip, #FDEC_STRIDE
.rept 4
bx lr
.endfunc
-function x264_predict_8x8c_v_neon, export=1
+function x264_predict_8x8c_v_neon
sub r0, r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0}, [r0,:64], ip
.endfunc
-function x264_predict_16x16_dc_neon, export=1
+function x264_predict_16x16_dc_neon
sub r3, r0, #FDEC_STRIDE
sub r0, r0, #1
vld1.64 {d0-d1}, [r3,:128]
bx lr
.endfunc
-function x264_predict_16x16_h_neon, export=1
+function x264_predict_16x16_h_neon
sub r1, r0, #1
mov ip, #FDEC_STRIDE
.rept 8
bx lr
.endfunc
-function x264_predict_16x16_v_neon, export=1
+function x264_predict_16x16_v_neon
sub r0, r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0-d1}, [r0,:128], ip
.endm
// quant_2x2_dc( int16_t dct[4], int mf, int bias )
-function x264_quant_2x2_dc_neon, export=1
+function x264_quant_2x2_dc_neon
vld1.64 {d0}, [r0,:64]
vabs.s16 d3, d0
vdup.16 d2, r2
.endfunc
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
-function x264_quant_4x4_dc_neon, export=1
+function x264_quant_4x4_dc_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
.endfunc
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4_neon, export=1
+function x264_quant_4x4_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
.endfunc
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
-function x264_quant_8x8_neon, export=1
+function x264_quant_8x8_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
// dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
.macro DEQUANT size bits
-function x264_dequant_\size\()_neon, export=1
+function x264_dequant_\size\()_neon
DEQUANT_START \bits+2, \bits
.ifc \size, 8x8
mov r2, #4
DEQUANT 8x8, 6
// dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-function x264_dequant_4x4_dc_neon, export=1
+function x264_dequant_4x4_dc_neon
DEQUANT_START 6, 6, yes
blt dequant_4x4_dc_rshift
// int coeff_last( int16_t *l )
-function x264_coeff_last4_arm, export=1
+function x264_coeff_last4_arm
ldrd r2, [r0]
subs r0, r3, #0
movne r0, #2
.endfunc
.macro COEFF_LAST_1x size
-function x264_coeff_last\size\()_neon, export=1
+function x264_coeff_last\size\()_neon
.if \size == 15
sub r0, r0, #2
vld1.64 {d0-d3}, [r0]
COEFF_LAST_1x 15
COEFF_LAST_1x 16
-function x264_coeff_last64_neon, export=1
+function x264_coeff_last64_neon
vld1.64 {d16-d19}, [r0,:128]!
vqmovn.u16 d16, q8
vqmovn.u16 d17, q9