git.sesse.net Git - x264/blobdiff - common/arm/asm.S
Much faster weightp
[x264] / common / arm / asm.S
index 529fa0cef39239e0e08403450dec14bfd9f6ce4e..d16316564628e60886606d90d9b961b7f0beb62e 100644 (file)
 
 #include "config.h"
 
+/*
+ * ELF is a line prefix for directives that only make sense in ELF objects.
+ * When __ELF__ is defined it expands to nothing, so the directive written
+ * after it is assembled as-is.  Otherwise it expands to '@', the comment
+ * character of GNU as for ARM, which turns the rest of that line into a
+ * comment and thereby drops the directive.
+ */
+#ifdef __ELF__
+#   define ELF
+#else
+#   define ELF @
+#endif
+
         .macro require8, val=1
+        /* Emit EABI build attribute 24 with value val (default 1).
+         * Tag 24 is Tag_ABI_align_needed in the ARM EABI build-attribute
+         * scheme (8-byte alignment is relied upon).  The ELF prefix drops
+         * the directive when not assembling for an ELF target. */
-        .eabi_attribute 24, \val
+ELF     .eabi_attribute 24, \val
         .endm
 
         .macro preserve8, val=1
+        /* Emit EABI build attribute 25 with value val (default 1).
+         * Tag 25 is Tag_ABI_align_preserved in the ARM EABI build-attribute
+         * scheme (8-byte alignment is kept intact).  The ELF prefix drops
+         * the directive when not assembling for an ELF target. */
-        .eabi_attribute 25, \val
+ELF     .eabi_attribute 25, \val
         .endm
 
-        .macro function name, export=0
-.if \export
+        .macro function name
+        /* Open the assembly function called name and define its label.
+         * The symbol is now always declared .global (the former export
+         * argument is gone).  On ELF targets only, it is additionally
+         * given hidden visibility and typed as a function; both of those
+         * directives vanish elsewhere through the ELF prefix.
+         * .func starts the debug-info record for the function; the
+         * matching .endfunc is expected from whatever closes the function
+         * (not visible in this hunk). */
         .global \name
-.endif
-        .type   \name, %function
+ELF     .hidden \name
+ELF     .type   \name, %function
         .func   \name
 \name:
         .endm
 #endif
         .endm
 
+/*
+ * movconst rd, val -- load the assembly-time constant val into register rd.
+ *
+ *   rd   destination core register
+ *   val  absolute constant expression (must be evaluable by .if)
+ *
+ * With HAVE_ARMV6T2 defined, movw writes the low 16 bits (zeroing the top
+ * half) and movt is emitted only when val has bits set above bit 15, so
+ * small constants cost a single instruction.  Without it, the constant is
+ * loaded through the "ldr rd, =val" literal-pool pseudo-instruction.
+ *
+ * val is parenthesised in the .if test because GNU as gives >> a higher
+ * precedence than + and -.  Unparenthesised, an argument such as
+ * 1-0x10000 would be tested as 1-(0x10000>>16), which is zero, and the
+ * required movt would be silently omitted.
+ */
+.macro movconst rd, val
+#ifdef HAVE_ARMV6T2
+    movw        \rd, #:lower16:\val
+.if (\val) >> 16
+    movt        \rd, #:upper16:\val
+.endif
+#else
+    ldr         \rd, =\val
+#endif
+.endm
+
 #define FENC_STRIDE 16  /* NOTE(review): presumably the row stride of the fenc pixel buffer -- confirm against the C-side definition */
 #define FDEC_STRIDE 32  /* NOTE(review): presumably the row stride of the fdec pixel buffer -- confirm against the C-side definition */
 
     vmax.s16    \d1, \s1, \s2
 .endif
 .endm
+
+/*
+ * TRANSPOSE8x8 r0 .. r7 -- in-place transpose across eight NEON registers,
+ * built from three rounds of vtrn with shrinking element size:
+ *   round 1: vtrn.32 between registers four apart (r0/r4 .. r3/r7)
+ *   round 2: vtrn.16 between registers two apart
+ *   round 3: vtrn.8  between neighbouring registers
+ * All eight arguments are overwritten.
+ * NOTE(review): presumably each argument is a 64-bit D register holding
+ * one row of an 8x8 byte block -- confirm against the callers.
+ */
+.macro TRANSPOSE8x8 r0 r1 r2 r3 r4 r5 r6 r7
+    vtrn.32         \r0, \r4        /* round 1: 32-bit elements */
+    vtrn.32         \r1, \r5
+    vtrn.32         \r2, \r6
+    vtrn.32         \r3, \r7
+    vtrn.16         \r0, \r2        /* round 2: 16-bit elements */
+    vtrn.16         \r1, \r3
+    vtrn.16         \r4, \r6
+    vtrn.16         \r5, \r7
+    vtrn.8          \r0, \r1        /* round 3: bytes */
+    vtrn.8          \r2, \r3
+    vtrn.8          \r4, \r5
+    vtrn.8          \r6, \r7
+.endm
+
+/*
+ * TRANSPOSE4x4 r0 r1 r2 r3 -- in-place transpose of 4x4 groups of bytes
+ * across four NEON registers: vtrn.16 between registers two apart, then
+ * vtrn.8 between neighbouring registers.  All four arguments are
+ * overwritten.
+ * NOTE(review): if D registers are passed, each one holds eight bytes, so
+ * two independent 4x4 byte blocks would be transposed at once -- confirm
+ * against the callers.
+ */
+.macro TRANSPOSE4x4 r0 r1 r2 r3
+    vtrn.16         \r0, \r2        /* 16-bit elements, registers two apart */
+    vtrn.16         \r1, \r3
+    vtrn.8          \r0, \r1        /* bytes, neighbouring registers */
+    vtrn.8          \r2, \r3
+.endm
+
+/*
+ * TRANSPOSE4x4_16 d0 d1 d2 d3 -- in-place transpose of a 4x4 arrangement
+ * of 16-bit elements across four NEON registers: vtrn.32 between registers
+ * two apart, then vtrn.16 between neighbouring registers.  All four
+ * arguments are overwritten.
+ * NOTE(review): the d-prefixed parameter names suggest 64-bit D registers
+ * (four 16-bit elements each) -- confirm against the callers.
+ */
+.macro TRANSPOSE4x4_16  d0 d1 d2 d3
+    vtrn.32     \d0, \d2            /* 32-bit elements, registers two apart */
+    vtrn.32     \d1, \d3
+    vtrn.16     \d0, \d1            /* 16-bit elements, neighbouring registers */
+    vtrn.16     \d2, \d3
+.endm