arm: dsputil: Add a bunch of missing #includes

[ffmpeg] / libavcodec / arm / simple_idct_neon.S
diff --git a/libavcodec/arm/simple_idct_neon.S b/libavcodec/arm/simple_idct_neon.S

index cbed9eefe45ede8507c2a9be2375cb7d94d60ef9..a1cde8d80a13a369ed72c5459006d748f9ffaa6a 100644
--- a/libavcodec/arm/simple_idct_neon.S
+++ b/libavcodec/arm/simple_idct_neon.S
@@ -23,7 +23,7 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
-#include "asm.S"
+#include "libavutil/arm/asm.S"
  
  #define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  #define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
@@ -159,8 +159,8 @@ function idct_col4_neon
          vmull.s16       q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
          vld1.64         {d8}, [r2,:64], ip /* d8 = col[3] */
  
-        ldrd            r4,  [r2]
-        ldrd            r6,  [r2, #16]
+        ldrd            r4,  r5,  [r2]
+        ldrd            r6,  r7,  [r2, #16]
          orrs            r4,  r4,  r5
  
          idct_col4_top
@@ -176,7 +176,7 @@ function idct_col4_neon
          vadd.i32        q14, q14, q7
  
  1:      orrs            r6,  r6,  r7
-        ldrd            r4,  [r2, #16]
+        ldrd            r4,  r5,  [r2, #16]
          it              eq
          addeq           r2,  r2,  #16
          beq             2f
@@ -188,7 +188,7 @@ function idct_col4_neon
          vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
  
  2:      orrs            r4,  r4,  r5
-        ldrd            r4,  [r2, #16]
+        ldrd            r4,  r5,  [r2, #16]
          it              eq
          addeq           r2,  r2,  #16
          beq             3f
@@ -243,10 +243,9 @@ function idct_col4_st8_neon
          bx              lr
  endfunc
  
-        .section .rodata
-        .align 4
-idct_coeff_neon:
+const   idct_coeff_neon, align=4
          .short W1, W2, W3, W4, W5, W6, W7, W4c
+endconst
  
          .macro idct_start data
          push            {r4-r7, lr}
@@ -262,7 +261,7 @@ idct_coeff_neon:
          pop             {r4-r7, pc}
          .endm
  
-/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, int16_t *data); */
  function ff_simple_idct_put_neon, export=1
          idct_start      r2
  
@@ -317,7 +316,7 @@ function idct_col4_add8_neon
          bx              lr
  endfunc
  
-/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, int16_t *data); */
  function ff_simple_idct_add_neon, export=1
          idct_start      r2
  
@@ -356,7 +355,7 @@ function idct_col4_st16_neon
          bx              lr
  endfunc
  
-/* void ff_simple_idct_neon(DCTELEM *data); */
+/* void ff_simple_idct_neon(int16_t *data); */
  function ff_simple_idct_neon, export=1
          idct_start      r0