git.sesse.net Git - ffmpeg/blobdiff - libavcodec/arm/mdct_neon.S
ARM: NEON H264 8x8 IDCT
[ffmpeg] / libavcodec / arm / mdct_neon.S
index 6b6c4256963a919eea2bb1bbc9659917259760bc..fcf802275fb91865492b1083f8c1627ab941b807 100644 (file)
@@ -21,6 +21,8 @@
 
 #include "asm.S"
 
+        preserve8
+
         .text
 
 #define ff_fft_calc_neon X(ff_fft_calc_neon)
@@ -29,8 +31,8 @@ function ff_imdct_half_neon, export=1
         push            {r4-r8,lr}
 
         mov             r12, #1
-        ldr             lr,  [r0, #28]          @ mdct_bits
-        ldr             r4,  [r0, #32]          @ tcos
+        ldr             lr,  [r0, #20]          @ mdct_bits
+        ldr             r4,  [r0, #24]          @ tcos
         ldr             r3,  [r0, #8]           @ revtab
         lsl             r12, r12, lr            @ n  = 1 << nbits
         lsr             lr,  r12, #2            @ n4 = n >> 2
@@ -74,8 +76,8 @@ function ff_imdct_half_neon, export=1
         bl              ff_fft_calc_neon
 
         mov             r12, #1
-        ldr             lr,  [r4, #28]          @ mdct_bits
-        ldr             r4,  [r4, #32]          @ tcos
+        ldr             lr,  [r4, #20]          @ mdct_bits
+        ldr             r4,  [r4, #24]          @ tcos
         lsl             r12, r12, lr            @ n  = 1 << nbits
         lsr             lr,  r12, #3            @ n8 = n >> 3
 
@@ -120,12 +122,12 @@ function ff_imdct_half_neon, export=1
         vst2.32         {d5,d7},  [r8,:128]
 
         pop             {r4-r8,pc}
-.endfunc
+endfunc
 
 function ff_imdct_calc_neon, export=1
         push            {r4-r6,lr}
 
-        ldr             r3,  [r0, #28]
+        ldr             r3,  [r0, #20]
         mov             r4,  #1
         mov             r5,  r1
         lsl             r4,  r4,  r3
@@ -156,14 +158,14 @@ function ff_imdct_calc_neon, export=1
         bgt             1b
 
         pop             {r4-r6,pc}
-.endfunc
+endfunc
 
 function ff_mdct_calc_neon, export=1
         push            {r4-r10,lr}
 
         mov             r12, #1
-        ldr             lr,  [r0, #28]          @ mdct_bits
-        ldr             r4,  [r0, #32]          @ tcos
+        ldr             lr,  [r0, #20]          @ mdct_bits
+        ldr             r4,  [r0, #24]          @ tcos
         ldr             r3,  [r0, #8]           @ revtab
         lsl             lr,  r12, lr            @ n  = 1 << nbits
         add             r7,  r2,  lr            @ in4u
@@ -251,8 +253,8 @@ function ff_mdct_calc_neon, export=1
         bl              ff_fft_calc_neon
 
         mov             r12, #1
-        ldr             lr,  [r4, #28]          @ mdct_bits
-        ldr             r4,  [r4, #32]          @ tcos
+        ldr             lr,  [r4, #20]          @ mdct_bits
+        ldr             r4,  [r4, #24]          @ tcos
         lsl             r12, r12, lr            @ n  = 1 << nbits
         lsr             lr,  r12, #3            @ n8 = n >> 3
 
@@ -298,4 +300,4 @@ function ff_mdct_calc_neon, export=1
         vst2.32         {d5,d7},  [r8,:128]
 
         pop             {r4-r10,pc}
-.endfunc
+endfunc