git.sesse.net Git - ffmpeg/blobdiff - libavcodec/arm/jrevdct_arm.S
arm: asm decode_block_coeffs_internal is vp8 specific
[ffmpeg] / libavcodec / arm / jrevdct_arm.S
index f7fd5c7bde29e459f684c0865dcc6f109a1c6ca3..f951e2af34ed925904a8bca5c44ce6d859dd67d6 100644 (file)
@@ -25,7 +25,7 @@
 
 */
 
-#include "asm.S"
+#include "libavutil/arm/asm.S"
 
 #define FIX_0_298631336 2446
 #define FIX_0_541196100 4433
@@ -56,9 +56,7 @@
 #define FIX_0xFFFF_ID          48
 
 function ff_j_rev_dct_arm, export=1
-        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
-        sub sp, sp, #4                  @ reserve some space on the stack
-        str r0, [ sp ]                  @ save the DCT pointer to the stack
+        push {r0, r4 - r11, lr}
 
         mov lr, r0                      @ lr = pointer to the current row
         mov r12, #8                     @ r12 = row-counter
@@ -68,7 +66,7 @@ row_loop:
         ldrsh r2, [lr, # 2]             @ r2 = 'd2'
 
         @ Optimization for rows that have all items except the first set to 0
-        @ (this works as the DCTELEMS are always 4-byte aligned)
+        @ (this works because each row of int16_t is always 4-byte aligned)
         ldr r5, [lr, # 0]
         ldr r6, [lr, # 4]
         ldr r3, [lr, # 8]
@@ -99,7 +97,7 @@ row_loop:
         add r4, r6, r3, lsl #13             @ r4 = tmp11
         rsb r3, r6, r3, lsl #13             @ r3 = tmp12
 
-        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11
 
         ldrsh r3, [lr, #10]             @ r3 = 'd3'
         ldrsh r5, [lr, #12]             @ r5 = 'd5'
@@ -133,8 +131,8 @@ row_loop:
         add r3, r3, r4                  @ r3 = tmp2
         add r1, r1, r6                  @ r1 = tmp3
 
-        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
-                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
 
         @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
         add r8, r0, r1
@@ -208,7 +206,7 @@ end_of_row_loop:
 
 start_column_loop:
         @ Start of column loop
-        ldr lr, [ sp ]
+        pop {lr}
         mov r12, #8
 column_loop:
         ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
@@ -242,7 +240,7 @@ column_loop:
         orrs r10, r9, r10
         beq empty_odd_column
 
-        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12
 
         add r0, r3, r5                  @ r0 = 'z2'
         add r2, r1, r7                  @ r2 = 'z1'
@@ -272,8 +270,8 @@ column_loop:
         add r3, r3, r4                  @ r3 = tmp2
         add r1, r1, r6                  @ r1 = tmp3
 
-        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
-                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
 
         @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
         add r8, r0, r1
@@ -365,8 +363,7 @@ empty_odd_column:
 
 the_end:
         @ The end....
-        add sp, sp, #4
-        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
+        pop {r4 - r11, pc}
 endfunc
 
 const const_array