]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/alpha/dsputil_alpha_asm.S
* Ogg/Vorbis patch by Mark Hills
[ffmpeg] / libavcodec / alpha / dsputil_alpha_asm.S
index 9e2476de1a3a53d3a4d1ae5caecea0dcd01b244e..7ec6757d75c3fa01b7a2db76395f900b76854954 100644 (file)
@@ -23,6 +23,9 @@
  */
 
 #include "regdef.h"
+#ifdef HAVE_AV_CONFIG_H        
+#include "config.h"
+#endif
 
 /* Some nicer register names.  */
 #define ta t10
         .arch pca56
         .text
 
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ *                         int line_size, int h)
+ *
+ * Copies h rows of 8 bytes each from pixels to block.  Both pointers
+ * advance by line_size from one row to the next.
+ *
+ * Registers as used below: a0 = destination (block), a1 = source
+ * (pixels), a2 = row stride (line_size), a3 = remaining rows (h).
+ * Scratch: t0-t9 and ta (an alias for t10).  No stack frame is set up.
+ *
+ * When HAVE_GPROF is defined, _mcount is called through AT on entry.
+ *
+ * One of two loops is selected once, on entry, from the low three bits
+ * of the source pointer:
+ *   $aligned   - source is 8-byte aligned; each row is a single
+ *                ldq/stq pair.
+ *   $unaligned - each row is fetched as two ldq_u quadwords and merged
+ *                with extql/extqh/or before being stored.
+ *
+ * Requirements implied by the code (none are checked at run time):
+ *   - h must be a positive multiple of 4.  Both loops process four rows
+ *     per iteration, subtract 4 from the counter and leave only when
+ *     it is exactly zero.
+ *   - line_size must be a multiple of 8.  The alignment test is done
+ *     only once, the aligned loop uses ldq on every row, and the
+ *     unaligned loop takes its extract shift from a1 after a1 has
+ *     already been advanced by several rows.
+ *   - block must be 8-byte aligned, because every row is written with
+ *     stq.
+ */
+        .align 6
+        .globl put_pixels_axp_asm
+        .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        and     a1, 7, t0               # t0 = source address & 7
+        beq     t0, $aligned            # zero offset: use the aligned loop
+
+        .align 4
+$unaligned:
+        ldq_u   t0, 0(a1)               # row 0: two quadwords covering the 8 source bytes
+        ldq_u   t1, 8(a1)
+        addq    a1, a2, a1              # advance source to row 1
+        nop                             # presumably padding to keep groups of four instructions
+
+        ldq_u   t2, 0(a1)               # row 1
+        ldq_u   t3, 8(a1)
+        addq    a1, a2, a1              # advance source to row 2
+        nop
+
+       ldq_u   t4, 0(a1)                # row 2
+        ldq_u   t5, 8(a1)
+        addq    a1, a2, a1              # advance source to row 3
+        nop
+
+        ldq_u   t6, 0(a1)               # row 3
+        ldq_u   t7, 8(a1)
+        extql   t0, a1, t0              # low part of row 0; shift comes from a1 & 7 only
+        addq    a1, a2, a1              # source now points at row 0 of the next iteration
+
+        extqh   t1, a1, t1              # high part of row 0
+        addq    a0, a2, t8              # t8 = destination of row 1
+        extql   t2, a1, t2              # low part of row 1
+        addq    t8, a2, t9              # t9 = destination of row 2
+
+        extqh   t3, a1, t3              # high part of row 1
+        addq    t9, a2, ta              # ta = destination of row 3
+        extql   t4, a1, t4              # low part of row 2
+        or      t0, t1, t0              # t0 = assembled row 0
+
+        extqh   t5, a1, t5              # high part of row 2
+        or      t2, t3, t2              # t2 = assembled row 1
+        extql   t6, a1, t6              # low part of row 3
+        or      t4, t5, t4              # t4 = assembled row 2
+
+        extqh   t7, a1, t7              # high part of row 3
+        or      t6, t7, t6              # t6 = assembled row 3
+        stq     t0, 0(a0)               # store row 0
+        stq     t2, 0(t8)               # store row 1
+
+        stq     t4, 0(t9)               # store row 2
+        subq    a3, 4, a3               # four rows done
+        stq     t6, 0(ta)               # store row 3
+        addq    ta, a2, a0              # destination for the next iteration
+
+        bne     a3, $unaligned          # loop until the row counter is exactly zero
+        ret
+
+        .align 4
+$aligned:
+        ldq     t0, 0(a1)               # row 0
+        addq    a1, a2, a1
+        ldq     t1, 0(a1)               # row 1
+        addq    a1, a2, a1
+
+        ldq     t2, 0(a1)               # row 2
+        addq    a1, a2, a1
+        ldq     t3, 0(a1)               # row 3
+
+       addq    a0, a2, t4               # t4 = destination of row 1
+       addq    a1, a2, a1               # source now points at row 0 of the next iteration
+       addq    t4, a2, t5               # t5 = destination of row 2
+       subq    a3, 4, a3                # four rows done
+
+       stq     t0, 0(a0)                # store row 0
+       addq    t5, a2, t6               # t6 = destination of row 3
+       stq     t1, 0(t4)                # store row 1
+       addq    t6, a2, a0               # destination for the next iteration
+
+       stq     t2, 0(t5)                # store row 2
+       stq     t3, 0(t6)                # store row 3
+       
+       bne     a3, $aligned             # loop until the row counter is exactly zero
+        ret
+        .end put_pixels_axp_asm
+
+
 /************************************************************************
  * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
  *                                 int line_size)
@@ -51,6 +151,11 @@ put_pixels_clamped_mvi_asm:
         .frame sp, 0, ra
         .prologue 0
 
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
         lda     t8, -1
         lda     t9, 8           # loop counter
         zap     t8, 0xaa, t8    # 00ff00ff00ff00ff
@@ -101,6 +206,11 @@ add_pixels_clamped_mvi_asm:
         .frame sp, 0, ra
         .prologue 0
 
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
         lda     t1, -1
         lda     th, 8
         zap     t1, 0x33, tg