git.sesse.net Git - ffmpeg/blob - libavcodec/alpha/motion_est_mvi_asm.S

   1 /*
   2  * Alpha optimized DSP utils
   3  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  */
  19
  20 #include "regdef.h"
  21 #ifdef HAVE_AV_CONFIG_H
  22 #include "config.h"
  23 #endif
  24
  25 /* Some nicer register names.  */
  26 #define ta t10
  27 #define tb t11
  28 #define tc t12
  29 #define td AT  /* used as an ordinary temporary; see ".set noat" below */
  30 /* Danger: these overlap with the argument list and the return value */
  31 #define te a5
  32 #define tf a4
  33 #define tg a3  /* a3 also holds the row counter in pix_abs16x16_mvi_asm */
  34 #define th v0  /* v0 also holds the running sum / return value there */
  35
  36         .set noat       # the code below uses AT (td) itself
  37         .set noreorder  # keep the hand-scheduled instruction order
  38         .arch pca56     # target CPU; the routine below uses perr (MVI)
  39         .text
  40
  41 /*****************************************************************************
  42  * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
  43  *
      * Accumulates the perr ("pixel error") results between pix1 and pix2 over
      * 16 rows of 16 bytes each and returns the total in v0.
      *
      * Register usage:
      *   a0: pix1 (reference), advanced by line_size after every row
      *   a1: pix2, advanced by line_size after every row
      *   a2: line_size
      *   a3: rows left (starts at 16)
      *   v0: running sum / return value
      * Also written: t0-t9 and ta-td; the aligned path additionally uses
      * te and tf (a5, a4).
      *
      * pix2 may have any alignment: its low three bits are tested once, before
      * the loop, to choose between the $aligned path (plain ldq, 4 rows per
      * iteration) and the $unaligned path (ldq_u + extql/extqh, 2 rows per
      * iteration).  The unaligned path takes the byte offset for extql/extqh
      * from a1 after it has already been advanced.
      * NOTE(review): both of these appear to rely on line_size being a
      * multiple of 8 - confirm against callers.
      * NOTE(review): pix1 is read with plain ldq in both paths, so it is
      * presumably required to be 8-byte aligned - confirm against callers.
      *
  44  * This code is written with a pca56 in mind. For ev6, one should
  45  * really take the increased latency of 3 cycles for MVI instructions
  46  * into account.
  47  *
  48  * It is important to keep the loading and first use of a register as
  49  * far apart as possible, because if a register is accessed before it
  50  * has been fetched from memory, the CPU will stall.
  51  */
  52         .align 4
  53         .globl pix_abs16x16_mvi_asm
  54         .ent pix_abs16x16_mvi_asm
  55 pix_abs16x16_mvi_asm:
  56         .frame sp, 0, ra, 0
  57         .prologue 0
  58
  59 #ifdef HAVE_GPROF
  60         lda     AT, _mcount
  61         jsr     AT, (AT), _mcount       # profiling builds only: call _mcount
  62 #endif
  63
  64         and     a1, 7, t0       # t0 = low 3 bits of pix2
  65         clr     v0              # sum = 0
  66         lda     a3, 16          # h = 16 rows
  67         beq     t0, $aligned    # pix2 is 8-byte aligned: take the ldq path
  68         .align 4
  69 $unaligned:
  70         /* Registers:
  71            line 0:
  72            t0:  left_u -> left lo -> left
  73            t1:  mid
  74            t2:  right_u -> right hi -> right
  75            t3:  ref left
  76            t4:  ref right
  77            line 1:
  78            t5:  left_u -> left lo -> left
  79            t6:  mid
  80            t7:  right_u -> right hi -> right
  81            t8:  ref left
  82            t9:  ref right
  83            temp:
  84            ta:  left hi
  85            tb:  right lo
  86            tc:  error left
  87            td:  error right  */
  88
  89         /* load line 0: three quadwords covering the 16 unaligned pix2 bytes */
  90         ldq_u   t0, 0(a1)       # left_u
  91         ldq_u   t1, 8(a1)       # mid
  92         ldq_u   t2, 16(a1)      # right_u
  93         ldq     t3, 0(a0)       # ref left
  94         ldq     t4, 8(a0)       # ref right
  95         addq    a0, a2, a0      # pix1 += line_size
  96         addq    a1, a2, a1      # pix2 += line_size
  97         /* load line 1 */
  98         ldq_u   t5, 0(a1)       # left_u
  99         ldq_u   t6, 8(a1)       # mid
 100         ldq_u   t7, 16(a1)      # right_u
 101         ldq     t8, 0(a0)       # ref left
 102         ldq     t9, 8(a0)       # ref right
 103         addq    a0, a2, a0      # pix1 += line_size
 104         addq    a1, a2, a1      # pix2 += line_size
 105         /* calc line 0 (a1 has already moved on; it only supplies the offset) */
 106         extql   t0, a1, t0      # left lo
 107         extqh   t1, a1, ta      # left hi
 108         extql   t1, a1, tb      # right lo
 109         or      t0, ta, t0      # left
 110         extqh   t2, a1, t2      # right hi
 111         perr    t3, t0, tc      # error left
 112         or      t2, tb, t2      # right
 113         perr    t4, t2, td      # error right
 114         addq    v0, tc, v0      # add error left
 115         addq    v0, td, v0      # add error right
 116         /* calc line 1 */
 117         extql   t5, a1, t5      # left lo
 118         extqh   t6, a1, ta      # left hi
 119         extql   t6, a1, tb      # right lo
 120         or      t5, ta, t5      # left
 121         extqh   t7, a1, t7      # right hi
 122         perr    t8, t5, tc      # error left
 123         or      t7, tb, t7      # right
 124         perr    t9, t7, td      # error right
 125         addq    v0, tc, v0      # add error left
 126         addq    v0, td, v0      # add error right
 127         /* loop: two rows were handled per iteration */
 128         subq    a3,  2, a3      # h -= 2
 129         bne     a3, $unaligned
 130         ret                     # result is in v0
 131
 132         .align 4
 133 $aligned:
 134         /* load line 0 */
 135         ldq     t0, 0(a1)       # left
 136         ldq     t1, 8(a1)       # right
 137         addq    a1, a2, a1      # pix2 += line_size
 138         ldq     t2, 0(a0)       # ref left
 139         ldq     t3, 8(a0)       # ref right
 140         addq    a0, a2, a0      # pix1 += line_size
 141         /* load line 1 */
 142         ldq     t4, 0(a1)       # left
 143         ldq     t5, 8(a1)       # right
 144         addq    a1, a2, a1      # pix2 += line_size
 145         ldq     t6, 0(a0)       # ref left
 146         ldq     t7, 8(a0)       # ref right
 147         addq    a0, a2, a0      # pix1 += line_size
 148         /* load line 2 */
 149         ldq     t8, 0(a1)       # left
 150         ldq     t9, 8(a1)       # right
 151         addq    a1, a2, a1      # pix2 += line_size
 152         ldq     ta, 0(a0)       # ref left
 153         ldq     tb, 8(a0)       # ref right
 154         addq    a0, a2, a0      # pix1 += line_size
 155         /* load line 3 */
 156         ldq     tc, 0(a1)       # left
 157         ldq     td, 8(a1)       # right
 158         addq    a1, a2, a1      # pix2 += line_size
 159         ldq     te, 0(a0)       # ref left
 160         ldq     tf, 8(a0)       # ref right
 161         /* calc line 0; t0/t1 are reused as the error temporaries from here on */
 162         perr    t0, t2, t0      # error left
 163         addq    a0, a2, a0      # pix1 += line_size (belongs to line 3 load)
 164         perr    t1, t3, t1      # error right
 165         addq    v0, t0, v0      # add error left
 166         /* calc line 1 */
 167         perr    t4, t6, t0      # error left
 168         addq    v0, t1, v0      # add error right (line 0)
 169         perr    t5, t7, t1      # error right
 170         addq    v0, t0, v0      # add error left
 171         /* calc line 2 */
 172         perr    t8, ta, t0      # error left
 173         addq    v0, t1, v0      # add error right (line 1)
 174         perr    t9, tb, t1      # error right
 175         addq    v0, t0, v0      # add error left
 176         /* calc line 3 */
 177         perr    tc, te, t0      # error left
 178         addq    v0, t1, v0      # add error right (line 2)
 179         perr    td, tf, t1      # error right
 180         addq    v0, t0, v0      # add error left
 181         addq    v0, t1, v0      # add error right
 182         /* loop: four rows were handled per iteration */
 183         subq    a3,  4, a3      # h -= 4
 184         bne     a3, $aligned
 185         ret                     # result is in v0
 186         .end pix_abs16x16_mvi_asm