git.sesse.net Git - ffmpeg/blob - libavcodec/alpha/motion_est_mvi_asm.S

   1 /*
   2  * Alpha optimized DSP utils
   3  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "regdef.h"
  23
  24 /* Some nicer register names.  */
  25 #define ta t10
  26 #define tb t11
  27 #define tc t12
  28 #define td AT
  29 /* Danger: these overlap with the argument list and the return value */
  30 #define te a5
  31 #define tf a4
  32 #define tg a3
  33 #define th v0
  34
  35         .set noat
  36         .set noreorder
  37         .arch pca56
  38         .text
  39
  40 /*****************************************************************************
  41  * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
  42  *
  43  * This code is written with a pca56 in mind. For ev6, one should
  44  * really take the increased latency of 3 cycles for MVI instructions
  45  * into account.
  46  *
  47  * It is important to keep the loading and first use of a register as
  48  * far apart as possible, because if a register is accessed before it
  49  * has been fetched from memory, the CPU will stall.
  50  */
  51         .align 4
  52         .globl pix_abs16x16_mvi_asm
  53         .ent pix_abs16x16_mvi_asm
  54 pix_abs16x16_mvi_asm:
  55         .frame sp, 0, ra, 0
  56         .prologue 0
  57
  58 #if CONFIG_GPROF
  59         lda     AT, _mcount
  60         jsr     AT, (AT), _mcount
  61 #endif
  62
  63         and     a2, 7, t0
  64         clr     v0
  65         beq     t0, $aligned
  66         .align 4
  67 $unaligned:
  68         /* Registers:
  69            line 0:
  70            t0:  left_u -> left lo -> left
  71            t1:  mid
  72            t2:  right_u -> right hi -> right
  73            t3:  ref left
  74            t4:  ref right
  75            line 1:
  76            t5:  left_u -> left lo -> left
  77            t6:  mid
  78            t7:  right_u -> right hi -> right
  79            t8:  ref left
  80            t9:  ref right
  81            temp:
  82            ta:  left hi
  83            tb:  right lo
  84            tc:  error left
  85            td:  error right  */
  86
  87         /* load line 0 */
  88         ldq_u   t0, 0(a2)       # left_u
  89         ldq_u   t1, 8(a2)       # mid
  90         ldq_u   t2, 16(a2)      # right_u
  91         ldq     t3, 0(a1)       # ref left
  92         ldq     t4, 8(a1)       # ref right
  93         addq    a1, a3, a1      # pix1
  94         addq    a2, a3, a2      # pix2
  95         /* load line 1 */
  96         ldq_u   t5, 0(a2)       # left_u
  97         ldq_u   t6, 8(a2)       # mid
  98         ldq_u   t7, 16(a2)      # right_u
  99         ldq     t8, 0(a1)       # ref left
 100         ldq     t9, 8(a1)       # ref right
 101         addq    a1, a3, a1      # pix1
 102         addq    a2, a3, a2      # pix2
 103         /* calc line 0 */
 104         extql   t0, a2, t0      # left lo
 105         extqh   t1, a2, ta      # left hi
 106         extql   t1, a2, tb      # right lo
 107         or      t0, ta, t0      # left
 108         extqh   t2, a2, t2      # right hi
 109         perr    t3, t0, tc      # error left
 110         or      t2, tb, t2      # right
 111         perr    t4, t2, td      # error right
 112         addq    v0, tc, v0      # add error left
 113         addq    v0, td, v0      # add error left
 114         /* calc line 1 */
 115         extql   t5, a2, t5      # left lo
 116         extqh   t6, a2, ta      # left hi
 117         extql   t6, a2, tb      # right lo
 118         or      t5, ta, t5      # left
 119         extqh   t7, a2, t7      # right hi
 120         perr    t8, t5, tc      # error left
 121         or      t7, tb, t7      # right
 122         perr    t9, t7, td      # error right
 123         addq    v0, tc, v0      # add error left
 124         addq    v0, td, v0      # add error left
 125         /* loop */
 126         subq    a4,  2, a4      # h -= 2
 127         bne     a4, $unaligned
 128         ret
 129
 130         .align 4
 131 $aligned:
 132         /* load line 0 */
 133         ldq     t0, 0(a2)       # left
 134         ldq     t1, 8(a2)       # right
 135         addq    a2, a3, a2      # pix2
 136         ldq     t2, 0(a1)       # ref left
 137         ldq     t3, 8(a1)       # ref right
 138         addq    a1, a3, a1      # pix1
 139         /* load line 1 */
 140         ldq     t4, 0(a2)       # left
 141         ldq     t5, 8(a2)       # right
 142         addq    a2, a3, a2      # pix2
 143         ldq     t6, 0(a1)       # ref left
 144         ldq     t7, 8(a1)       # ref right
 145         addq    a1, a3, a1      # pix1
 146         /* load line 2 */
 147         ldq     t8, 0(a2)       # left
 148         ldq     t9, 8(a2)       # right
 149         addq    a2, a3, a2      # pix2
 150         ldq     ta, 0(a1)       # ref left
 151         ldq     tb, 8(a1)       # ref right
 152         addq    a1, a3, a1      # pix1
 153         /* load line 3 */
 154         ldq     tc, 0(a2)       # left
 155         ldq     td, 8(a2)       # right
 156         addq    a2, a3, a2      # pix2
 157         ldq     te, 0(a1)       # ref left
 158         ldq     a0, 8(a1)       # ref right
 159         /* calc line 0 */
 160         perr    t0, t2, t0      # error left
 161         addq    a1, a3, a1      # pix1
 162         perr    t1, t3, t1      # error right
 163         addq    v0, t0, v0      # add error left
 164         /* calc line 1 */
 165         perr    t4, t6, t0      # error left
 166         addq    v0, t1, v0      # add error right
 167         perr    t5, t7, t1      # error right
 168         addq    v0, t0, v0      # add error left
 169         /* calc line 2 */
 170         perr    t8, ta, t0      # error left
 171         addq    v0, t1, v0      # add error right
 172         perr    t9, tb, t1      # error right
 173         addq    v0, t0, v0      # add error left
 174         /* calc line 3 */
 175         perr    tc, te, t0      # error left
 176         addq    v0, t1, v0      # add error right
 177         perr    td, a0, t1      # error right
 178         addq    v0, t0, v0      # add error left
 179         addq    v0, t1, v0      # add error right
 180         /* loop */
 181         subq    a4,  4, a4      # h -= 4
 182         bne     a4, $aligned
 183         ret
 184         .end pix_abs16x16_mvi_asm