git.sesse.net Git - x264/blob - common/amd64/predict-a.asm

   1 ;*****************************************************************************
   2 ;* predict-a.asm: h264 encoder library
   3 ;*****************************************************************************
   4 ;* Copyright (C) 2005 x264 project
   5 ;*
   6 ;* This program is free software; you can redistribute it and/or modify
   7 ;* it under the terms of the GNU General Public License as published by
   8 ;* the Free Software Foundation; either version 2 of the License, or
   9 ;* (at your option) any later version.
  10 ;*
  11 ;* This program is distributed in the hope that it will be useful,
  12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 ;* GNU General Public License for more details.
  15 ;*
  16 ;* You should have received a copy of the GNU General Public License
  17 ;* along with this program; if not, write to the Free Software
  18 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  19 ;*****************************************************************************
  20
  21 BITS 64
  22
  23 ;=============================================================================
  24 ; Macros and other preprocessor constants
  25 ;=============================================================================
  26
  27 %macro cglobal 1
  28     %ifdef PREFIX
  29         global _%1
  30         %define %1 _%1
  31     %else
  32         global %1
  33     %endif
  34 %endmacro
  35
  36 ;=============================================================================
  37 ; Macros
  38 ;=============================================================================
  39
  40 %macro SAVE_0_1 1
  41     movq        [%1]         , mm0
  42     movq        [%1 + 8]     , mm1
  43 %endmacro
  44
  45 ;=============================================================================
  46 ; Code
  47 ;=============================================================================
  48
  49 SECTION .text
  50
  51 cglobal predict_8x8c_v_mmx
  52 cglobal predict_16x16_v_mmx
  53
  54 ;-----------------------------------------------------------------------------
  55 ;
  56 ; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
  57 ;
  58 ;-----------------------------------------------------------------------------
  59
  60 ALIGN 16
  61 predict_8x8c_v_mmx :
  62     movsxd      rcx, esi        ; i_stride
  63
  64     sub         rdi             , rcx               ; esi <-- line -1
  65
  66     movq        mm0             , [rdi]
  67     movq        [rdi + rcx]     , mm0               ; 0
  68     movq        [rdi + 2 * rcx] , mm0               ; 1
  69     movq        [rdi + 4 * rcx] , mm0               ; 3
  70     movq        [rdi + 8 * rcx] , mm0               ; 7
  71     add         rdi             , rcx               ; esi <-- line 0
  72     movq        [rdi + 2 * rcx] , mm0               ; 2
  73     movq        [rdi + 4 * rcx] , mm0               ; 4
  74     lea         rdi             , [rdi + 4 * rcx]   ; esi <-- line 4
  75     movq        [rdi + rcx]     , mm0               ; 5
  76     movq        [rdi + 2 * rcx] , mm0               ; 6
  77
  78     ret
  79
  80 ;-----------------------------------------------------------------------------
  81 ;
  82 ; void predict_16x16_v_mmx( uint8_t *src, int i_stride )
  83 ;
  84 ;-----------------------------------------------------------------------------
  85
  86 ALIGN 16
  87 predict_16x16_v_mmx :
  88     movsxd      rcx, esi                ; i_stride
  89
  90     sub         rdi, rcx                ; esi <-- line -1
  91
  92     movq        mm0, [rdi]
  93     movq        mm1, [rdi + 8]
  94     lea         rax, [rcx + 2 * rcx]    ; rax <-- 3* stride
  95
  96     SAVE_0_1    (rdi + rcx)             ; 0
  97     SAVE_0_1    (rdi + 2 * rcx)         ; 1
  98     SAVE_0_1    (rdi + rax)             ; 2
  99     SAVE_0_1    (rdi + 4 * rcx)         ; 3
 100     SAVE_0_1    (rdi + 2 * rax)         ; 5
 101     SAVE_0_1    (rdi + 8 * rcx)         ; 7
 102     SAVE_0_1    (rdi + 4 * rax)         ; 11
 103     add         rdi, rcx                ; esi <-- line 0
 104     SAVE_0_1    (rdi + 4 * rcx)         ; 4
 105     SAVE_0_1    (rdi + 2 * rax)         ; 6
 106     SAVE_0_1    (rdi + 8 * rcx)         ; 8
 107     SAVE_0_1    (rdi + 4 * rax)         ; 12
 108     lea         rdi, [rdi + 8 * rcx]    ; esi <-- line 8
 109     SAVE_0_1    (rdi + rcx)             ; 9
 110     SAVE_0_1    (rdi + 2 * rcx)         ; 10
 111     lea         rdi, [rdi + 4 * rcx]    ; esi <-- line 12
 112     SAVE_0_1    (rdi + rcx)             ; 13
 113     SAVE_0_1    (rdi + 2 * rcx)         ; 14
 114     SAVE_0_1    (rdi + rax)             ; 15
 115
 116     ret