git.sesse.net Git - ffmpeg/blob - libavcodec/x86/huffyuvencdsp.asm

   1 ;************************************************************************
   2 ;* SIMD-optimized HuffYUV encoding functions
   3 ;* Copyright (c) 2000, 2001 Fabrice Bellard
   4 ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5 ;*
   6 ;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
   7 ;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
   8 ;*
   9 ;* This file is part of FFmpeg.
  10 ;*
  11 ;* FFmpeg is free software; you can redistribute it and/or
  12 ;* modify it under the terms of the GNU Lesser General Public
  13 ;* License as published by the Free Software Foundation; either
  14 ;* version 2.1 of the License, or (at your option) any later version.
  15 ;*
  16 ;* FFmpeg is distributed in the hope that it will be useful,
  17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 ;* Lesser General Public License for more details.
  20 ;*
  21 ;* You should have received a copy of the GNU Lesser General Public
  22 ;* License along with FFmpeg; if not, write to the Free Software
  23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24 ;******************************************************************************
  25
  26 %include "libavutil/x86/x86util.asm"
  27
  28 SECTION .text
  29
  30 %include "libavcodec/x86/huffyuvdsp_template.asm"
  31
  32 ;------------------------------------------------------------------------------
  33 ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
  34 ;                    unsigned mask, int w);
  35 ;------------------------------------------------------------------------------
  36
  37 %macro DIFF_INT16 0
     ; Emits one diff_int16 function for the instruction set selected by the
     ; preceding INIT_* line. The macro takes no parameters; everything that
     ; varies between instantiations comes from mmsize.
     ;
     ; cglobal names the five arguments dst, src1, src2, mask, w and one extra
     ; register name, tmp. Per x86inc convention "5,5,5" is: 5 arguments,
     ; 5 general-purpose registers, 5 vector registers.
     ; NOTE(review): tmp is named beyond the 5 declared GPRs; presumably
     ; INT16_LOOP uses it as scratch and preserves it itself -- confirm in
     ; huffyuvdsp_template.asm.
  38 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
     ; Alignment dispatch, only assembled when the vector width exceeds
     ; 8 bytes: if any of the three pointers has a low bit set within
     ; mmsize-1, take the .unaligned variant.
  39 %if mmsize > 8
  40     test src1q, mmsize-1
  41     jnz .unaligned
  42     test src2q, mmsize-1
  43     jnz .unaligned
  44     test dstq, mmsize-1
  45     jnz .unaligned
  46 %endif
     ; All pointers aligned (or mmsize <= 8, where no check is made).
     ; INT16_LOOP comes from the included template and is not visible here;
     ; presumably "a"/"u" select aligned/unaligned memory accesses and "sub"
     ; selects subtraction -- confirm against the template.
     ; NOTE(review): nothing in this macro returns between this expansion and
     ; .unaligned, so INT16_LOOP is expected to end with its own return.
  47     INT16_LOOP a, sub
  48 %if mmsize > 8
  49 .unaligned:
  50     INT16_LOOP u, sub
  51 %endif
  52 %endmacro
  53
  54 %if ARCH_X86_32
     ; MMX variant: assembled for 32-bit x86 builds only.
  55 INIT_MMX mmx
  56 DIFF_INT16
  57 %endif
  58
     ; SSE2 variant: assembled unconditionally.
  59 INIT_XMM sse2
  60 DIFF_INT16
  61
     ; AVX2 variant: assembled only when the build configuration sets
     ; HAVE_AVX2_EXTERNAL.
  62 %if HAVE_AVX2_EXTERNAL
  63 INIT_YMM avx2
  64 DIFF_INT16
  65 %endif
  66
  67 INIT_MMX mmxext
     ;--------------------------------------------------------------------------
     ; sub_hfyu_median_pred_int16(dst, src1, src2, mask, w, left, left_top)
     ;
     ; For every 16-bit element i:
     ;     T  = src1[i]      LT = src1[i-1]      L = src2[i-1]
     ;     pred   = median(L, T, (L - LT + T) & mask)
     ;     dst[i] = (src2[i] - pred) & mask
     ; For i == 0, LT is taken from [left_top] and L from [left].
     ; On exit [left_top] and [left] are overwritten with the last element of
     ; src1 and src2 respectively, zero-extended to 32 bits.
     ;
     ; Notes:
     ;  - min/max are signed word comparisons (pminsw/pmaxsw).
     ;  - Four elements (8 bytes) are processed per iteration and the loop
     ;    body always runs at least once, so loads and stores cover w rounded
     ;    up to a multiple of 4 elements. The reload at the bottom of the last
     ;    iteration also reads past that range. Callers presumably provide
     ;    suitably padded buffers -- confirm.
     ;  - [left_top] and [left] are loaded as 32 bits and OR-ed in; this
     ;    assumes their upper 16 bits are zero, otherwise lane 1 is corrupted.
     ;  - NOTE(review): no emms is issued here; presumably the caller is
     ;    responsible for clearing MMX state -- confirm.
     ;
     ; Per x86inc convention "7,7,0" is: 7 arguments, 7 general-purpose
     ; registers, 0 XMM registers.
     ;--------------------------------------------------------------------------
  68 cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
  69     add      wd, wd                     ; w = 2*w: length in bytes
  70     movd    mm7, maskd
     ; mm7 = mask replicated across the word lanes by SPLATW
  71     SPLATW  mm7, mm7
  72     movq    mm0, [src1q]                ; src1[0..3]
  73     movq    mm2, [src2q]                ; src2[0..3]
  74     psllq   mm0, 16                     ; shift up one element: lane 0 now empty
  75     psllq   mm2, 16
  76     movd    mm6, [left_topq]
  77     por     mm0, mm6                    ; mm0 = LT vector: {*left_top, src1[0..2]}
  78     movd    mm6, [leftq]
  79     por     mm2, mm6                    ; mm2 = L vector:  {*left, src2[0..2]}
  80     xor     maskq, maskq                ; mask now lives in mm7; reuse its GPR as byte offset
  81 .loop:
  82     movq    mm1, [src1q + maskq]        ; T   = src1[i..i+3]
  83     movq    mm3, [src2q + maskq]        ; cur = src2[i..i+3]
  84     movq    mm4, mm2                    ; keep a copy of L
  85     psubw   mm2, mm0                    ; L - LT
  86     paddw   mm2, mm1                    ; L - LT + T
  87     pand    mm2, mm7                    ; gradient = (L - LT + T) & mask
  88     movq    mm5, mm4                    ; second copy of L
  89     pmaxsw  mm4, mm1                    ; max(L, T)
  90     pminsw  mm1, mm5                    ; min(L, T)
  91     pminsw  mm4, mm2                    ; min(max(L, T), gradient)
  92     pmaxsw  mm4, mm1                    ; median(L, T, gradient)
  93     psubw   mm3, mm4                    ; cur - median
  94     pand    mm3, mm7
  95     movq    [dstq + maskq], mm3         ; dst[i..i+3] = (cur - median) & mask
  96     add     maskq, 8                    ; advance four elements
  97     movq    mm0, [src1q + maskq - 2]    ; next LT vector: src1 one element back
  98     movq    mm2, [src2q + maskq - 2]    ; next L vector:  src2 one element back
  99     cmp     maskq, wq
     ; unsigned compare: continue while byte offset < 2*w
 100         jb .loop
 101     movzx maskd, word [src1q + wq - 2]
 102     mov [left_topq], maskd              ; *left_top = last element of src1
 103     movzx maskd, word [src2q + wq - 2]
 104     mov [leftq], maskd                  ; *left = last element of src2
 105     RET