1 ;************************************************************************
2 ;* SIMD-optimized HuffYUV encoding functions
3 ;* Copyright (c) 2000, 2001 Fabrice Bellard
4 ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 ;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7 ;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
9 ;* This file is part of FFmpeg.
11 ;* FFmpeg is free software; you can redistribute it and/or
12 ;* modify it under the terms of the GNU Lesser General Public
13 ;* License as published by the Free Software Foundation; either
14 ;* version 2.1 of the License, or (at your option) any later version.
16 ;* FFmpeg is distributed in the hope that it will be useful,
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;* Lesser General Public License for more details.
21 ;* You should have received a copy of the GNU Lesser General Public
22 ;* License along with FFmpeg; if not, write to the Free Software
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 ;******************************************************************************
26 %include "libavutil/x86/x86util.asm"
30 ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
31 ; unsigned mask, int w);
32 %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
; NOTE(review): the macro body is only partially visible in this chunk; the
; lines below look like the second half of a loop iteration that processes two
; SIMD registers' worth of int16 samples per pass (hence the +mmsize offsets).
; mov%1 expands to a mova-style (aligned) or movu-style (unaligned) move
; depending on %1 — TODO confirm against the elided macro lines.
; Two-pointer path (presumably the %2 == add variant: dst op= src):
67 mov%1 m2, [srcq+wq+mmsize]
68 mov%1 m3, [dstq+wq+mmsize]
; Three-pointer path (presumably the %2 == sub variant: dst = src1 op src2):
72 mov%1 m2, [src1q+wq+mmsize]
73 mov%1 m3, [src2q+wq+mmsize]
; store the second result vector (the first store and the arithmetic between
; load and store are in the elided lines)
80 mov%1 [dstq+wq+mmsize], m2
; diff_int16: per the prototype comment above, computes an element-wise
; difference of int16 data under a bit mask over w elements — TODO confirm,
; body is elided. Declared twice, one instantiation per SIMD flavor; the
; INIT_* directives that select the instruction set sit in the elided lines.
89 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
; Second instantiation for a different instruction set.
; NOTE(review): six names are declared for five arguments ("tmp" beyond the
; 5,5,5 counts) — presumably a scratch name; verify against the x86inc.asm
; cglobal conventions used by this project.
94 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
106 cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
; HuffYUV median-predictor residual on int16 samples (MMX register file; the
; 7,7,0 declaration requests no XMM registers).
; NOTE(review): the body is only partially visible; the comments below are
; inferred from the visible lines and should be confirmed against the full
; function before being relied upon.
; mm6 = *left_top — predictor state carried in from the caller
114 movd mm6, [left_topq]
; Main loop: maskq appears to be reused as the running byte offset once the
; mask value itself has been consumed (the setup lines are elided).
120 movq mm1, [src1q + maskq]
121 movq mm3, [src2q + maskq]
; store one vector of residuals (the median-predict arithmetic is elided)
133 movq [dstq + maskq], mm3
; reload both inputs shifted back by one int16 sample (-2 bytes), presumably
; to form the "left" neighbour for the next prediction step
135 movq mm0, [src1q + maskq - 2]
136 movq mm2, [src2q + maskq - 2]
; Epilogue: write back scalar predictor state from the last sample of each
; row. *left_top gets the last src1 sample; the last src2 sample is loaded
; next — its store (presumably to *left) is in the elided lines.
139 movzx maskd, word [src1q + wq - 2]
140 mov [left_topq], maskd
141 movzx maskd, word [src2q + wq - 2]