1 ;******************************************************************************
2 ;* SIMD lossless video DSP utils
3 ;* Copyright (c) 2008 Loren Merritt
4 ;* Copyright (c) 2014 Michael Niedermayer
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
; Byte-constant tables. Several of them are loaded with mova into vector
; registers by the routines further down in this file.
; NOTE(review): the label names appear to spell the table one character per
; byte (z = -1, hex digit = byte value). They look like byte-shuffle control
; masks, but no consuming shuffle instruction is visible here - confirm.
28 pb_zzzzzzzz77777777: times 8 db -1 ; emits eight -1 bytes; the "77777777" half is presumably emitted by a following line that is not shown here
30 pb_ef: times 8 db 14,15 ; byte pair 14,15 repeated 8 times (16 bytes)
31 pb_67: times 8 db 6, 7 ; byte pair 6,7 repeated 8 times (16 bytes)
32 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 ; 16 bytes: bytes 4-7 = 3, bytes 12-15 = 11, all others -1
33 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 ; 16 bytes: each 4-byte group is -1,-1,k,k with k = 1, 5, 9, 13
34 pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11 ; 16 bytes: bytes 4-7 = 2,3,2,3; bytes 12-15 = 10,11,10,11; all others -1
35 pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 ; 16 bytes: low 8 bytes -1, high 8 bytes = pair 6,7 repeated 4 times
39 ;------------------------------------------------------------------------------
40 ; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
41 ; const uint8_t *diff, int w,
42 ; int *left, int *left_top)
43 ;------------------------------------------------------------------------------
; add_median_pred entry point, declared through the x86inc cglobal macro.
; Per the cglobal convention the numeric fields are: 6 arguments loaded into
; registers, 6 general-purpose registers used, 8 vector registers used. The
; trailing names give the arguments their dstq/topq/diffq/wq/... aliases.
; Only a few lines of the body are visible in this excerpt.
45 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
76 pmaxub m3, m5 ; median (per-byte unsigned maximum of m3 and m5 is the last visible step)
77 paddb m3, m2 ; +residual (per-byte wrapping add of m2)
97 movzx r2d, byte [dstq-1] ; zero-extend the byte just before dstq into r2d; r2 is the third argument register (diff) under x86inc, reused here
99 movzx r2d, byte [topq-1] ; same for the byte just before topq; presumably written back through left/left_top on lines not shown - confirm
; ADD_LEFT_LOOP: two-parameter macro expanded by the add_left_pred variants.
; Most of its body, including the closing %endmacro, is not visible here.
112 %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
142 movhps [dstq+wq+8], m0 ; store the high 8 bytes of m0 to dst + w + 8
154 ;------------------------------------------------------------------------------
155 ; int ff_add_left_pred(uint8_t *dst, const uint8_t *src, int w, int left)
156 ;------------------------------------------------------------------------------
; add_left_pred entry point. Per the x86inc cglobal convention: 3 arguments
; loaded into registers, 3 general-purpose registers, 7 vector registers.
; "left" is named as a fourth argument but is beyond the 3 auto-loaded ones.
; The instruction-set selection (INIT_*) and the rest of the body are not
; visible in this excerpt.
158 cglobal add_left_pred, 3,3,7, dst, src, w, left
161 mova m4, [pb_zzzz3333zzzzbbbb] ; m4 = constant table (bytes 4-7 = 3, bytes 12-15 = 11, others -1)
162 mova m3, [pb_zz11zz55zz99zzdd] ; m3 = constant table (-1,-1,k,k per 4-byte group, k = 1, 5, 9, 13)
; add_left_pred_unaligned entry point: same register budget and argument
; names as add_left_pred (3 loaded arguments, 3 GPRs, 7 vector registers).
; It additionally loads pb_zzzzzzzz77777777 into m6. The alignment dispatch
; and loop expansion are not visible in this excerpt.
168 cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
170 mova m6, [pb_zzzzzzzz77777777] ; m6 = constant table starting with eight -1 bytes
171 mova m4, [pb_zzzz3333zzzzbbbb] ; m4 = constant table (bytes 4-7 = 3, bytes 12-15 = 11, others -1)
172 mova m3, [pb_zz11zz55zz99zzdd] ; m3 = constant table (-1,-1,k,k per 4-byte group, k = 1, 5, 9, 13)
185 ;------------------------------------------------------------------------------
186 ; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
187 ;------------------------------------------------------------------------------
; add_bytes entry point. Per the x86inc cglobal convention: 3 arguments loaded
; (dst, src, w), 4 general-purpose registers, 2 vector registers. "size" names
; the fourth register, which the visible code uses as an offset (sizeq) and as
; a one-byte temporary (sizeb). Loop labels and branches are not visible here.
189 cglobal add_bytes, 3,4,2, dst, src, w, size
197 mova m0, [srcq + sizeq] ; load one vector from src at offset size
198 mova m1, [srcq + sizeq + mmsize] ; load the next vector (mmsize bytes further)
199 paddb m0, [dstq + sizeq] ; per-byte wrapping add of the matching dst vector
200 paddb m1, [dstq + sizeq + mmsize] ; same for the second vector
201 mova [dstq + sizeq], m0 ; write the first sum back to dst
202 mova [dstq + sizeq + mmsize], m1 ; write the second sum back: 2*mmsize bytes handled by these six lines
212 mov sizeb, [srcq + wq] ; scalar path: fetch a single src byte at offset w
213 add [dstq + wq], sizeb ; add it to the dst byte in place
227 %if HAVE_AVX2_EXTERNAL ; assembled only when HAVE_AVX2_EXTERNAL is nonzero; the guarded lines and matching %endif are not visible here
; ADD_HFYU_LEFT_LOOP_INT16: two-parameter macro expanded by the
; add_left_pred_int16 entry points with a/u alignment selectors.
; Most of its body, including the closing %endmacro, is not visible here.
232 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
258 movhps [dstq+wq+8], m0 ; store the high 8 bytes of m0 to dst + w + 8
273 ;---------------------------------------------------------------------------------------------
274 ; int ff_add_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
275 ;---------------------------------------------------------------------------------------------
; add_left_pred_int16 entry point. Per the x86inc cglobal convention: 4
; arguments loaded into registers, 4 general-purpose registers, 8 vector
; registers. "left" is named as a fifth argument but is beyond the 4
; auto-loaded ones. The instruction-set selection (INIT_*) and the remaining
; setup lines are not visible in this excerpt.
277 cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
280 mova m3, [pb_zzzz2323zzzzabab] ; m3 = constant table (bytes 4-7 = 2,3,2,3; bytes 12-15 = 10,11,10,11; others -1)
285 ADD_HFYU_LEFT_LOOP_INT16 a, a ; expand the loop macro with dst = a, src = a (aligned/aligned per the macro's parameter comment)
; Second add_left_pred_int16 entry point, with the same register budget and
; argument names as the one declared earlier in the file. It expands the loop
; macro three times, one per alignment combination; the labels and branches
; that choose between the expansions are not visible in this excerpt.
; NOTE(review): this shares its base name with the earlier cglobal. The two
; are distinct symbols only if an INIT_* line not shown here selects a
; different cpu suffix - confirm.
288 cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
290 mova m4, [pb_zzzzzzzz67676767] ; m4 = constant table (low 8 bytes -1, high 8 bytes = 6,7 repeated)
291 mova m3, [pb_zzzz2323zzzzabab] ; m3 = constant table (bytes 4-7 = 2,3,2,3; bytes 12-15 = 10,11,10,11; others -1)
300 ADD_HFYU_LEFT_LOOP_INT16 a, a ; expansion for dst = a, src = a
302 ADD_HFYU_LEFT_LOOP_INT16 u, a ; expansion for dst = u, src = a
304 ADD_HFYU_LEFT_LOOP_INT16 u, u ; expansion for dst = u, src = u