1 ;******************************************************************************
2 ;* SIMD lossless video DSP utils
3 ;* Copyright (c) 2008 Loren Merritt
4 ;* Copyright (c) 2014 Michael Niedermayer
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
28 pb_zzzzzzzz77777777: times 8 db -1
; Constant byte tables, 16 bytes each.
; Naming scheme: in the long labels every character after "pb_" describes one
; byte of the table, in order: 'z' marks a -1 byte and a hex digit is the
; stored byte value (e.g. 'a' = 10, 'b' = 11, 'd' = 13). The short labels
; pb_ef / pb_67 name the two-byte pair that is repeated eight times.
; Several of these tables are loaded whole into m-registers with mova by the
; routines in this file.
; NOTE(review): these look like byte-shuffle (pshufb-style) control masks, where
; a -1 selector would zero the destination byte -- the instructions that consume
; them are not visible in this excerpt, so confirm before relying on that.
30 pb_ef: times 8 db 14,15 ; byte pair 14,15 repeated 8 times
31 pb_67: times 8 db 6, 7 ; byte pair 6,7 repeated 8 times
32 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
33 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
34 pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11
35 pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
39 ; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
40 ; const uint8_t *diff, int w,
41 ; int *left, int *left_top)
43 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
74 pmaxub m3, m5 ; median
75 paddb m3, m2 ; +residual
95 movzx r2d, byte [dstq-1]
97 movzx r2d, byte [topq-1]
110 %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
140 movhps [dstq+wq+8], m0
152 ; int ff_add_left_pred(uint8_t *dst, const uint8_t *src, int w, int left)
154 cglobal add_left_pred, 3,3,7, dst, src, w, left
157 mova m4, [pb_zzzz3333zzzzbbbb]
158 mova m3, [pb_zz11zz55zz99zzdd]
164 cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
166 mova m6, [pb_zzzzzzzz77777777]
167 mova m4, [pb_zzzz3333zzzzbbbb]
168 mova m3, [pb_zz11zz55zz99zzdd]
182 cglobal add_bytes, 3,4,2, dst, src, w, size
190 mova m0, [srcq + sizeq]
191 mova m1, [srcq + sizeq + mmsize]
192 paddb m0, [dstq + sizeq]
193 paddb m1, [dstq + sizeq + mmsize]
194 mova [dstq + sizeq], m0
195 mova [dstq + sizeq + mmsize], m1
205 mov sizeb, [srcq + wq]
206 add [dstq + wq], sizeb
220 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
246 movhps [dstq+wq+8], m0
261 ; int ff_add_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
263 cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
266 mova m3, [pb_zzzz2323zzzzabab]
271 ADD_HFYU_LEFT_LOOP_INT16 a, a
274 cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
276 mova m4, [pb_zzzzzzzz67676767]
277 mova m3, [pb_zzzz2323zzzzabab]
286 ADD_HFYU_LEFT_LOOP_INT16 a, a
288 ADD_HFYU_LEFT_LOOP_INT16 u, a
290 ADD_HFYU_LEFT_LOOP_INT16 u, u