1 ;******************************************************************************
2 ;* x86 optimizations for PNG decoding
4 ;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
5 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
7 ;* This file is part of FFmpeg.
9 ;* FFmpeg is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* FFmpeg is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with FFmpeg; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
24 %include "libavutil/x86/x86util.asm"
32 ; %1 = nr. of xmm registers used
34 cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
42 and waq, ~(mmsize*2-1)
46 mova m1, [src1q+iq+mmsize]
48 paddb m1, [src2q+iq+mmsize]
50 mova [dstq+iq+mmsize], m1
71 ; scalar loop for leftover
92 %macro ADD_PAETH_PRED_FN 1
93 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
98 lea endq, [dstq+wq-(mmsize/2-1)]
106 shr cntrq, 2 + mmsize/16
108 lea dstq, [dstq+cntrq*(mmsize/2)]
128 %else ; !cpuflag(ssse3)
138 %endif ; cpuflag(ssse3)