1 ;******************************************************************************
2 ;* x86 optimizations for PNG decoding
4 ;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
5 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
7 ;* This file is part of Libav.
9 ;* Libav is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* Libav is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with Libav; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
25 %include "x86util.asm"
33 ; %1 = nr. of xmm registers used
35 cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
43 and waq, ~(mmsize*2-1)
47 mova m1, [src1q+iq+mmsize]
49 paddb m1, [src2q+iq+mmsize]
51 mova [dstq+iq+mmsize], m1
72 ; scalar loop for leftover
93 %macro ADD_PAETH_PRED_FN 1
94 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
99 lea endq, [dstq+wq-(mmsize/2-1)]
107 shr cntrq, 2 + mmsize/16
109 lea dstq, [dstq+cntrq*(mmsize/2)]
129 %else ; !cpuflag(ssse3)
139 %endif ; cpuflag(ssse3)