1 ;*****************************************************************************
2 ;* x86-optimized functions for yadif filter
4 ;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
5 ;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
6 ;* Copyright (c) 2011-2013 James Darnley <james.darnley@gmail.com>
8 ;* This file is part of FFmpeg.
10 ;* FFmpeg is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with FFmpeg; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
; pw_8000: eight 16-bit words, each holding 0x8000 (16 bytes of data).
; NOTE(review): 0x8000 is the midpoint of the unsigned 16-bit range; it is
; presumably used as a bias when treating unsigned samples as signed. The use
; sites are not visible in this view -- confirm.
30 pw_8000: times 8 dw 0x8000
; pd_8000: four 32-bit dwords, each holding 0x8000 (16 bytes of data).
; NOTE(review): presumably the dword-width counterpart of pw_8000 -- confirm
; against the use sites.
32 pd_8000: times 4 dd 0x8000
; Load two vectors addressed from curq: m2 from [curq+t1] displaced by %1*2
; bytes, and m3 from [curq+t0] displaced by %2*2 bytes. %1 and %2 are
; parameters of the enclosing macro, whose header is not visible in this view.
; NOTE(review): movu is presumably x86inc's unaligned vector move, and the *2
; scaling presumably converts a sample offset into a byte offset for 16-bit
; samples -- confirm against the full macro.
91 movu m2, [curq+t1+%1*2]
92 movu m3, [curq+t0+%2*2]
140 ; This version of CHECK2 has 3 fewer instructions on instruction sets older than
141 ; SSE4, but I am not sure whether it is any faster. A rewrite or refactor of the
142 ; filter code should make it possible to eliminate the move instruction at the
143 ; end, which exists to satisfy the expectation that the "score" values are in m1.
; Load m2 from [curq+t1] and m3 from [curq+t0], each displaced by -1*2 = -2
; bytes.
; NOTE(review): presumably this addresses the sample one position to the left
; of the current one, with 2 bytes per 16-bit sample -- confirm against the
; surrounding code, which is not visible in this view.
206 movu m2, [curq+t1-1*2]
207 movu m3, [curq+t0-1*2]
; Subtract mmsize/4 from the 32-bit value stored at r4m.
; NOTE(review): r4m is presumably x86inc's memory operand for argument index 4
; (`w` in the cglobal argument list), which would make this the remaining-width
; counter of a loop that is not visible in this view -- confirm.
274 sub DWORD r4m, mmsize/4
; Entry-point declaration for yadif_filter_line_16bit with the named arguments
; dst, prev, cur, next, w, prefs, mrefs, parity, mode.
; NOTE(review): per x86inc's cglobal convention (defined outside this view) the
; numeric fields should be: arguments loaded into registers (4), general-purpose
; registers used (6), vector registers used (8) and stack bytes reserved (80)
; -- confirm against x86inc.asm.
280 cglobal yadif_filter_line_16bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
281 prefs, mrefs, parity, mode
; NOTE(review): second declaration of the same symbol; it differs from the one
; above only in the second numeric field (7 instead of 6). The two are
; presumably alternatives selected by a conditional (e.g. per target
; architecture) that is not visible in this view -- confirm.
283 cglobal yadif_filter_line_16bit, 4, 7, 8, 80, dst, prev, cur, next, w, \
284 prefs, mrefs, parity, mode
; Two expansions of the FILTER macro; its definition is not visible in this
; view. The first passes 1 together with prevq and curq.
298 FILTER 1, prevq, curq
; The second passes 0 together with curq and nextq.
; NOTE(review): the leading 1/0 and the shifted pointer pair presumably
; correspond to the two field-parity cases, with control flow between the two
; expansions that is not visible here -- confirm.
302 FILTER 0, curq, nextq