1 ;*****************************************************************************
2 ;* x86-optimized functions for yadif filter
4 ;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
5 ;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
6 ;* Copyright (c) 2011-2013 James Darnley <james.darnley@gmail.com>
8 ;* This file is part of FFmpeg.
10 ;* FFmpeg is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with FFmpeg; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
30 pw_8000: times 8 dw 0x8000 ; eight 16-bit words, each 0x8000 (16 bytes); NOTE(review): presumably a sign-bias constant - use not visible here
32 pd_8000: times 4 dd 0x8000 ; four 32-bit dwords, each 0x00008000 (16 bytes); NOTE(review): use not visible here
67 movu m2, [curq+t1+%1*2]
68 movu m3, [curq+t0+%2*2]
116 ; This version of CHECK2 has 3 fewer instructions on instruction sets older than
117 ; SSE4, but I am not sure whether it is any faster. A rewrite or refactor of the
118 ; filter code should make it possible to eliminate the move instruction at the
119 ; end; it exists to satisfy the expectation that the "score" values are in m1.
182 movu m2, [curq+t1-1*2]
183 movu m3, [curq+t0-1*2]
250 sub DWORD r4m, mmsize/4
; Entry point yadif_filter_line_16bit, declared twice with identical argument
; names; the two declarations differ only in the register count (6 vs 7).
; Per the x86inc cglobal convention the numeric fields are: arguments loaded
; (4), general-purpose registers used (6 or 7), XMM registers used (8) and
; stack bytes reserved (80).
; NOTE(review): the conditional that selects between the two declarations is
; not visible in this excerpt - confirm which build configuration uses which.
256 cglobal yadif_filter_line_16bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
257 prefs, mrefs, parity, mode
259 cglobal yadif_filter_line_16bit, 4, 7, 8, 80, dst, prev, cur, next, w, \
260 prefs, mrefs, parity, mode
; The FILTER macro is expanded twice: once on the (prev, cur) pointer pair
; with first argument 1, and once on the (cur, next) pair with first argument 0.
; NOTE(review): presumably one expansion per parity value; the branch choosing
; between them and the FILTER definition are not visible here - confirm.
274 FILTER 1, prevq, curq
278 FILTER 0, curq, nextq