1 ;*****************************************************************************
2 ;* x86-optimized functions for yadif filter
4 ;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
5 ;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
6 ;* Copyright (c) 2011-2013 James Darnley <james.darnley@gmail.com>
8 ;* This file is part of FFmpeg.
10 ;* FFmpeg is free software; you can redistribute it and/or modify
11 ;* it under the terms of the GNU General Public License as published by
12 ;* the Free Software Foundation; either version 2 of the License, or
13 ;* (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;* GNU General Public License for more details.
20 ;* You should have received a copy of the GNU General Public License along
21 ;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
22 ;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
30 pw_8000: times 8 dw 0x8000
32 pd_8000: times 4 dd 0x8000
107 movu m2, [curq+t1+%1*2]
108 movu m3, [curq+t0+%2*2]
165 ; This version of CHECK2 has 3 fewer instructions on sets older than SSE4 but I
166 ; am not sure whether it is any faster. A rewrite or refactor of the filter
167 ; code should make it possible to eliminate the move instruction at the end. It
168 ; exists to satisfy the expectation that the "score" values are in m1.
231 movu m2, [curq+t1-1*2]
232 movu m3, [curq+t0-1*2]
304 sub DWORD r4m, mmsize/4
310 cglobal yadif_filter_line_16bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
311 prefs, mrefs, parity, mode
313 cglobal yadif_filter_line_16bit, 4, 7, 8, 80, dst, prev, cur, next, w, \
314 prefs, mrefs, parity, mode
328 FILTER 1, prevq, curq
332 FILTER 0, curq, nextq