1 ;*****************************************************************************
2 ;* x86-optimized functions for bwdif filter
4 ;* Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de>
6 ;* Based on yadif simd code
7 ;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
8 ;* 2013 Daniel Kang <daniel.d.kang@gmail.com>
10 ;* This file is part of FFmpeg.
12 ;* FFmpeg is free software; you can redistribute it and/or
13 ;* modify it under the terms of the GNU Lesser General Public
14 ;* License as published by the Free Software Foundation; either
15 ;* version 2.1 of the License, or (at your option) any later version.
17 ;* FFmpeg is distributed in the hope that it will be useful,
18 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;* Lesser General Public License for more details.
22 ;* You should have received a copy of the GNU Lesser General Public
23 ;* License along with FFmpeg; if not, write to the Free Software
24 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 ;******************************************************************************
27 %include "libavutil/x86/x86util.asm"
; Constant tables of signed 16-bit words. Each table holds 8 words
; (16 bytes) and is used as a memory operand by the SIMD code in this file.
; NOTE(review): the section and alignment directives are not visible in this
; excerpt; pw_splfdif is read with mova, so it presumably has to be aligned
; to the vector size -- confirm.

; The word pair (1016, 5570) repeated 4 times; multiplier operand of pmaddwd.
31 pw_coefhf: times 4 dw 1016, 5570
; The word -3801 repeated 8 times; multiplier operand of pmullw and pmulhw.
32 pw_coefhf1: times 8 dw -3801
; The word pair (5077, -981) repeated 4 times; added word-wise with paddw.
33 pw_coefsp: times 4 dw 5077, -981
; The word pair (-768, 768) repeated 4 times; loaded into a register with mova.
; NOTE(review): pw_coefsp + pw_splfdif is (4309, -213) per word pair, and the
; code adds pw_coefsp to a register first loaded from pw_splfdif, so this looks
; like an adjustment applied on top of pw_coefsp. The instructions between the
; load and the add are not visible here -- confirm.
34 pw_splfdif: times 4 dw -768, 768
; Fragment of a parameterised macro body. Its %macro / %endmacro lines, its
; labels and many intermediate instructions are not part of this excerpt, so
; the comments below describe only what each visible line does.
; NOTE(review): presumably this is the FILTER macro that is invoked with five
; arguments further down in the file -- confirm.
;
; Macro parameters as they are used in the visible lines:
;   %2, %3 : base addresses for the loads at row offsets t0*2, t1*2, t0*4, t1*4
;   %4     : suffix that selects the load macro (LOAD followed by %4, defined
;            outside this excerpt)
;   %5     : scale factor applied to every row offset
; t0..t3 are x86inc temporary-register aliases (see DECLARE_REG_TMP).

; Rows of cur at offsets t0 and t1 -> m0, m1.
60 LOAD%4 m0, [curq+t0*%5]
61 LOAD%4 m1, [curq+t1*%5]
; Rows of prev at offsets t0 and t1 -> m3, m4.
70 LOAD%4 m3, [prevq+t0*%5]
71 LOAD%4 m4, [prevq+t1*%5]
; Rows of next at offsets t0 and t1 -> m3, m4 (same registers as the prev loads).
79 LOAD%4 m3, [nextq+t0*%5]
80 LOAD%4 m4, [nextq+t1*%5]
; Rows of both %2 and %3 at twice the t0 and t1 offsets -> m3..m6.
88 LOAD%4 m3, [%2+t0*2*%5]
89 LOAD%4 m4, [%3+t0*2*%5]
90 LOAD%4 m5, [%2+t1*2*%5]
91 LOAD%4 m6, [%3+t1*2*%5]
; Rows of both %2 and %3 at four times the t0 and t1 offsets -> m2..m5.
122 LOAD%4 m2, [%2+t0*4*%5]
123 LOAD%4 m3, [%3+t0*4*%5]
124 LOAD%4 m4, [%2+t1*4*%5]
125 LOAD%4 m5, [%3+t1*4*%5]
; pmaddwd: multiply signed words by the (1016, 5570) pairs and add each
; adjacent pair of products into one signed dword.
132 pmaddwd m2, [pw_coefhf]
133 pmaddwd m3, [pw_coefhf]
; Word-wise signed multiply by -3801: pmullw keeps the low 16 bits of each
; product in m4, pmulhw keeps the high 16 bits in m6.
136 pmullw m4, [pw_coefhf1]
137 pmulhw m6, [pw_coefhf1]
; Rows of cur at offsets t2 and t3 -> m5, m6.
149 LOAD%4 m5, [curq+t2*%5]
150 LOAD%4 m6, [curq+t3*%5]
; Rows of cur at offsets t0 and t1 again, this time -> m5, m6.
154 LOAD%4 m5, [curq+t0*%5]
155 LOAD%4 m6, [curq+t1*%5]
; Load the (-768, 768) word pairs into m5.
168 mova m5, [pw_splfdif]
; Add the (5077, -981) word pairs word-wise to m5 and m7.
172 paddw m5, [pw_coefsp]
173 paddw m7, [pw_coefsp]
; Subtract mmsize/2 from the 32-bit w argument (accessed through its x86inc
; wm form).
; NOTE(review): presumably the loop counter update; the branch that consumes
; the resulting flags is not visible in this excerpt -- confirm.
199 sub DWORD wm, mmsize/2
; Fragment: register setup and the two filter invocations. The enclosing
; macro header, conditional-assembly lines and labels are not part of this
; excerpt.

; Sign-extend the 32-bit arguments prefs, mrefs, prefs3 and mrefs3 into the
; 64-bit registers r5..r8.
; NOTE(review): r6..r8 exist only with the 9-register cglobal declarations,
; so this path presumably belongs to the x86-64 build -- confirm against the
; conditional that is not shown here.
205 movsxd r5, DWORD prefsm
206 movsxd r6, DWORD mrefsm
207 movsxd r7, DWORD prefs3m
208 movsxd r8, DWORD mrefs3m
; Map the temporaries t0..t3 onto r5..r8, i.e. t0 = prefs, t1 = mrefs,
; t2 = prefs3, t3 = mrefs3.
209 DECLARE_REG_TMP 5, 6, 7, 8
; Two expansions of FILTER: variant 1 uses prev/cur, variant 0 uses cur/next
; as its second and third arguments. The enclosing macro's own %1 and %2 are
; forwarded as the fourth and fifth arguments.
; NOTE(review): the code that chooses between the two expansions is not
; visible; presumably it tests the parity argument -- confirm.
221 FILTER 1, prevq, curq, %1, %2
224 FILTER 0, curq, nextq, %1, %2
; Function declarations. The x86inc cglobal arguments are: name, number of
; arguments loaded into registers, number of general-purpose registers,
; number of vector registers, stack bytes, then the argument names.
; NOTE(review): each function name is declared twice, so the pairs must sit
; in alternative conditional-assembly branches that are not visible here;
; presumably x86-64 versus x86-32 -- confirm.

; bwdif_filter_line, variant with 9 general-purpose registers, 12 vector
; registers and no stack space.
231 cglobal bwdif_filter_line, 4, 9, 12, 0, dst, prev, cur, next, w, prefs, \
232 mrefs, prefs2, mrefs2, prefs3, mrefs3, \
233 prefs4, mrefs4, parity, clip_max
; bwdif_filter_line, variant with 6 general-purpose registers, 8 vector
; registers and 64 bytes of stack.
235 cglobal bwdif_filter_line, 4, 6, 8, 64, dst, prev, cur, next, w, prefs, \
236 mrefs, prefs2, mrefs2, prefs3, mrefs3, \
237 prefs4, mrefs4, parity, clip_max
; STEP is half the vector register size in bytes.
; NOTE(review): where STEP is consumed is not visible in this excerpt.
239 %define STEP mmsize/2
; bwdif_filter_line_12bit, variant with 9 general-purpose registers,
; 13 vector registers (m0..m12) and no stack space.
243 cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
244 prefs, mrefs, prefs2, mrefs2, \
245 prefs3, mrefs3, prefs4, \
246 mrefs4, parity, clip_max
; Load the 32-bit clip_max argument into the low dword of m12.
247 movd m12, DWORD clip_maxm
; bwdif_filter_line_12bit, variant with 6 general-purpose registers,
; 8 vector registers and 80 bytes of stack.
250 cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
251 prefs, mrefs, prefs2, mrefs2, \
252 prefs3, mrefs3, prefs4, \
253 mrefs4, parity, clip_max
; Only m0..m7 are declared in this variant, so clip_max is loaded into the low
; dword of m0 instead of m12.
255 movd m0, DWORD clip_maxm