1 ;*****************************************************************************
2 ;* x86-optimized functions for v360 filter
4 ;* This file is part of FFmpeg.
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
22 %include "libavutil/x86/x86util.asm"
24 %if HAVE_AVX2_EXTERNAL
; Constant tables referenced by the kernels below.
; pb_mask: 16 byte indices; the first four are 0, 4, 8, 12 and the rest are -1.
; NOTE(review): presumably a byte-shuffle control that keeps the low byte of
; each dword -- the shuffle instruction itself is not visible here; confirm.
28 pb_mask: db 0,4,8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
; pw_mask: 16 byte indices; the first eight are 0,1, 4,5, 8,9, 12,13 (byte
; pairs at dword stride) and the rest are -1.
; NOTE(review): presumably the 16-bit counterpart of pb_mask -- confirm.
29 pw_mask: db 0,1,4, 5, 8, 9,12,13,-1,-1,-1,-1,-1,-1,-1,-1
; pd_255: four dwords, each holding 255.
30 pd_255: times 4 dd 255
; pd_65535: four dwords, each holding 65535.
31 pd_65535: times 4 dd 65535
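35 ; void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
36 ; const uint16_t *u, const uint16_t *v, const int16_t *ker);
; Note: the prototype above names the remap2 8-bit variant. The remap1_*
; functions that follow declare only the first six arguments (no ker).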
;------------------------------------------------------------------------------
; remap1_8bit_line
; x86inc declaration: 6 arguments loaded, 7 general-purpose registers,
; 6 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; plus one extra register named x.
; NOTE(review): the embedded original line numbers jump (e.g. 49 -> 54), so
; this listing appears to omit lines. The loop label, the source-offset
; arithmetic, the store to dst and the return are not visible here.
;------------------------------------------------------------------------------
39 cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
40 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0.
42 movd xm0, in_linesized
; Load the 16-byte pb_mask table into both 128-bit lanes of m3.
44 VBROADCASTI128 m3, [pb_mask]
; Load 16-bit entries of v starting at element x (2 bytes per entry) and
; sign-extend each to a 32-bit lane of m1.
48 pmovsxwd m1, [vq + xq * 2]
; Same load and sign-extension for the u table, into m2.
49 pmovsxwd m2, [uq + xq * 2]
; Gather one dword per lane from src + m1 (per-lane byte offsets, scale 1)
; into m5. m2 is the mask operand: only lanes whose mask sign bit is set are
; loaded. How m1 and m2 are prepared for this is not visible in this listing.
54 vpgatherdd m5, [srcq + m1], m2
; Copy the upper 128-bit lane of m1 into xm2.
56 vextracti128 xm2, m1, 1
;------------------------------------------------------------------------------
; remap1_16bit_line
; x86inc declaration: 6 arguments loaded, 7 general-purpose registers,
; 6 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; plus one extra register named x.
; NOTE(review): the embedded original line numbers jump (e.g. 76 -> 82), so
; this listing appears to omit lines. The loop label, the source-offset
; arithmetic, the first half of the store and the return are not visible.
;------------------------------------------------------------------------------
66 cglobal remap1_16bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
67 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0.
69 movd xm0, in_linesized
; Load the 16-byte pw_mask table into both 128-bit lanes of m3.
71 VBROADCASTI128 m3, [pw_mask]
; Load 16-bit entries of v starting at element x (2 bytes per entry) and
; sign-extend each to a 32-bit lane of m1.
75 pmovsxwd m1, [vq + xq * 2]
; Same load and sign-extension for the u table, into m2.
76 pmovsxwd m2, [uq + xq * 2]
; Gather one dword per lane from src + m1 (per-lane byte offsets, scale 1)
; into m5. m2 is the mask operand: only lanes whose mask sign bit is set are
; loaded. How m1 and m2 are prepared for this is not visible in this listing.
82 vpgatherdd m5, [srcq + m1], m2
; Copy the upper 128-bit lane of m1 into xm2.
84 vextracti128 xm2, m1, 1
; Store the low 8 bytes of xm2 at dst + x*2 + 8, i.e. 8 bytes past the
; position of element x when elements are 2 bytes wide.
86 movq [dstq+xq*2+8], xm2
;------------------------------------------------------------------------------
; remap2_8bit_line
; x86inc declaration: 7 arguments loaded, 8 general-purpose registers,
; 8 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; ker, plus one extra register named x.
; NOTE(review): the embedded original line numbers jump (e.g. 108 -> 113), so
; this listing appears to omit lines. The loop label, the offset and weight
; arithmetic and the return are not visible here.
;------------------------------------------------------------------------------
94 cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
95 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0. This happens
; before the register is renamed below.
96 movd xm0, in_linesized
; Re-declare the register names: the fourth register, previously called
; in_linesize, is named x from here on.
98 DEFINE_ARGS dst, width, src, x, u, v, ker
; Broadcast the dword 255 from pd_255 to every dword lane of m6.
103 vpbroadcastd m6, [pd_255]
; The tables are indexed with x*8, i.e. 8 bytes (four 16-bit entries) per x.
; Load 16-bit ker entries and sign-extend each to a 32-bit lane of m1.
106 pmovsxwd m1, [kerq + xq * 8]
; Same for the v table, into m2.
107 pmovsxwd m2, [vq + xq * 8]
; Same for the u table, into m3.
108 pmovsxwd m3, [uq + xq * 8]
; Gather one dword per lane from src + m4 (per-lane byte offsets, scale 1)
; into m2. m3 is the mask operand: only lanes whose mask sign bit is set are
; loaded. The computation of m4 and the mask setup are not visible here.
113 vpgatherdd m2, [srcq + m4], m3
; Copy the upper 128-bit lane of m1 into xm2.
119 vextracti128 xm2, m1, 1
; Store byte 0 of xm1 (low lane) to dst[x].
121 pextrb [dstq+xq], xm1, 0
; Store byte 0 of xm2 (former upper lane) to dst[x + 1].
122 pextrb [dstq+xq+1], xm2, 0
;------------------------------------------------------------------------------
; remap2_16bit_line
; x86inc declaration: 7 arguments loaded, 8 general-purpose registers,
; 8 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; ker, plus one extra register named x.
; NOTE(review): the embedded original line numbers jump (e.g. 144 -> 150), so
; this listing appears to omit lines. The loop label, the offset and weight
; arithmetic and the return are not visible here.
;------------------------------------------------------------------------------
130 cglobal remap2_16bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
131 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0. This happens
; before the register is renamed below.
132 movd xm0, in_linesized
; Re-declare the register names: the fourth register, previously called
; in_linesize, is named x from here on.
134 DEFINE_ARGS dst, width, src, x, u, v, ker
; Broadcast the dword 65535 from pd_65535 to every dword lane of m6.
139 vpbroadcastd m6, [pd_65535]
; The tables are indexed with x*8, i.e. 8 bytes (four 16-bit entries) per x.
; Load 16-bit ker entries and sign-extend each to a 32-bit lane of m1.
142 pmovsxwd m1, [kerq + xq * 8]
; Same for the v table, into m2.
143 pmovsxwd m2, [vq + xq * 8]
; Same for the u table, into m3.
144 pmovsxwd m3, [uq + xq * 8]
; Gather one dword per lane from src + m4 (per-lane byte offsets, scale 1)
; into m2. m3 is the mask operand: only lanes whose mask sign bit is set are
; loaded. The computation of m4 and the mask setup are not visible here.
150 vpgatherdd m2, [srcq + m4], m3
; Copy the upper 128-bit lane of m1 into xm2.
156 vextracti128 xm2, m1, 1
; Store word 0 of xm1 (low lane) at dst + x*2.
158 pextrw [dstq+xq*2], xm1, 0
; Store word 0 of xm2 (former upper lane) at dst + x*2 + 2, the next 16-bit
; element.
159 pextrw [dstq+xq*2+2], xm2, 0
;------------------------------------------------------------------------------
; remap3_8bit_line
; x86inc declaration: 7 arguments loaded, 11 general-purpose registers,
; 8 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; ker, plus four extra registers named x, y, tmp and z.
; The visible code handles one vector load of table entries and then one
; further entry at byte offset +16 with scalar instructions.
; NOTE(review): the embedded original line numbers jump (e.g. 182 -> 187,
; 193 -> 195), so this listing appears to omit lines. The loop label, the
; initialisation and stepping of x and y, the weighting arithmetic and the
; return are not visible here.
;------------------------------------------------------------------------------
169 cglobal remap3_8bit_line, 7, 11, 8, dst, width, src, in_linesize, u, v, ker, x, y, tmp, z
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
170 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0.
174 movd xm0, in_linesized
; Broadcast the dword 255 from pd_255 to every dword lane of m6.
177 vpbroadcastd m6, [pd_255]
; y is used as a byte offset into the ker/v/u tables.
; Load 16-bit ker entries at offset y and sign-extend each to a 32-bit lane.
180 pmovsxwd m1, [kerq + yq]
; Same for the v table, into m2.
181 pmovsxwd m2, [vq + yq]
; Same for the u table, into m3.
182 pmovsxwd m3, [uq + yq]
; Gather one dword per lane from src + m4 (per-lane byte offsets, scale 1)
; into m2. m3 is the mask operand: only lanes whose mask sign bit is set are
; loaded. The computation of m4 and the mask setup are not visible here.
187 vpgatherdd m2, [srcq + m4], m3
; Scalar handling of the table entry at byte offset y + 16.
; NOTE(review): presumably this is the entry right after the eight consumed
; by the vector loads above (assumes 256-bit m registers) -- confirm.
; NOTE(review): these entries are zero-extended (movzx) while the vector
; loads above sign-extend (pmovsxwd); confirm the intended signedness.
; tmp = v entry at offset y + 16, zero-extended.
191 movzx tmpq, word [vq + yq + 16]
; tmp *= in_linesize.
192 imul tmpq, in_linesizeq
; z = u entry at offset y + 16, zero-extended.
193 movzx zq, word [uq + yq + 16]
; z = byte at src + tmp, zero-extended. This overwrites the u entry loaded
; just above; how tmp is combined with it first is not visible in this
; listing.
195 movzx zq, byte [srcq + tmpq]
; tmp = ker entry at offset y + 16, zero-extended.
196 movzx tmpq, word [kerq + yq + 16]
; Store byte 0 of xm2 to dst[x].
203 pextrb [dstq+xq], xm2, 0
;------------------------------------------------------------------------------
; remap4_8bit_line
; x86inc declaration: 7 arguments loaded, 9 general-purpose registers,
; 11 vector registers. Named registers: dst, width, src, in_linesize, u, v,
; ker, plus two extra registers named x and y.
; The visible code loads two consecutive groups of table entries (byte
; offsets y and y + 16) and issues two gathers from src.
; NOTE(review): the embedded original line numbers jump (e.g. 227 -> 234,
; 236 -> 247), so this listing appears to omit lines. The loop label, the
; initialisation and stepping of x and y, the offset and weight arithmetic
; and the return are not visible here.
;------------------------------------------------------------------------------
212 cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y
; Sign-extend the 32-bit width into the full-width register (x86inc macro;
; emits nothing when both names denote the same register).
213 movsxdifnidn widthq, widthd
; Copy the low 32 bits of in_linesize into the low dword of xm0.
216 movd xm0, in_linesized
; Broadcast the dword 255 from pd_255 to every dword lane of m6.
219 vpbroadcastd m6, [pd_255]
; y is used as a byte offset into the ker/v/u tables. Each table is read
; twice, at y and y + 16, sign-extending 16-bit entries to 32-bit lanes.
; ker entries at y -> m1, at y + 16 -> m5.
222 pmovsxwd m1, [kerq + yq]
223 pmovsxwd m5, [kerq + yq + 16]
; v entries at y -> m2, at y + 16 -> m8.
224 pmovsxwd m2, [vq + yq]
225 pmovsxwd m8, [vq + yq + 16]
; u entries at y -> m3, at y + 16 -> m9.
226 pmovsxwd m3, [uq + yq]
227 pmovsxwd m9, [uq + yq + 16]
; First gather: one dword per lane from src + m4 (byte offsets, scale 1)
; into m2, with m3 as the mask operand. The computation of m4 and the mask
; setup are not visible here.
234 vpgatherdd m2, [srcq + m4], m3
; Second gather: one dword per lane from src + m10 into m4, again with m3 as
; the mask operand.
; NOTE(review): a gather clears its mask register on completion, so m3 must
; be reloaded between the two gathers; presumably that happens in the
; omitted line between them -- confirm.
236 vpgatherdd m4, [srcq + m10], m3
; Store byte 0 of xm2 to dst[x].
247 pextrb [dstq+xq], xm2, 0