1 ;******************************************************************************
2 ;* x86-optimized yuv2yuvX
3 ;* Copyright 2020 Google LLC
4 ;* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
27 ;-----------------------------------------------------------------------------
30 ; void ff_yuv2yuvX_<opt>(const int16_t *filter, int filterSize,
31 ; int srcOffset, uint8_t *dest, int dstW,
32 ; const uint8_t *dither, int offset);
34 ;-----------------------------------------------------------------------------
36 %macro YUV2YUVX_FUNC 0
; Zero-argument macro that emits one yuv2yuvX function body; the vector
; width is expressed through mmsize, so the same text can serve several
; instruction sets.
; NOTE(review): the visible excerpt appears to omit lines (no labels,
; branches, accumulation steps, %if/%else guards or %endmacro are shown
; here) -- read the comments below as describing only the instructions
; that are visible.
;
; The names after the three numbers become the register aliases used below
; (filterq, filterSizeq, srcq/srcd, destq, dstWq/dstWd, ditherq,
; offsetq/offsetd).  The "7, 7, 8" presumably follows the x86inc cglobal
; convention (argument count, GPR count, vector register count) -- confirm
; against x86inc.asm.
37 cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
; Widen the 32-bit int arguments to their full-width register aliases.
; movsxdifnidn is not defined in this file; presumably it is the x86inc
; helper that sign-extends only when the two operands are distinct
; registers -- TODO confirm.
45 movsxdifnidn dstWq, dstWd
46 movsxdifnidn offsetq, offsetd
47 movsxdifnidn srcq, srcd
; Load 8 bytes from the dither pointer and replicate them into every
; 64-bit lane of m3.
49 vpbroadcastq m3, [ditherq]
; From here on the filterSize register is reused as a cursor into the
; filter array, and the src register is reused to hold the pointer read
; from the current filter entry (it serves as the base address of the
; multiplies below).
66 mov filterSizeq, filterq
67 mov srcq, [filterSizeq]
; Three different ways of loading m0 from offset +8 of the current filter
; entry.  As shown they run back to back and each one overwrites m0.
; NOTE(review): these look like per-instruction-set alternatives whose
; conditional-assembly guards are not visible in this excerpt -- confirm.
81 vpbroadcastq m0, [filterSizeq + 8]
83 movddup m0, [filterSizeq + 8]
85 mova m0, [filterSizeq + 8]
; pmulhw keeps the high 16 bits of each signed 16x16-bit product.  The
; source is addressed at 2 bytes per offset unit; two consecutive vectors
; (mmsize bytes apart) are multiplied by m0 into m2 and m5.
87 pmulhw m2, m0, [srcq + offsetq * 2]
88 pmulhw m5, m0, [srcq + offsetq * 2 + mmsize]
; Same multiply for the next two source vectors.  m2 and m5 are overwritten
; here; whatever consumes the previous products is not visible in this
; excerpt.
92 pmulhw m2, m0, [srcq + offsetq * 2 + 2 * mmsize]
93 pmulhw m5, m0, [srcq + offsetq * 2 + 3 * mmsize]
; Re-read the source pointer through the filter cursor.
; NOTE(review): the instruction that advances filterSizeq between entries
; is not visible here -- confirm.
98 mov srcq, [filterSizeq]
; Store m3 and m6 to the destination, addressed at 1 byte per offset unit.
; movr and unroll are not defined in the visible excerpt; presumably they
; are %define'd per instruction set elsewhere in the macro -- confirm.
116 movr [destq + offsetq], m3
118 movr [destq + offsetq + mmsize], m6
; Advance the offset by the number of bytes covered by one iteration, then
; rewind the filter cursor to the first filter entry.
120 add offsetq, mmsize * unroll
121 mov filterSizeq, filterq
133 %if HAVE_AVX2_EXTERNAL