1 ;*****************************************************************************
2 ;* x86-optimized functions for hflip filter
4 ;* Copyright (C) 2017 Paul B Mahol
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;*****************************************************************************
23 %include "libavutil/x86/x86util.asm"
; 16-entry byte index tables, referenced below as [pb_flip_%1] and loaded into m0.
; NOTE(review): presumably consumed as byte-shuffle control masks; the shuffle
; instruction itself is not visible here — confirm.
;
; pb_flip_byte: indices 15 down to 0, i.e. the 16 bytes of a vector in fully
; reversed order.
27 pb_flip_byte: db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; pb_flip_short: indices paired as (14,15),(12,13),...,(0,1), i.e. the eight
; 16-bit words in reversed order while the two bytes inside each word keep
; their relative order.
28 pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
32 ;%1: byte or short, %2: b or w, %3: element size in bytes (1 for byte, 2 for short)
; Entry point hflip_%1, declared through the x86inc cglobal macro; %1 is a
; parameter of the enclosing macro (byte or short).
; Per the x86inc cglobal convention the numeric arguments mean: 3 arguments
; loaded into registers, 5 general-purpose registers used, 3 vector registers
; used. The trailing names give the register aliases src, dst, w, r and x
; (used below as srcq, dstq, rq and xq).
34 cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
; m0 = the 16-byte pb_flip_%1 index table.
; NOTE(review): VBROADCASTI128 comes from x86util.asm; presumably it repeats the
; 128-bit table in every lane when m0 is wider than 128 bits — confirm.
35 VBROADCASTI128 m0, [pb_flip_%1]
; Keep only the low bits of rq: rq modulo (2 * mmsize), given that mmsize is a
; power of two. The value rq holds before this point is not visible here.
43 and rq, 2 * mmsize - 1
; Load two vectors from srcq + xq - mmsize + %3 and srcq + xq - 2 * mmsize + %3.
; vpermq with immediate 0x4e selects qwords 2,3,0,1, which swaps the low and
; high 128-bit halves of the loaded data.
51 vpermq m1, [srcq + xq - mmsize + %3], 0x4e; swap the two 128-bit halves at load
52 vpermq m2, [srcq + xq - 2 * mmsize + %3], 0x4e; swap the two 128-bit halves at load
; The same two source addresses loaded with plain unaligned moves (no swap).
; NOTE(review): presumably an alternative to the vpermq loads selected by
; vector width through conditional assembly — confirm.
54 movu m1, [srcq + xq - mmsize + %3]
55 movu m2, [srcq + xq - 2 * mmsize + %3]
; Store m2 one vector width past the current destination offset, at
; dstq + xq + mmsize.
61 movu [dstq + xq + mmsize], m2
86 %if HAVE_AVX2_EXTERNAL