1 ;******************************************************************************
2 ;* x86-optimized functions for the OpenEXR decoder
3 ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
5 ;* reorder_pixels, predictor based on patch by John Loy
6 ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
8 ;* predictor AVX/AVX2 by Henrik Gramner
10 ;* This file is part of FFmpeg.
12 ;* FFmpeg is free software; you can redistribute it and/or
13 ;* modify it under the terms of the GNU Lesser General Public
14 ;* License as published by the Free Software Foundation; either
15 ;* version 2.1 of the License, or (at your option) any later version.
17 ;* FFmpeg is distributed in the hope that it will be useful,
18 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;* Lesser General Public License for more details.
22 ;* You should have received a copy of the GNU Lesser General Public
23 ;* License along with FFmpeg; if not, write to the Free Software
24 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 ;******************************************************************************
27 %include "libavutil/x86/x86util.asm"
34 ;------------------------------------------------------------------------------
35 ; void ff_reorder_pixels(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
36 ;------------------------------------------------------------------------------
; REORDER_PIXELS: macro that emits reorder_pixels for the instruction set that
; is active when the macro is expanded. The visible body byte-interleaves the
; first half of src with the second half of src and writes the result to dst.
;
; NOTE(review): this excerpt does not show every line of the body (the embedded
; line numbers skip). The loop label, the counter update/branch, the return and
; the closing %endmacro are not visible here -- confirm against the full file.
38 %macro REORDER_PIXELS 0
; x86inc cglobal: 3 arguments, 4 general-purpose registers, 3 vector registers
; (m0-m2). dst/src1/size name the arguments; src2 names the extra scratch GPR.
39 cglobal reorder_pixels, 3,4,3, dst, src1, size, src2
; Pointer setup: after the next five instructions every access is made relative
; to a negative offset held in sizeq, so that
;   src1q + sizeq   -> start of the first half of src
;   src2q + sizeq   -> start of the second half of src
;   dstq  + 2*sizeq -> start of dst
40 lea src2q, [src1q+sizeq] ; src2 = src + 2 * half_size
41 add dstq, sizeq ; dst offset by size
42 shr sizeq, 1 ; half_size
43 add src1q, sizeq ; offset src by half_size
44 neg sizeq ; size = offset for dst, src1, src2
; mova is the alignment-requiring move and movu the unaligned one: the first
; half is read with an aligned load, the second half with an unaligned load.
; NOTE(review): assumes callers pass suitably aligned src/dst -- confirm.
47 mova m0, [src1q+sizeq] ; load first part
48 movu m1, [src2q+sizeq] ; load second part
; Byte-wise interleave of m0 and m1, using m2 as the temporary register.
49 SBUTTERFLY bw, 0, 1, 2 ; interleaved
; Store the low 128 bits of each interleaved register to consecutive 16-byte
; slots of dst (aligned stores).
50 mova [dstq+2*sizeq ], xm0 ; copy to dst
51 mova [dstq+2*sizeq+16], xm1
; Selector q0301 picks the high 128-bit lane of m0 for the low half of the
; result and the high 128-bit lane of m1 for the high half; the combined 32
; bytes go to dst+32.
; NOTE(review): these two lines only make sense with 256-bit registers and are
; presumably wrapped in an AVX2 conditional that this excerpt does not show.
53 vperm2i128 m0, m0, m1, q0301
54 mova [dstq+2*sizeq+32], m0
64 %if HAVE_AVX2_EXTERNAL
70 ;------------------------------------------------------------------------------
71 ; void ff_predictor(uint8_t *src, ptrdiff_t size);
72 ;------------------------------------------------------------------------------
; predictor(src, size): operates on the buffer at src; the visible stores write
; back to the same addresses that are read, i.e. the update is in place.
;
; NOTE(review): only a small subset of the original body is visible in this
; excerpt (the embedded line numbers skip from 75 to 105). The enclosing macro
; header, the setup of sizeq, the loop structure and the instructions that
; compute m2/xm2 are not shown -- confirm details against the full file.
;
; x86inc cglobal: 2 arguments, 2 general-purpose registers, 5 vector registers
; (m0-m4).
75 cglobal predictor, 2,2,5, src, size
; Load the 16 bytes at pb_80 and replicate them into both 128-bit lanes of m0.
; pb_80 is defined outside this excerpt (presumably sixteen 0x80 bytes --
; TODO confirm).
77 vbroadcasti128 m0, [pb_80]
; m3 = m0 XOR the vector loaded from srcq + sizeq (three-operand form; m0 is
; left unmodified).
86 pxor m3, m0, [srcq + sizeq]
; xm4 = upper 128 bits of m3.
97 vextracti128 xm4, m3, 1
; Aligned stores of the result register back into the source buffer. Two
; 16-byte stores of xm2 at offsets +0 and +16 appear here; xm2 is presumably
; recomputed between them by instructions not visible in this excerpt.
98 mova [srcq + sizeq], xm2
101 mova [srcq + sizeq + 16], xm2
; Full-width aligned store of m2 to the same base address as the store at
; offset +0 above; presumably an alternative code path selected by a
; conditional that is not visible here -- TODO confirm.
105 mova [srcq + sizeq], m2
119 %if HAVE_AVX2_EXTERNAL