1 ;******************************************************************************
2 ;* SIMD-optimized JPEG2000 DSP functions
3 ;* Copyright (c) 2014 Nicolas Bertrand
4 ;* Copyright (c) 2015 James Almer
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
; Single-precision coefficient tables for the ict_float routine.
; Each label holds one value repeated 8 times (8 x 4-byte floats = 32 bytes),
; so a single table can feed every lane of a vector up to 256 bits wide.
; pf_ict2 and pf_ict3 are consumed through the ICT2 / ICT3 aliases.
; NOTE(review): pf_ict0 / pf_ict1 are presumably bound to ICT0 / ICT1 the same
; way - confirm. The values look like the JPEG 2000 irreversible colour
; transform factors; verify against the C reference implementation.
27 pf_ict0: times 8 dd 1.402
28 pf_ict1: times 8 dd 0.34413
29 pf_ict2: times 8 dd 0.71414
30 pf_ict3: times 8 dd 1.772
34 ;***********************************************************************
35 ; ff_ict_float_<opt>(float *src0, float *src1, float *src2, int csize)
36 ;***********************************************************************
; ict_float: rewrites three float planes in place, one vector per step.
; C-level prototype, as given in the banner comment preceding this function:
;   ff_ict_float_<opt>(float *src0, float *src1, float *src2, int csize)
; The cglobal operands "4, 4, %1" follow the x86inc convention: 4 arguments,
; 4 general-purpose registers, %1 vector registers.
; NOTE(review): %1 is a macro parameter, so this body presumably sits inside
; a %macro whose header and footer are not part of this block - confirm.
38 cglobal ict_float, 4, 4, %1, src0, src1, src2, csize
; ICT2 and ICT3 alias memory operands: the pf_ict2 / pf_ict3 tables are read
; directly by the instructions that use them.
; NOTE(review): the ICT0 / ICT1 aliases used further down are defined
; elsewhere - presumably from pf_ict0 / pf_ict1; confirm.
61 %define ICT2 [pf_ict2]
66 %define ICT3 [pf_ict3]
; Aligned loads of one vector from each plane at byte offset csizeq:
;   m0 = src0 data, m1 = src1 data, m2 = src2 data
; movaps requires an aligned address, so all three planes must be aligned to
; the vector width in use.
73 movaps m0, [src0q+csizeq]
74 movaps m1, [src1q+csizeq]
75 movaps m2, [src2q+csizeq]
; Path taken only when assembling for an FMA4- or FMA3-capable target.
; The lines below use the x86inc 4-operand form:
;   fmaddps  d, a, b, c  ->  d =  (a * b) + c
;   fnmaddps d, a, b, c  ->  d = -(a * b) + c
77 %if cpuflag(fma4) || cpuflag(fma3)
; m5 = src0 - src1 * ICT1
; m4 = src0 + src2 * ICT0
79 fnmaddps m5, m1, ICT1, m0
80 fmaddps m4, m2, ICT0, m0
; Same two products, written with the destination equal to the first
; multiplicand.
; NOTE(review): this form only matches the pair above if m5 / m4 already hold
; copies of m1 / m2. Those copies, and the conditional that selects between
; the two pairs, are not shown in this block - confirm.
84 fnmaddps m5, m5, ICT1, m0
85 fmaddps m4, m4, ICT0, m0
; m0 = src0 + src1 * ICT3
87 fmaddps m0, m1, ICT3, m0
; m5 = m5 - src2 * ICT2, i.e. src0 - src1 * ICT1 - src2 * ICT2
88 fnmaddps m5, m2, ICT2, m5
; Write-back (aligned stores) to the same offsets that were loaded:
;   src0 <- m4, src2 <- m0, src1 <- m5
111 movaps [src0q+csizeq], m4
112 movaps [src2q+csizeq], m0
113 movaps [src1q+csizeq], m5
123 %if HAVE_FMA4_EXTERNAL
130 ;***************************************************************************
131 ; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
132 ;***************************************************************************
; rct_int: rewrites three int32 planes in place, one vector per step.
; C-level prototype, as given in the banner comment preceding this function:
;   ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
; The cglobal operands "4, 4, 4" follow the x86inc convention: 4 arguments,
; 4 general-purpose registers, 4 vector registers.
134 cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
; Aligned loads of one vector from each plane at byte offset csizeq:
;   m1 = src1 data, m2 = src2 data, m0 = src0 data
143 mova m1, [src1q+csizeq]
144 mova m2, [src2q+csizeq]
145 mova m0, [src0q+csizeq]
; Write-back is permuted relative to the loads:
;   src1 <- m0, src2 <- m1, src0 <- m2
; NOTE(review): the integer arithmetic that turns the loaded values into the
; stored ones is not part of this block; check it before relying on the
; register-to-plane mapping above.
151 mova [src1q+csizeq], m0
152 mova [src2q+csizeq], m1
153 mova [src0q+csizeq], m2
161 %if HAVE_AVX2_EXTERNAL