/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

19 #ifndef AVUTIL_FLOAT_DSP_H
20 #define AVUTIL_FLOAT_DSP_H
24 typedef struct AVFloatDSPContext {
26 * Calculate the entry wise product of two vectors of floats and store the result in
29 * @param dst output vector
30 * constraints: 32-byte aligned
31 * @param src0 first input vector
32 * constraints: 32-byte aligned
33 * @param src1 second input vector
34 * constraints: 32-byte aligned
35 * @param len number of elements in the input
36 * constraints: multiple of 16
38 void (*vector_fmul)(float *dst, const float *src0, const float *src1,
42 * Multiply a vector of floats by a scalar float and add to
43 * destination vector. Source and destination vectors must
44 * overlap exactly or not at all.
46 * @param dst result vector
47 * constraints: 32-byte aligned
48 * @param src input vector
49 * constraints: 32-byte aligned
50 * @param mul scalar value
51 * @param len length of vector
52 * constraints: multiple of 16
54 void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
58 * Multiply a vector of doubles by a scalar double and add to
59 * destination vector. Source and destination vectors must
60 * overlap exactly or not at all.
62 * @param dst result vector
63 * constraints: 32-byte aligned
64 * @param src input vector
65 * constraints: 32-byte aligned
66 * @param mul scalar value
67 * @param len length of vector
68 * constraints: multiple of 16
70 void (*vector_dmac_scalar)(double *dst, const double *src, double mul,
74 * Multiply a vector of floats by a scalar float. Source and
75 * destination vectors must overlap exactly or not at all.
77 * @param dst result vector
78 * constraints: 16-byte aligned
79 * @param src input vector
80 * constraints: 16-byte aligned
81 * @param mul scalar value
82 * @param len length of vector
83 * constraints: multiple of 4
85 void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
89 * Multiply a vector of double by a scalar double. Source and
90 * destination vectors must overlap exactly or not at all.
92 * @param dst result vector
93 * constraints: 32-byte aligned
94 * @param src input vector
95 * constraints: 32-byte aligned
96 * @param mul scalar value
97 * @param len length of vector
98 * constraints: multiple of 8
100 void (*vector_dmul_scalar)(double *dst, const double *src, double mul,
104 * Overlap/add with window function.
105 * Used primarily by MDCT-based audio codecs.
106 * Source and destination vectors must overlap exactly or not at all.
108 * @param dst result vector
109 * constraints: 16-byte aligned
110 * @param src0 first source vector
111 * constraints: 16-byte aligned
112 * @param src1 second source vector
113 * constraints: 16-byte aligned
114 * @param win half-window vector
115 * constraints: 16-byte aligned
116 * @param len length of vector
117 * constraints: multiple of 4
119 void (*vector_fmul_window)(float *dst, const float *src0,
120 const float *src1, const float *win, int len);
123 * Calculate the entry wise product of two vectors of floats, add a third vector of
124 * floats and store the result in a vector of floats.
126 * @param dst output vector
127 * constraints: 32-byte aligned
128 * @param src0 first input vector
129 * constraints: 32-byte aligned
130 * @param src1 second input vector
131 * constraints: 32-byte aligned
132 * @param src2 third input vector
133 * constraints: 32-byte aligned
134 * @param len number of elements in the input
135 * constraints: multiple of 16
137 void (*vector_fmul_add)(float *dst, const float *src0, const float *src1,
138 const float *src2, int len);
141 * Calculate the entry wise product of two vectors of floats, and store the result
142 * in a vector of floats. The second vector of floats is iterated over
145 * @param dst output vector
146 * constraints: 32-byte aligned
147 * @param src0 first input vector
148 * constraints: 32-byte aligned
149 * @param src1 second input vector
150 * constraints: 32-byte aligned
151 * @param len number of elements in the input
152 * constraints: multiple of 16
154 void (*vector_fmul_reverse)(float *dst, const float *src0,
155 const float *src1, int len);
158 * Calculate the sum and difference of two vectors of floats.
160 * @param v1 first input vector, sum output, 16-byte aligned
161 * @param v2 second input vector, difference output, 16-byte aligned
162 * @param len length of vectors, multiple of 4
164 void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len);
167 * Calculate the scalar product of two vectors of floats.
169 * @param v1 first vector, 16-byte aligned
170 * @param v2 second vector, 16-byte aligned
171 * @param len length of vectors, multiple of 4
173 * @return sum of elementwise products
175 float (*scalarproduct_float)(const float *v1, const float *v2, int len);
178 * Calculate the entry wise product of two vectors of doubles and store the result in
179 * a vector of doubles.
181 * @param dst output vector
182 * constraints: 32-byte aligned
183 * @param src0 first input vector
184 * constraints: 32-byte aligned
185 * @param src1 second input vector
186 * constraints: 32-byte aligned
187 * @param len number of elements in the input
188 * constraints: multiple of 16
190 void (*vector_dmul)(double *dst, const double *src0, const double *src1,
/**
 * Return the scalar product of two vectors.
 *
 * @param v1  first input vector
 * @param v2  second input vector
 * @param len number of elements
 *
 * @return sum of elementwise products
 */
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);

205 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
206 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
207 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
208 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
209 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
212 * Allocate a float DSP context.
214 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant
216 AVFloatDSPContext *avpriv_float_dsp_alloc(int strict);
218 #endif /* AVUTIL_FLOAT_DSP_H */