]> git.sesse.net Git - ffmpeg/blob - libavresample/x86/audio_mix_init.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavresample / x86 / audio_mix_init.c
1 /*
2  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "config.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/x86/cpu.h"
24 #include "libavresample/audio_mix.h"
25
/*
 * Prototypes for the fixed 2-to-1 and 1-to-2 mixing routines that
 * ff_audio_mix_init_x86() registers.  Their definitions are not in this
 * file.  Function declarations have external linkage by default, so no
 * storage-class specifier is spelled out.
 */

/* 2-to-1, registered for AV_SAMPLE_FMT_FLTP with AV_MIX_COEFF_TYPE_FLT */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* 2-to-1, registered for AV_SAMPLE_FMT_S16P with AV_MIX_COEFF_TYPE_FLT */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);

/* 2-to-1, registered for AV_SAMPLE_FMT_S16P with AV_MIX_COEFF_TYPE_Q8 */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);

/* 1-to-2, registered for AV_SAMPLE_FMT_FLTP with AV_MIX_COEFF_TYPE_FLT */
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* 1-to-2, registered for AV_SAMPLE_FMT_S16P with AV_MIX_COEFF_TYPE_FLT */
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix,
                                int len, int out_ch, int in_ch);
50
/*
 * Declare the "_to_1" and "_to_2" variants of one mixing routine.
 *   chan - the <chan> component of the symbol name
 *   fmt  - the sample-format component of the symbol name (fltp or s16p)
 *   type - element type behind the src pointer
 *   ext  - the instruction-set suffix of the symbol name
 * The expansion ends with a ';', so uses are written without one.
 */
#define MIX_DECL_1_2(chan, fmt, type, ext)                                  \
void ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## ext(type **src,           \
     float **matrix, int len, int out_ch, int in_ch);                       \
void ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## ext(type **src,           \
     float **matrix, int len, int out_ch, int in_ch);

/*
 * Declare every <chan>-to-1 and <chan>-to-2 routine for one value of chan:
 * fltp/SSE, s16p/SSE2, s16p/SSE4, then fltp and s16p for AVX and for FMA4.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                         \
    MIX_DECL_1_2(chan, fltp, float,   sse)                                  \
    MIX_DECL_1_2(chan, s16p, int16_t, sse2)                                 \
    MIX_DECL_1_2(chan, s16p, int16_t, sse4)                                 \
    MIX_DECL_1_2(chan, fltp, float,   avx)                                  \
    MIX_DECL_1_2(chan, s16p, int16_t, avx)                                  \
    MIX_DECL_1_2(chan, fltp, float,   fma4)                                 \
    MIX_DECL_1_2(chan, s16p, int16_t, fma4)

/* Instantiate the declarations for 3 through 8 channels. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)
107
/*
 * Register the "_to_1" and then the "_to_2" variant of one routine family
 * through ff_audio_mix_set_func(), always with AV_MIX_COEFF_TYPE_FLT.
 * Expects a variable named `am` to be in scope where it is expanded.
 *   fmt    - sample format constant passed to ff_audio_mix_set_func()
 *   sym    - the sample-format component of the symbol name
 *   ext    - the instruction-set suffix of the symbol name
 *   ptr_al, smp_al, descr - forwarded unchanged to ff_audio_mix_set_func()
 */
#define MIX_SET_1_2(chan, fmt, sym, ext, ptr_al, smp_al, descr)             \
    ff_audio_mix_set_func(am, fmt, AV_MIX_COEFF_TYPE_FLT,                   \
                          chan, 1, ptr_al, smp_al, descr,                   \
                          ff_mix_ ## chan ## _to_1_ ## sym ## _flt_ ## ext);\
    ff_audio_mix_set_func(am, fmt, AV_MIX_COEFF_TYPE_FLT,                   \
                          chan, 2, ptr_al, smp_al, descr,                   \
                          ff_mix_ ## chan ## _to_2_ ## sym ## _flt_ ## ext);

/*
 * Register all <chan>-to-1 and <chan>-to-2 routines for one value of chan,
 * one instruction-set level at a time (SSE, SSE2, SSE4, AVX, FMA4).
 * Expects `mm_flags` and `am` to be in scope where it is expanded.
 * For the AVX and FMA4 fltp routines the alignment arguments are 32/8,
 * dropping to 16/4 when ARCH_X86_32 is set or chan is 6 or more.
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(mm_flags)) {                                           \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, sse, 16, 4, "SSE")      \
    }                                                                       \
    if (EXTERNAL_SSE2(mm_flags)) {                                          \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, sse2, 16, 8, "SSE2")    \
    }                                                                       \
    if (EXTERNAL_SSE4(mm_flags)) {                                          \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, sse4, 16, 8, "SSE4")    \
    }                                                                       \
    if (EXTERNAL_AVX(mm_flags)) {                                           \
        const int narrow    = ARCH_X86_32 || chan >= 6;                     \
        const int ptr_align = narrow ? 16 : 32;                             \
        const int smp_align = narrow ?  4 :  8;                             \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, avx,                    \
                    ptr_align, smp_align, "AVX")                            \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, avx, 16, 8, "AVX")      \
    }                                                                       \
    if (EXTERNAL_FMA4(mm_flags)) {                                          \
        const int narrow    = ARCH_X86_32 || chan >= 6;                     \
        const int ptr_align = narrow ? 16 : 32;                             \
        const int smp_align = narrow ?  4 :  8;                             \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, fma4,                   \
                    ptr_align, smp_align, "FMA4")                           \
        MIX_SET_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, fma4, 16, 8, "FMA4")    \
    }
173
174 av_cold void ff_audio_mix_init_x86(AudioMix *am)
175 {
176 #if HAVE_YASM
177     int mm_flags = av_get_cpu_flags();
178
179     if (EXTERNAL_SSE(mm_flags)) {
180         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
181                               2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
182         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
183                               1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
184     }
185     if (EXTERNAL_SSE2(mm_flags)) {
186         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
187                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
188         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
189                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
190         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
191                               1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
192     }
193     if (EXTERNAL_SSE4(mm_flags)) {
194         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
195                               2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
196         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
197                               1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
198     }
199     if (EXTERNAL_AVX(mm_flags)) {
200         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
201                               2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
202         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
203                               1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
204         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
205                               1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
206     }
207
208     SET_MIX_3_8_TO_1_2(3)
209     SET_MIX_3_8_TO_1_2(4)
210     SET_MIX_3_8_TO_1_2(5)
211     SET_MIX_3_8_TO_1_2(6)
212     SET_MIX_3_8_TO_1_2(7)
213     SET_MIX_3_8_TO_1_2(8)
214 #endif /* HAVE_YASM */
215 }