3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
48 #define AAC_MAX_CHANNELS 6
/* Validation helpers used by aac_encode_init().  Both log the formatted
 * message through av_log() on a variable named avctx, which must be in scope
 * where the macro is expanded.  ERROR_IF additionally returns
 * AVERROR(EINVAL) from the enclosing function; WARN_IF only logs.
 * NOTE(review): presumably each body runs only when cond holds - confirm. */
50 #define ERROR_IF(cond, ...) \
52 av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
53 return AVERROR(EINVAL); \
56 #define WARN_IF(cond, ...) \
58 av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
/* Lookup table filled in aac_encode_init() with sqrt(x * sqrt(x)),
 * i.e. x^(3/4), for each entry x of ff_aac_pow2sf_tab. */
61 float ff_aac_pow34sf_tab[428];
/* Band-size tables selected through swb_size_1024[] further down.
 * aac_encode_init() picks swb_size_1024[i] for sample-rate index i and passes
 * it to ff_psy_init() as sizes[0].
 * NOTE(review): presumably these are scalefactor-band widths for the
 * 1024-coefficient (long) window - confirm against the AAC specification. */
63 static const uint8_t swb_size_1024_96[] = {
64 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
65 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
66 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
69 static const uint8_t swb_size_1024_64[] = {
70 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
71 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
72 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
75 static const uint8_t swb_size_1024_48[] = {
76 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
77 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
78 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
82 static const uint8_t swb_size_1024_32[] = {
83 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
84 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
85 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
88 static const uint8_t swb_size_1024_24[] = {
89 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
90 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
91 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
94 static const uint8_t swb_size_1024_16[] = {
95 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
96 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
97 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
100 static const uint8_t swb_size_1024_8[] = {
101 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
102 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
103 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
/* Sample-rate index -> band-size table.  The index is the position of the
 * sample rate in avpriv_mpeg4audio_sample_rates (see aac_encode_init());
 * several neighbouring indices share one table. */
106 static const uint8_t *swb_size_1024[] = {
107 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
108 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
109 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
110 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
/* Band-size tables selected through swb_size_128[] further down.
 * aac_encode_init() picks swb_size_128[i] for sample-rate index i and passes
 * it to ff_psy_init() as sizes[1].
 * NOTE(review): presumably these are scalefactor-band widths for one
 * 128-coefficient (short) window - confirm against the AAC specification. */
114 static const uint8_t swb_size_128_96[] = {
115 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
118 static const uint8_t swb_size_128_48[] = {
119 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
122 static const uint8_t swb_size_128_24[] = {
123 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
126 static const uint8_t swb_size_128_16[] = {
127 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
130 static const uint8_t swb_size_128_8[] = {
131 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
/* Sample-rate index -> band-size table, parallel to swb_size_1024[]. */
134 static const uint8_t *swb_size_128[] = {
135 /* the last entry on the following row is swb_size_128_64 but is a
136 duplicate of swb_size_128_96 */
137 swb_size_128_96, swb_size_128_96, swb_size_128_96,
138 swb_size_128_48, swb_size_128_48, swb_size_128_48,
139 swb_size_128_24, swb_size_128_24, swb_size_128_16,
140 swb_size_128_16, swb_size_128_16, swb_size_128_8,
144 /** default channel configurations */
/* Row index is (channel count - 1), see aac_encode_init().  The first byte of
 * a row is the number of syntactic elements; the following bytes are the
 * element types in coding order (read elsewhere as chan_map[0] and
 * chan_map[i+1]). */
145 static const uint8_t aac_chan_configs[6][5] = {
146 {1, TYPE_SCE}, // 1 channel - single channel element
147 {1, TYPE_CPE}, // 2 channels - channel pair
148 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
155 * Table to remap channels from libavcodec's default order to AAC order.
/* Row index is (channel count - 1); entry [ch] is the index into
 * frame->extended_data that copy_input_samples() reads for encoder
 * channel ch. */
157 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
163 { 2, 0, 1, 4, 5, 3 },
167 * Make AAC audio config object.
168 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
/* Serialises the codec configuration into avctx->extradata with a
 * PutBitContext.  The caller must have allocated avctx->extradata and set
 * avctx->extradata_size before calling (aac_encode_init() sets it to 5). */
170 static void put_audio_specific_config(AVCodecContext *avctx)
173 AACEncContext *s = avctx->priv_data;
175 init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176 put_bits(&pb, 5, 2); //object type - AAC-LC
177 put_bits(&pb, 4, s->samplerate_index); //sample rate index
/* channel count goes out as a 4-bit field; aac_encode_init() rejects
 * channel counts above AAC_MAX_CHANNELS before this is reached */
178 put_bits(&pb, 4, s->channels);
180 put_bits(&pb, 1, 0); //frame length - 1024 samples
181 put_bits(&pb, 1, 0); //does not depend on core coder
182 put_bits(&pb, 1, 0); //is not extension
184 //Explicitly Mark SBR absent
185 put_bits(&pb, 11, 0x2b7); //sync extension
186 put_bits(&pb, 5, AOT_SBR);
/* Expands to the signature of a static windowing routine named
 * apply_<type>_window; the function body follows each macro invocation.
 * The apply_window[] dispatch table declares the full parameter list as
 * (AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio). */
191 #define WINDOW_FUNC(type) \
192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
193 SingleChannelElement *sce, \
/* Windowing for a plain long block.  Writes 2048 samples to sce->ret_buf:
 * the first 1024 input samples times the window chosen by use_kb_window[0],
 * the next 1024 through vector_fmul_reverse with the window chosen by
 * use_kb_window[1] (presumably the window is applied back to front - confirm
 * against AVFloatDSPContext). */
196 WINDOW_FUNC(only_long)
198 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200 float *out = sce->ret_buf;
202 fdsp->vector_fmul (out, audio, lwindow, 1024);
203 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
/* Windowing for a long-to-short transition block.  Output layout in
 * sce->ret_buf (2048 samples): 1024 samples times the long window chosen by
 * use_kb_window[1], 448 samples copied unchanged, 128 samples through
 * vector_fmul_reverse with the short window chosen by use_kb_window[0],
 * then 448 zeros. */
206 WINDOW_FUNC(long_start)
208 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210 float *out = sce->ret_buf;
212 fdsp->vector_fmul(out, audio, lwindow, 1024);
213 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
/* Windowing for a short-to-long transition block.  Output layout in
 * sce->ret_buf (2048 samples): 448 zeros, 128 samples times the short window
 * chosen by use_kb_window[1], 448 samples copied unchanged, then 1024 samples
 * through vector_fmul_reverse with the long window chosen by
 * use_kb_window[0]. */
218 WINDOW_FUNC(long_stop)
220 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222 float *out = sce->ret_buf;
224 memset(out, 0, sizeof(out[0]) * 448);
225 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
/* Windowing for a block of eight short windows.  Input is read starting at
 * audio + 448 and the result goes to sce->ret_buf.  Each of the 8 iterations
 * windows 128 samples forward and 128 samples through vector_fmul_reverse.
 * NOTE(review): the in/out pointer advances between the two calls are
 * presumably done inside the loop - confirm. */
230 WINDOW_FUNC(eight_short)
232 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234 const float *in = audio + 448;
235 float *out = sce->ret_buf;
238 for (w = 0; w < 8; w++) {
/* window 0 uses swindow for its first half, every later window pwindow */
239 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
242 fdsp->vector_fmul_reverse(out, in, swindow, 128);
/* Dispatch table from window sequence to windowing routine; indexed with
 * ics.window_sequence[0] in apply_window_and_mdct(). */
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
248 SingleChannelElement *sce,
249 const float *audio) = {
250 [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251 [LONG_START_SEQUENCE] = apply_long_start_window,
252 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253 [LONG_STOP_SEQUENCE] = apply_long_stop_window
/* Windows the samples at audio into sce->ret_buf using the routine selected
 * by the current window sequence, transforms the result into sce->coeffs,
 * then moves audio[1024..2047] down to audio[0..1023] and keeps a copy of
 * the coefficients in sce->pcoeffs. */
256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
260 float *output = sce->ret_buf;
262 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
/* anything but eight-short: one transform with the mdct1024 context */
264 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
265 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
/* presumably the else branch: eight transforms with the mdct128 context,
 * each writing 128 coefficients from an input offset that steps by 256 */
267 for (i = 0; i < 1024; i += 128)
268 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
269 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
270 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
274 * Encode ics_info element.
275 * @see Table 4.6 (syntax of ics_info)
/* Writes the ics_info fields of info to s->pb: a reserved bit, the 2-bit
 * window sequence, the window-shape bit, then max_sfb in a width that depends
 * on the window sequence. */
277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
281 put_bits(&s->pb, 1, 0); // ics_reserved bit
282 put_bits(&s->pb, 2, info->window_sequence[0]);
283 put_bits(&s->pb, 1, info->use_kb_window[0]);
284 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
285 put_bits(&s->pb, 6, info->max_sfb);
286 put_bits(&s->pb, 1, 0); // no prediction
/* presumably the eight-short branch: 4-bit max_sfb followed by one grouping
 * bit for each of windows 1..7, set when group_len[w] is zero */
288 put_bits(&s->pb, 4, info->max_sfb);
289 for (w = 1; w < 8; w++)
290 put_bits(&s->pb, 1, !info->group_len[w]);
296 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
/* Writes the 2-bit cpe->ms_mode to pb.  Only when ms_mode is 1 it also
 * writes one mask bit per window group and band below max_sfb, taken from
 * cpe->ms_mask[w*16 + i] with w the first window of the group. */
298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
302 put_bits(pb, 2, cpe->ms_mode);
303 if (cpe->ms_mode == 1)
304 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
310 * Finalize per-frame stream information: apply M/S to masked bands, derive max_sfb, merge zero flags per window group and select the M/S mode.
/* Post-processes the per-channel results of one channel element before it is
 * written: applies the mid/side transform to masked bands, derives max_sfb,
 * merges the zero flags of each window group and, for a common-window pair,
 * chooses cpe->ms_mode.  chans is the number of channels in cpe to process. */
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
315 int start, maxsfb, cmaxsfb;
317 for (ch = 0; ch < chans; ch++) {
318 IndividualChannelStream *ics = &cpe->ch[ch].ics;
321 cpe->ch[ch].pulse.num_pulse = 0;
322 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
323 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
/* each window occupies 128 coefficients */
324 start = (w+w2) * 128;
325 for (g = 0; g < ics->num_swb; g++) {
/* done once per pair (only while ch == 0) and only for masked bands:
 * ch[0] becomes (L + R) / 2 and ch[1] becomes (L - R) / 2, both computed
 * from the saved pcoeffs */
327 if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
328 for (i = 0; i < ics->swb_sizes[g]; i++) {
329 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
330 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
333 start += ics->swb_sizes[g];
/* cmaxsfb ends as one past the highest band of this group not flagged zero */
335 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
337 maxsfb = FFMAX(maxsfb, cmaxsfb);
340 ics->max_sfb = maxsfb;
342 //adjust zero bands for window groups
343 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
344 for (g = 0; g < ics->max_sfb; g++) {
346 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
347 if (!cpe->ch[ch].zeroes[w2*16 + g]) {
/* NOTE(review): i presumably ends up 1 only if band g is zero in every
 * window of the group - confirm */
352 cpe->ch[ch].zeroes[w*16 + g] = i;
357 if (chans > 1 && cpe->common_window) {
358 IndividualChannelStream *ics0 = &cpe->ch[0].ics;
359 IndividualChannelStream *ics1 = &cpe->ch[1].ics;
/* both channels of a common-window pair share the larger max_sfb */
361 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
362 ics1->max_sfb = ics0->max_sfb;
363 for (w = 0; w < ics0->num_windows*16; w += 16)
364 for (i = 0; i < ics0->max_sfb; i++)
365 if (cpe->ms_mask[w+i])
367 if (msc == 0 || ics0->max_sfb == 0)
/* msc is presumably the number of set mask bits: mode 1 (explicit mask)
 * while some band is unmasked, mode 2 once every band is masked */
370 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
375 * Encode scalefactor band coding type.
/* Calls the selected coder's encode_window_bands_info() once per window
 * group of sce, passing the group start w, its length and s->lambda. */
377 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
381 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
382 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
386 * Encode scalefactors.
/* Writes the scalefactors of sce to s->pb as differences: every non-zero
 * band below max_sfb is coded as its sf_idx minus the previously coded
 * sf_idx, offset by SCALE_DIFF_ZERO, using the ff_aac_scalefactor_bits /
 * ff_aac_scalefactor_code tables.  The first reference value is sf_idx[0]. */
388 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
389 SingleChannelElement *sce)
391 int off = sce->sf_idx[0], diff;
394 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
395 for (i = 0; i < sce->ics.max_sfb; i++) {
396 if (!sce->zeroes[w*16 + i]) {
397 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
/* aborts if the difference cannot index the code tables */
398 av_assert0(diff >= 0 && diff <= 120);
399 off = sce->sf_idx[w*16 + i];
400 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
/* Writes pulse data to s->pb: a presence bit, then (presumably only when
 * num_pulse is non-zero) the 2-bit pulse count minus one, the 6-bit start
 * value and, per pulse, a 5-bit position and a 4-bit amplitude. */
409 static void encode_pulses(AACEncContext *s, Pulse *pulse)
413 put_bits(&s->pb, 1, !!pulse->num_pulse);
414 if (!pulse->num_pulse)
417 put_bits(&s->pb, 2, pulse->num_pulse - 1);
418 put_bits(&s->pb, 6, pulse->start);
419 for (i = 0; i < pulse->num_pulse; i++) {
420 put_bits(&s->pb, 5, pulse->pos[i]);
421 put_bits(&s->pb, 4, pulse->amp[i]);
426 * Encode spectral coefficients processed by psychoacoustic model.
/* Walks every window group and every band below max_sfb of sce and hands the
 * coefficients of each non-zero band to the selected coder's
 * quantize_and_encode_band(), once per window of the group. */
428 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
432 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
434 for (i = 0; i < sce->ics.max_sfb; i++) {
/* zeroed band: only advance the coefficient offset */
435 if (sce->zeroes[w*16 + i]) {
436 start += sce->ics.swb_sizes[i];
/* start is the offset inside one window, w2*128 selects the window;
 * sf_idx and band_type are read from the first window of the group */
439 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
440 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
441 sce->ics.swb_sizes[i],
442 sce->sf_idx[w*16 + i],
443 sce->band_type[w*16 + i],
445 start += sce->ics.swb_sizes[i];
451 * Encode one channel of audio data.
/* Writes one channel stream to s->pb: the 8-bit sf_idx[0], ics_info, band
 * info, scalefactors, pulse data, two zero flag bits and the spectral
 * coefficients, in that order.
 * NOTE(review): aac_encode_frame() writes ics_info itself when common_window
 * is set, so the put_ics_info() call here is presumably conditional on the
 * last argument - confirm. */
453 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
454 SingleChannelElement *sce,
457 put_bits(&s->pb, 8, sce->sf_idx[0]);
459 put_ics_info(s, &sce->ics);
460 encode_band_info(s, sce);
461 encode_scale_factors(avctx, s, sce);
462 encode_pulses(s, &sce->pulse);
463 put_bits(&s->pb, 1, 0); //tns
464 put_bits(&s->pb, 1, 0); //ssr
465 encode_spectral_coeffs(s, sce);
470 * Write some auxiliary information about the created AAC file.
/* Writes a TYPE_FIL element to s->pb whose payload carries the bytes of
 * name.  namelen is the payload size in bytes: strlen(name) plus 2. */
472 static void put_bitstream_info(AACEncContext *s, const char *name)
474 int i, namelen, padbits;
476 namelen = strlen(name) + 2;
477 put_bits(&s->pb, 3, TYPE_FIL);
/* 4-bit count saturates at 15; NOTE(review): the 8-bit remainder below is
 * presumably written only when namelen >= 15 - confirm */
478 put_bits(&s->pb, 4, FFMIN(namelen, 15));
480 put_bits(&s->pb, 8, namelen - 14);
481 put_bits(&s->pb, 4, 0); //extension type - filler
/* number of bits missing to the next byte boundary */
482 padbits = -put_bits_count(&s->pb) & 7;
483 avpriv_align_put_bits(&s->pb);
484 for (i = 0; i < namelen - 2; i++)
485 put_bits(&s->pb, 8, name[i]);
/* 4 type bits + padbits + (12 - padbits) = 16 bits, which together with the
 * namelen - 2 name bytes makes the payload exactly namelen bytes */
486 put_bits(&s->pb, 12 - padbits, 0);
490 * Copy input samples.
491 * Channels are reordered from libavcodec's default order to AAC order.
/* Shifts the per-channel sample buffers of s and appends the samples of
 * frame.  Each s->planar_samples[ch] is used up to index 3071; new samples
 * land at index 2048 and the tail up to 3072 is zero-filled.  frame may be
 * NULL (end is then 2048, so the whole tail is zeroed). */
493 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
496 int end = 2048 + (frame ? frame->nb_samples : 0);
497 const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
499 /* copy and remap input samples */
500 for (ch = 0; ch < s->channels; ch++) {
501 /* copy last 1024 samples of previous frame to the start of the current frame */
502 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
504 /* copy new samples and zero any remaining samples */
/* NOTE(review): frame is dereferenced here; presumably guarded by a NULL
 * check - confirm */
506 memcpy(&s->planar_samples[ch][2048],
507 frame->extended_data[channel_map[ch]],
508 frame->nb_samples * sizeof(s->planar_samples[0][0]));
510 memset(&s->planar_samples[ch][end], 0,
511 (3072 - end) * sizeof(s->planar_samples[0][0]));
/* Encode callback (registered as .encode2 in ff_aac_encoder).  Visible flow:
 * queue the frame, copy and remap its samples, decide the window and run
 * windowing + MDCT for every channel, then write all channel elements into
 * avpkt and adjust s->lambda from the number of bits produced. */
515 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
516 const AVFrame *frame, int *got_packet_ptr)
518 AACEncContext *s = avctx->priv_data;
519 float **samples = s->planar_samples, *samples2, *la, *overlap;
521 int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
522 int chan_el_counter[4];
523 FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
525 if (s->last_frame == 2)
528 /* add current frame to queue */
530 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
534 copy_input_samples(s, frame);
536 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
538 if (!avctx->frame_number)
/* analysis pass: s->chan_map[0] elements, element i has type chan_map[i+1] */
542 for (i = 0; i < s->chan_map[0]; i++) {
543 FFPsyWindowInfo* wi = windows + start_ch;
544 tag = s->chan_map[i+1];
545 chans = tag == TYPE_CPE ? 2 : 1;
547 for (ch = 0; ch < chans; ch++) {
548 IndividualChannelStream *ics = &cpe->ch[ch].ics;
549 int cur_channel = start_ch + ch;
550 overlap = &samples[cur_channel][0];
551 samples2 = overlap + 1024;
552 la = samples2 + (448+64);
/* LFE channels get a fixed single long window instead of asking the
 * psychoacoustic model */
555 if (tag == TYPE_LFE) {
556 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
557 wi[ch].window_shape = 0;
558 wi[ch].num_windows = 1;
559 wi[ch].grouping[0] = 1;
561 /* Only the lowest 12 coefficients are used in a LFE channel.
562 * The expression below results in only the bottom 8 coefficients
563 * being used for 11.025kHz to 16kHz sample rates.
565 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
567 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
568 ics->window_sequence[0]);
/* slot [1] keeps the previous frame's choice, slot [0] takes the new one */
570 ics->window_sequence[1] = ics->window_sequence[0];
571 ics->window_sequence[0] = wi[ch].window_type[0];
572 ics->use_kb_window[1] = ics->use_kb_window[0];
573 ics->use_kb_window[0] = wi[ch].window_shape;
574 ics->num_windows = wi[ch].num_windows;
575 ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
576 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
577 for (w = 0; w < ics->num_windows; w++)
578 ics->group_len[w] = wi[ch].grouping[w];
580 apply_window_and_mdct(s, &cpe->ch[ch], overlap);
/* NOTE(review): cpe->ch->coeffs[0] is channel 0 of the element for every
 * value of ch, so the second channel of a pair is never tested here;
 * cpe->ch[ch].coeffs[0] looks like the intended operand - confirm */
581 if (isnan(cpe->ch->coeffs[0])) {
582 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
583 return AVERROR(EINVAL);
588 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
593 init_put_bits(&s->pb, avpkt->data, avpkt->size);
/* identification filler: written when the low 8 bits of frame_number are 1
 * and bit-exact output is not requested */
595 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
596 put_bitstream_info(s, LIBAVCODEC_IDENT);
598 memset(chan_el_counter, 0, sizeof(chan_el_counter));
/* coding pass over the same elements */
599 for (i = 0; i < s->chan_map[0]; i++) {
600 FFPsyWindowInfo* wi = windows + start_ch;
601 const float *coeffs[2];
602 tag = s->chan_map[i+1];
603 chans = tag == TYPE_CPE ? 2 : 1;
/* element header: 3-bit type and a 4-bit per-type running instance number */
605 put_bits(&s->pb, 3, tag);
606 put_bits(&s->pb, 4, chan_el_counter[tag]++);
607 for (ch = 0; ch < chans; ch++)
608 coeffs[ch] = cpe->ch[ch].coeffs;
609 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
610 for (ch = 0; ch < chans; ch++) {
611 s->cur_channel = start_ch + ch;
612 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
/* common_window is set only when both channels agree on window type,
 * window shape and the grouping of every window */
614 cpe->common_window = 0;
616 && wi[0].window_type[0] == wi[1].window_type[0]
617 && wi[0].window_shape == wi[1].window_shape) {
619 cpe->common_window = 1;
620 for (w = 0; w < wi[0].num_windows; w++) {
621 if (wi[0].grouping[w] != wi[1].grouping[w]) {
622 cpe->common_window = 0;
627 s->cur_channel = start_ch;
/* stereo_mode > 0 masks every band for M/S; a negative value delegates to
 * the coder's search_for_ms when it provides one */
628 if (s->options.stereo_mode && cpe->common_window) {
629 if (s->options.stereo_mode > 0) {
630 IndividualChannelStream *ics = &cpe->ch[0].ics;
631 for (w = 0; w < ics->num_windows; w += ics->group_len[w])
632 for (g = 0; g < ics->num_swb; g++)
633 cpe->ms_mask[w*16+g] = 1;
634 } else if (s->coder->search_for_ms) {
635 s->coder->search_for_ms(s, cpe, s->lambda);
638 adjust_frame_information(cpe, chans);
640 put_bits(&s->pb, 1, cpe->common_window);
641 if (cpe->common_window) {
642 put_ics_info(s, &cpe->ch[0].ics);
643 encode_ms_info(&s->pb, cpe);
644 if (cpe->ms_mode) ms_mode = 1;
647 for (ch = 0; ch < chans; ch++) {
648 s->cur_channel = start_ch + ch;
649 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
654 frame_bits = put_bits_count(&s->pb);
/* budget of 6144 bits per channel, less the 3 bits of the TYPE_END marker
 * written after this check */
655 if (frame_bits <= 6144 * s->channels - 3) {
656 s->psy.bitres.bits = frame_bits / s->channels;
660 for (i = 0; i < s->chan_map[0]; i++) {
661 // Must restore coeffs
/* NOTE(review): tag is not reloaded from s->chan_map[i+1] in this loop, so
 * it still holds whatever the coding pass left in it and chans may be wrong
 * for element i - confirm */
662 chans = tag == TYPE_CPE ? 2 : 1;
664 for (ch = 0; ch < chans; ch++)
665 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
/* scale lambda by (bits per 1024-sample frame at bit_rate) / (bits produced) */
669 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
673 put_bits(&s->pb, 3, TYPE_END);
674 flush_put_bits(&s->pb);
675 avctx->frame_bits = put_bits_count(&s->pb);
677 // rate control stuff
678 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
679 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
681 s->lambda = FFMIN(s->lambda, 65536.f);
687 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
/* packet size in whole bytes */
690 avpkt->size = put_bits_count(&s->pb) >> 3;
/* Close callback (registered as .close); also called by aac_encode_init().
 * Releases the two MDCT contexts, the psy preprocessing context, the sample
 * buffer and the audio frame queue held in the private context. */
695 static av_cold int aac_encode_end(AVCodecContext *avctx)
697 AACEncContext *s = avctx->priv_data;
699 ff_mdct_end(&s->mdct1024);
700 ff_mdct_end(&s->mdct128);
703 ff_psy_preprocess_end(s->psypp);
704 av_freep(&s->buffer.samples);
707 ff_af_queue_close(&s->afq);
/* Sets up the signal-processing state of s: the float DSP context, the KBD
 * and sine window tables, and the two MDCT contexts.  Yields
 * AVERROR(ENOMEM) on allocation failure (presumably when the float DSP
 * allocation returns NULL - confirm). */
711 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
715 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
717 return AVERROR(ENOMEM);
720 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
721 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
722 ff_init_ff_sine_windows(10);
723 ff_init_ff_sine_windows(7);
/* the assignment inside the condition is deliberate: a non-zero result of
 * ff_mdct_init() is kept in ret */
725 if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
727 if (ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0))
/* Allocates the zero-initialised buffers of the encoder: the shared sample
 * buffer (3 * 1024 elements per channel), the array of s->chan_map[0]
 * channel elements and the 5-byte extradata plus padding.  Any failing
 * allocation jumps to alloc_fail, which yields AVERROR(ENOMEM). */
733 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
736 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
737 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
738 FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
/* each channel gets its own 3 * 1024 slice of the shared buffer */
740 for(ch = 0; ch < s->channels; ch++)
741 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
745 return AVERROR(ENOMEM);
/* Init callback (registered as .init).  Validates sample rate, channel count
 * and profile, clamps the bit rate, then sets up DSP state, buffers,
 * extradata, the psychoacoustic model, the coefficient coder and the
 * ff_aac_pow34sf_tab table. */
748 static av_cold int aac_encode_init(AVCodecContext *avctx)
750 AACEncContext *s = avctx->priv_data;
752 const uint8_t *sizes[2];
753 uint8_t grouping[AAC_MAX_CHANNELS];
756 avctx->frame_size = 1024;
/* find the index i of the sample rate in avpriv_mpeg4audio_sample_rates */
758 for (i = 0; i < 16; i++)
759 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
762 s->channels = avctx->channels;
/* i must also be a valid index into both band-size lookup tables */
765 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
766 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
767 "Unsupported sample rate %d\n", avctx->sample_rate);
768 ERROR_IF(s->channels > AAC_MAX_CHANNELS,
769 "Unsupported number of channels: %d\n", s->channels);
770 ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
771 "Unsupported profile %d\n", avctx->profile);
/* 1024 * bit_rate / sample_rate is the bit count of one 1024-sample frame */
772 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
773 "Too many bits per frame requested, clamping to max\n");
775 avctx->bit_rate = (int)FFMIN(
776 6144 * s->channels / 1024.0 * avctx->sample_rate,
779 s->samplerate_index = i;
/* NOTE(review): only the upper bound of channels is checked above; a value
 * of 0 would index aac_chan_configs[-1] - verify the caller guarantees
 * channels >= 1 */
781 s->chan_map = aac_chan_configs[s->channels-1];
783 if ((ret = dsp_init(avctx, s)) < 0)
786 if ((ret = alloc_buffers(avctx, s)) < 0)
789 avctx->extradata_size = 5;
790 put_audio_specific_config(avctx);
792 sizes[0] = swb_size_1024[i];
793 sizes[1] = swb_size_128[i];
794 lengths[0] = ff_aac_num_swb_1024[i];
795 lengths[1] = ff_aac_num_swb_128[i];
/* i is reused as element index from here on; grouping[i] is 1 for channel
 * pair elements and 0 otherwise */
796 for (i = 0; i < s->chan_map[0]; i++)
797 grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
798 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
799 s->chan_map[0], grouping)) < 0)
801 s->psypp = ff_psy_preprocess_init(avctx);
802 s->coder = &ff_aac_coders[s->options.aac_coder];
805 ff_aac_coder_init_mips(s);
/* start lambda: global_quality when positive, otherwise 120 */
807 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
/* sqrt(x * sqrt(x)) equals x^(3/4) */
811 for (i = 0; i < 428; i++)
812 ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
814 avctx->initial_padding = 1024;
815 ff_af_queue_init(avctx, &s->afq);
/* presumably the failure path: release whatever was set up so far */
819 aac_encode_end(avctx);
/* Private options of the encoder.  stereo_mode (default 0, range -1..1) and
 * aac_coder (default AAC_CODER_TWOLOOP, range 0..AAC_CODER_NB-1) are stored
 * in AACEncContext.options; the AV_OPT_TYPE_CONST rows are the named values
 * of the option whose unit string they share.
 * NOTE(review): AACENC_FLAGS expands to an unparenthesised `a | b`; that is
 * fine as a bare initializer field, but parenthesise it before using it
 * inside a larger expression. */
823 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
824 static const AVOption aacenc_options[] = {
825 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
826 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
827 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
828 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
829 {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
830 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
831 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
832 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
833 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
/* AVClass of the encoder's private context; referenced by
 * ff_aac_encoder.priv_class.  Uses positional initializers, so the order of
 * the entries matters. */
837 static const AVClass aacenc_class = {
839 av_default_item_name,
841 LIBAVUTIL_VERSION_INT,
844 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
/* Sample rates advertised through ff_aac_encoder.supported_samplerates.
 * Only 13 of the 16 slots are listed explicitly; presumably the implicit
 * zero entries terminate the list - confirm. */
846 static const int mpeg4audio_sample_rates[16] = {
847 96000, 88200, 64000, 48000, 44100, 32000,
848 24000, 22050, 16000, 12000, 11025, 8000, 7350
/* Codec descriptor that registers the encoder: it wires aac_encode_init,
 * aac_encode_frame and aac_encode_end as the init/encode2/close callbacks,
 * sizes the private context as AACEncContext and accepts planar float
 * (AV_SAMPLE_FMT_FLTP) input only. */
851 AVCodec ff_aac_encoder = {
853 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
854 .type = AVMEDIA_TYPE_AUDIO,
855 .id = AV_CODEC_ID_AAC,
856 .priv_data_size = sizeof(AACEncContext),
857 .init = aac_encode_init,
858 .encode2 = aac_encode_frame,
859 .close = aac_encode_end,
860 .supported_samplerates = mpeg4audio_sample_rates,
861 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
862 CODEC_CAP_EXPERIMENTAL,
863 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
864 AV_SAMPLE_FMT_NONE },
865 .priv_class = &aacenc_class,