3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
48 #define AAC_MAX_CHANNELS 6
/*
 * Validation helper used by aac_encode_init: logs the formatted message at
 * AV_LOG_ERROR through the avctx of the expanding scope and returns
 * AVERROR(EINVAL) from the enclosing function.
 * The line that tests cond is not visible in this listing; presumably the
 * log and return run only when cond is true.
 */
50 #define ERROR_IF(cond, ...) \
52 av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
53 return AVERROR(EINVAL); \
/*
 * Non-fatal counterpart of ERROR_IF: logs the formatted message at
 * AV_LOG_WARNING through the avctx of the expanding scope. No return
 * statement is visible. The line that tests cond is not visible in this
 * listing; presumably the log runs only when cond is true.
 */
56 #define WARN_IF(cond, ...) \
58 av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
/*
 * Lookup table filled by aac_encode_init: entry i holds
 * sqrt(x * sqrt(x)) with x = ff_aac_pow2sf_tab[i], i.e. x raised to 3/4.
 */
61 float ff_aac_pow34sf_tab[428];
/*
 * Band width tables for 1024-coefficient windows. Each
 * swb_size_1024_<n> array lists consecutive band widths, in coefficients.
 * The numeric suffix presumably names the sampling rate in kHz the table
 * was designed for (TODO confirm); the mapping actually used is the index
 * table swb_size_1024 at the end of this group.
 */
63 static const uint8_t swb_size_1024_96[] = {
64 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
65 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
66 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
69 static const uint8_t swb_size_1024_64[] = {
70 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
71 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
72 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
75 static const uint8_t swb_size_1024_48[] = {
76 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
77 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
78 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
82 static const uint8_t swb_size_1024_32[] = {
83 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
84 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
85 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
88 static const uint8_t swb_size_1024_24[] = {
89 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
90 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
91 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
94 static const uint8_t swb_size_1024_16[] = {
95 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
96 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
97 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
100 static const uint8_t swb_size_1024_8[] = {
101 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
102 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
103 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
/*
 * Index table: aac_encode_init reads swb_size_1024[i], where i is the
 * position of avctx->sample_rate in avpriv_mpeg4audio_sample_rates.
 * Several neighbouring indices share one width table.
 */
106 static const uint8_t *swb_size_1024[] = {
107 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
108 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
109 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
110 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
/*
 * Band width tables for 128-coefficient windows, laid out like the
 * swb_size_1024_<n> tables: consecutive band widths, in coefficients.
 * The numeric suffix presumably names a sampling rate in kHz (TODO confirm).
 */
114 static const uint8_t swb_size_128_96[] = {
115 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
118 static const uint8_t swb_size_128_48[] = {
119 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
122 static const uint8_t swb_size_128_24[] = {
123 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
126 static const uint8_t swb_size_128_16[] = {
127 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
130 static const uint8_t swb_size_128_8[] = {
131 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
/*
 * Index table: aac_encode_init reads swb_size_128[i] with the same
 * sample rate index i that selects swb_size_1024[i].
 */
134 static const uint8_t *swb_size_128[] = {
135 /* the last entry on the following row is swb_size_128_64 but is a
136 duplicate of swb_size_128_96 */
137 swb_size_128_96, swb_size_128_96, swb_size_128_96,
138 swb_size_128_48, swb_size_128_48, swb_size_128_48,
139 swb_size_128_24, swb_size_128_24, swb_size_128_16,
140 swb_size_128_16, swb_size_128_16, swb_size_128_8,
144 /** default channel configurations */
/*
 * One row per supported channel count; aac_encode_init selects row
 * (channels - 1) as s->chan_map. Byte 0 of a row is the number of
 * syntactic elements; the bytes after it are the element types, read by
 * the encoder as chan_map[i + 1] for element i.
 */
145 static const uint8_t aac_chan_configs[6][5] = {
146 {1, TYPE_SCE}, // 1 channel - single channel element
147 {1, TYPE_CPE}, // 2 channels - channel pair
148 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
155 * Table to remap channels from libavcodec's default order to AAC order.
/*
 * copy_input_samples selects row (channels - 1) of this table; entry ch
 * of that row is the index into frame->extended_data whose samples are
 * copied into encoder channel ch. Only one row is visible in this listing.
 */
157 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
163 { 2, 0, 1, 4, 5, 3 },
167 * Make AAC audio config object.
168 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
/*
 * Serialise the stream configuration into avctx->extradata, using
 * avctx->extradata_size as the buffer size. Fields are written in the
 * order shown below; each put_bits call takes the bit width first and
 * the value second.
 * The declaration of pb and any writes after the AOT_SBR field are not
 * visible in this listing.
 */
170 static void put_audio_specific_config(AVCodecContext *avctx)
173 AACEncContext *s = avctx->priv_data;
175 init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176 put_bits(&pb, 5, 2); //object type - AAC-LC
177 put_bits(&pb, 4, s->samplerate_index); //sample rate index
178 put_bits(&pb, 4, s->channels); //4-bit field filled with the channel count
180 put_bits(&pb, 1, 0); //frame length - 1024 samples
181 put_bits(&pb, 1, 0); //does not depend on core coder
182 put_bits(&pb, 1, 0); //is not extension
184 //Explicitly Mark SBR absent
185 put_bits(&pb, 11, 0x2b7); //sync extension
186 put_bits(&pb, 5, AOT_SBR); //extension object type; the flag that follows is not visible here
/*
 * Emits the signature of a windowing routine named apply_<type>_window.
 * Only the fdsp and sce parameters are visible in this listing; the
 * apply_window dispatch table shows a third parameter, const float *audio.
 * The function body follows each WINDOW_FUNC(...) use.
 */
191 #define WINDOW_FUNC(type) \
192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
193 SingleChannelElement *sce, \
196 WINDOW_FUNC(only_long)
/*
 * Window 2048 input samples into sce->ret_buf for a long block.
 * Samples 0..1023 are multiplied by lwindow. Samples 1024..2047 go
 * through vector_fmul_reverse with pwindow (presumably the window applied
 * in reverse order - confirm against the float DSP documentation).
 * Each window is the KBD table when the matching use_kb_window flag is
 * set, otherwise the sine table.
 */
198 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200 float *out = sce->ret_buf;
202 fdsp->vector_fmul (out, audio, lwindow, 1024);
203 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
206 WINDOW_FUNC(long_start)
/*
 * Window 2048 input samples into sce->ret_buf for a long-start block:
 *   0..1023     multiplied by the long window
 *   1024..1471  copied unchanged (448 samples)
 *   1472..1599  short window through vector_fmul_reverse (128 samples)
 *   1600..2047  zeroed (448 samples)
 */
208 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210 float *out = sce->ret_buf;
212 fdsp->vector_fmul(out, audio, lwindow, 1024);
213 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
218 WINDOW_FUNC(long_stop)
/*
 * Window 2048 input samples into sce->ret_buf for a long-stop block:
 *   0..447      zeroed (448 samples)
 *   448..575    multiplied by the short window (128 samples)
 *   576..1023   copied unchanged (448 samples)
 *   1024..2047  long window through vector_fmul_reverse
 */
220 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222 float *out = sce->ret_buf;
224 memset(out, 0, sizeof(out[0]) * 448);
225 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
230 WINDOW_FUNC(eight_short)
/*
 * Window eight short blocks into sce->ret_buf, reading input from
 * audio + 448 onwards. Per block, 128 samples are multiplied by a short
 * window (swindow for the first block, pwindow for the others) and 128
 * samples go through vector_fmul_reverse with swindow.
 * The statements that advance in and out between the two calls and
 * between blocks are not visible in this listing.
 */
232 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234 const float *in = audio + 448;
235 float *out = sce->ret_buf;
238 for (w = 0; w < 8; w++) {
239 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
242 fdsp->vector_fmul_reverse(out, in, swindow, 128);
/*
 * Dispatch table from window sequence value to the matching windowing
 * routine; apply_window_and_mdct indexes it with
 * sce->ics.window_sequence[0].
 */
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
248 SingleChannelElement *sce,
249 const float *audio) = {
250 [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251 [LONG_START_SEQUENCE] = apply_long_start_window,
252 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253 [LONG_STOP_SEQUENCE] = apply_long_stop_window
/*
 * Window the samples at audio into sce->ret_buf, transform them into
 * sce->coeffs, then prepare state for the next frame.
 * The remainder of the parameter list and the declaration of i are not
 * visible in this listing.
 */
256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
260 float *output = sce->ret_buf;
/* pick the windowing routine for the current window sequence */
262 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
/* one 1024-coefficient transform for every sequence except eight-short */
264 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
265 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
/* eight 128-coefficient transforms, each reading 256 windowed samples;
 * presumably the else branch of the test above (the else line is not visible) */
267 for (i = 0; i < 1024; i += 128)
268 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
/* move samples 1024..2047 to the front of the buffer */
269 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
/* keep an unmodified copy of the coefficients in pcoeffs */
270 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
274 * Encode ics_info element.
275 * @see Table 4.6 (syntax of ics_info)
/*
 * Write the ics_info fields of one channel stream to s->pb:
 * a reserved zero bit, the 2-bit window sequence and the 1-bit window
 * shape flag, followed by a layout that depends on the window sequence.
 * The else line between the two layouts and the declaration of w are not
 * visible in this listing.
 */
277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
281 put_bits(&s->pb, 1, 0); // ics_reserved bit
282 put_bits(&s->pb, 2, info->window_sequence[0]);
283 put_bits(&s->pb, 1, info->use_kb_window[0]);
284 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
/* every sequence except eight-short: 6-bit max_sfb and a cleared prediction flag */
285 put_bits(&s->pb, 6, info->max_sfb);
286 put_bits(&s->pb, 1, 0); // no prediction
/* eight-short (presumably the else branch): 4-bit max_sfb, then one bit
 * for each of windows 1..7, set when group_len[w] is zero */
288 put_bits(&s->pb, 4, info->max_sfb);
289 for (w = 1; w < 8; w++)
290 put_bits(&s->pb, 1, !info->group_len[w]);
296 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
/*
 * Write the 2-bit cpe->ms_mode and, when it equals 1, one mask bit per
 * band for every window group of channel 0. Mask entries are addressed as
 * ms_mask[w*16 + i], with w the first window of the group and i the band
 * index below max_sfb.
 */
298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
302 put_bits(pb, 2, cpe->ms_mode);
303 if (cpe->ms_mode == 1)
304 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
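310 * Finalise per-frame band information: apply M/S to the coefficients of masked bands, trim max_sfb to the last non-zero band, merge zero flags per window group and pick ms_mode.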
/*
 * Post-process the decisions made for one channel element before it is
 * written out. For each of the chans channels:
 *   - clear the pulse count;
 *   - while handling channel 0 of an element with common_window set,
 *     rewrite bands flagged in ms_mask: ch[0] becomes the mean of both
 *     channels and ch[1] becomes that mean minus the original ch[1];
 *   - set max_sfb to the largest band count left after dropping trailing
 *     bands flagged in zeroes;
 *   - recompute the zero flag stored at the first window of each group.
 * For a two-channel element with common_window set, max_sfb is unified
 * across both channels and ms_mode is derived from the mask.
 * Several declarations, initialisations and closing braces are not
 * visible in this listing.
 */
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
315 int start, maxsfb, cmaxsfb;
317 for (ch = 0; ch < chans; ch++) {
318 IndividualChannelStream *ics = &cpe->ch[ch].ics;
321 cpe->ch[ch].pulse.num_pulse = 0;
322 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
323 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
/* each window owns 128 consecutive coefficients */
324 start = (w+w2) * 128;
325 for (g = 0; g < ics->num_swb; g++) {
/* the sum/difference transform is done once, while processing channel 0 */
327 if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
328 for (i = 0; i < ics->swb_sizes[g]; i++) {
329 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
330 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
333 start += ics->swb_sizes[g];
/* walk down from the top band while the band is flagged zero */
335 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
337 maxsfb = FFMAX(maxsfb, cmaxsfb);
340 ics->max_sfb = maxsfb;
342 //adjust zero bands for window groups
343 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
344 for (g = 0; g < ics->max_sfb; g++) {
/* scan the windows of the group for a band that is not flagged zero;
 * the lines that set i are not visible here */
346 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
347 if (!cpe->ch[ch].zeroes[w2*16 + g]) {
352 cpe->ch[ch].zeroes[w*16 + g] = i;
357 if (chans > 1 && cpe->common_window) {
358 IndividualChannelStream *ics0 = &cpe->ch[0].ics;
359 IndividualChannelStream *ics1 = &cpe->ch[1].ics;
/* both channels share a single max_sfb */
361 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
362 ics1->max_sfb = ics0->max_sfb;
/* msc presumably counts the mask bits that are set (the increment line is not visible) */
363 for (w = 0; w < ics0->num_windows*16; w += 16)
364 for (i = 0; i < ics0->max_sfb; i++)
365 if (cpe->ms_mask[w+i])
367 if (msc == 0 || ics0->max_sfb == 0)
/* ms_mode 1 when msc is below max_sfb * num_windows, otherwise 2 */
370 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
375 * Encode scalefactor band coding type.
/*
 * Call the selected coder's encode_window_bands_info once per window
 * group, passing the first window of the group, the group length and the
 * current lambda.
 */
377 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
381 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
382 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
386 * Encode scalefactors.
/*
 * Write the scalefactor of every band not flagged in sce->zeroes as a
 * difference from the previous value of the same kind. Two running
 * predictors are kept: off_sf for ordinary bands and off_pns for NOISE_BT
 * bands. The difference is offset by SCALE_DIFF_ZERO, asserted to lie in
 * 0..120, and written with the code from ff_aac_scalefactor_code and
 * ff_aac_scalefactor_bits.
 * The declaration of noise_flag and the control flow after the
 * NOISE_PRE_BITS write are not visible in this listing.
 */
388 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
389 SingleChannelElement *sce)
391 int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
395 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
396 for (i = 0; i < sce->ics.max_sfb; i++) {
397 if (!sce->zeroes[w*16 + i]) {
398 if (sce->band_type[w*16 + i] == NOISE_BT) {
399 diff = sce->sf_idx[w*16 + i] - off_pns;
400 off_pns = sce->sf_idx[w*16 + i];
/* while noise_flag is positive the value is written as a fixed-width field */
401 if (noise_flag-- > 0) {
402 put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
/* ordinary band (presumably the else branch of the NOISE_BT test) */
406 diff = sce->sf_idx[w*16 + i] - off_sf;
407 off_sf = sce->sf_idx[w*16 + i];
409 diff += SCALE_DIFF_ZERO;
410 av_assert0(diff >= 0 && diff <= 120);
411 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
/*
 * Write pulse data to s->pb: a 1-bit presence flag, then 2-bit
 * (num_pulse - 1), the 6-bit start value, and for each pulse a 5-bit
 * position and a 4-bit amplitude.
 * The statement guarded by the num_pulse test is not visible in this
 * listing; presumably the function returns when there are no pulses.
 */
420 static void encode_pulses(AACEncContext *s, Pulse *pulse)
424 put_bits(&s->pb, 1, !!pulse->num_pulse);
425 if (!pulse->num_pulse)
428 put_bits(&s->pb, 2, pulse->num_pulse - 1);
429 put_bits(&s->pb, 6, pulse->start);
430 for (i = 0; i < pulse->num_pulse; i++) {
431 put_bits(&s->pb, 5, pulse->pos[i]);
432 put_bits(&s->pb, 4, pulse->amp[i]);
437 * Encode spectral coefficients processed by psychoacoustic model.
/*
 * Quantize and write the coefficients of every band below max_sfb, one
 * window group at a time. A band flagged in sce->zeroes is skipped; any
 * other band is passed to the coder's quantize_and_encode_band once per
 * window of the group, using the scalefactor index and band type stored
 * for the first window of the group.
 * The initialisation of start, the final call argument and the statement
 * that ends the zero-band branch are not visible in this listing.
 */
439 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
443 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
445 for (i = 0; i < sce->ics.max_sfb; i++) {
446 if (sce->zeroes[w*16 + i]) {
/* still advance past the coefficients of the skipped band */
447 start += sce->ics.swb_sizes[i];
/* window w2 begins 128 coefficients after window w2 - 1 */
450 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
451 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
452 sce->ics.swb_sizes[i],
453 sce->sf_idx[w*16 + i],
454 sce->band_type[w*16 + i],
456 start += sce->ics.swb_sizes[i];
462 * Encode one channel of audio data.
/*
 * Write one channel stream to s->pb in this order: 8-bit sce->sf_idx[0],
 * ics_info, band info, scalefactors, pulse data, a cleared tns bit, a
 * cleared ssr bit, then the spectral coefficients.
 * The last parameter, the condition guarding put_ics_info (presumably
 * tied to the common_window argument passed by the caller) and the return
 * statement are not visible in this listing.
 */
464 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
465 SingleChannelElement *sce,
468 put_bits(&s->pb, 8, sce->sf_idx[0]);
470 put_ics_info(s, &sce->ics);
471 encode_band_info(s, sce);
472 encode_scale_factors(avctx, s, sce);
473 encode_pulses(s, &sce->pulse);
474 put_bits(&s->pb, 1, 0); //tns
475 put_bits(&s->pb, 1, 0); //ssr
476 encode_spectral_coeffs(s, sce);
481 * Write some auxiliary information about the created AAC file.
/*
 * Write a TYPE_FIL element carrying the bytes of name.
 * namelen is strlen(name) + 2. The 4-bit count field holds
 * min(namelen, 15); the 8-bit field holding namelen - 14 is presumably
 * written only when namelen is at least 15 (the guard line is not
 * visible). After the 4-bit extension type the writer is aligned to a
 * byte boundary, the name bytes are written, and 12 - padbits zero bits
 * follow, where padbits is the number of bits consumed by the alignment.
 */
483 static void put_bitstream_info(AACEncContext *s, const char *name)
485 int i, namelen, padbits;
487 namelen = strlen(name) + 2;
488 put_bits(&s->pb, 3, TYPE_FIL);
489 put_bits(&s->pb, 4, FFMIN(namelen, 15));
491 put_bits(&s->pb, 8, namelen - 14);
492 put_bits(&s->pb, 4, 0); //extension type - filler
/* number of bits needed to reach the next byte boundary */
493 padbits = -put_bits_count(&s->pb) & 7;
494 avpriv_align_put_bits(&s->pb);
495 for (i = 0; i < namelen - 2; i++)
496 put_bits(&s->pb, 8, name[i]);
497 put_bits(&s->pb, 12 - padbits, 0);
501 * Copy input samples.
502 * Channels are reordered from libavcodec's default order to AAC order.
/*
 * Advance the per-channel sample buffers by one frame and append the new
 * input. Each s->planar_samples[ch] buffer is addressed up to index 3071.
 * Samples 2048..3071 are moved to 1024..2047, the new frame is copied in
 * from index 2048 using the channel order given by aac_chan_maps, and
 * everything from end up to 3071 is zeroed.
 * frame may be NULL, in which case end is 2048. The condition guarding
 * the copy from frame is not visible in this listing.
 */
504 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
507 int end = 2048 + (frame ? frame->nb_samples : 0);
508 const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
510 /* copy and remap input samples */
511 for (ch = 0; ch < s->channels; ch++) {
512 /* copy last 1024 samples of previous frame to the start of the current frame */
513 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
515 /* copy new samples and zero any remaining samples */
517 memcpy(&s->planar_samples[ch][2048],
518 frame->extended_data[channel_map[ch]],
519 frame->nb_samples * sizeof(s->planar_samples[0][0]));
521 memset(&s->planar_samples[ch][end], 0,
522 (3072 - end) * sizeof(s->planar_samples[0][0]));
/**
 * Encode one frame of input into avpkt.
 *
 * Stages visible in this listing:
 *   1. queue the frame and copy its samples into the planar buffers;
 *   2. per channel: choose the window sequence, window the samples and
 *      run the MDCT;
 *   3. allocate the packet and start the bit writer;
 *   4. per syntactic element: psychoacoustic analysis, quantizer search,
 *      common window and M/S decisions, then bitstream output;
 *   5. compare the bit count with the per-frame limit and rescale lambda;
 *   6. write TYPE_END, flush, update lambda and fill in the packet size.
 *
 * Many lines (declarations, assignments of cpe and start_ch, loop
 * headers, returns, closing braces) are not visible in this listing, so
 * the notes below describe only what the visible statements do.
 */
526 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
527 const AVFrame *frame, int *got_packet_ptr)
529 AACEncContext *s = avctx->priv_data;
530 float **samples = s->planar_samples, *samples2, *la, *overlap;
532 int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
533 int chan_el_counter[4];
534 FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
536 if (s->last_frame == 2)
539 /* add current frame to queue */
541 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
545 copy_input_samples(s, frame);
547 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
549 if (!avctx->frame_number)
// first pass over the elements: window decision, windowing and MDCT
553 for (i = 0; i < s->chan_map[0]; i++) {
554 FFPsyWindowInfo* wi = windows + start_ch;
555 tag = s->chan_map[i+1];
// a channel pair element carries two channels, every other type one
556 chans = tag == TYPE_CPE ? 2 : 1;
558 for (ch = 0; ch < chans; ch++) {
559 IndividualChannelStream *ics = &cpe->ch[ch].ics;
560 int cur_channel = start_ch + ch;
// overlap: start of the channel buffer; samples2: 1024 samples in;
// la: a further 512 samples in
561 overlap = &samples[cur_channel][0];
562 samples2 = overlap + 1024;
563 la = samples2 + (448+64);
// LFE elements get a fixed single long window
566 if (tag == TYPE_LFE) {
567 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
568 wi[ch].window_shape = 0;
569 wi[ch].num_windows = 1;
570 wi[ch].grouping[0] = 1;
572 /* Only the lowest 12 coefficients are used in a LFE channel.
573 * The expression below results in only the bottom 8 coefficients
574 * being used for 11.025kHz to 16kHz sample rates.
576 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
// other element types ask the psychoacoustic model for the window decision
578 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
579 ics->window_sequence[0]);
// index 1 keeps the previous frame value, index 0 receives the new one
581 ics->window_sequence[1] = ics->window_sequence[0];
582 ics->window_sequence[0] = wi[ch].window_type[0];
583 ics->use_kb_window[1] = ics->use_kb_window[0];
584 ics->use_kb_window[0] = wi[ch].window_shape;
585 ics->num_windows = wi[ch].num_windows;
// entry 1 of the psy band tables is used when there are eight windows, entry 0 otherwise
586 ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
587 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
588 for (w = 0; w < ics->num_windows; w++)
589 ics->group_len[w] = wi[ch].grouping[w];
591 apply_window_and_mdct(s, &cpe->ch[ch], overlap);
// NOTE(review): cpe->ch->coeffs[0] reads channel 0 of the element on every
// pass of the ch loop, although cpe->ch[ch] was just transformed.
// Presumably cpe->ch[ch].coeffs[0] was intended - confirm.
592 if (isnan(cpe->ch->coeffs[0])) {
593 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
594 return AVERROR(EINVAL);
// packet size requested: 8192 bytes per channel
599 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
604 init_put_bits(&s->pb, avpkt->data, avpkt->size);
// embed the library identification string when the low 8 bits of
// frame_number equal 1, unless bit-exact output was requested
606 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
607 put_bitstream_info(s, LIBAVCODEC_IDENT);
// second pass over the elements: analysis, quantization and output
609 memset(chan_el_counter, 0, sizeof(chan_el_counter));
610 for (i = 0; i < s->chan_map[0]; i++) {
611 FFPsyWindowInfo* wi = windows + start_ch;
612 const float *coeffs[2];
613 tag = s->chan_map[i+1];
614 chans = tag == TYPE_CPE ? 2 : 1;
// element header: 3-bit type followed by a 4-bit per-type instance counter
616 put_bits(&s->pb, 3, tag);
617 put_bits(&s->pb, 4, chan_el_counter[tag]++);
618 for (ch = 0; ch < chans; ch++)
619 coeffs[ch] = cpe->ch[ch].coeffs;
620 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
621 for (ch = 0; ch < chans; ch++) {
622 s->cur_channel = start_ch + ch;
623 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
// a common window needs matching window type, shape and grouping on
// both channels; the first line of this condition is not visible
625 cpe->common_window = 0;
627 && wi[0].window_type[0] == wi[1].window_type[0]
628 && wi[0].window_shape == wi[1].window_shape) {
630 cpe->common_window = 1;
631 for (w = 0; w < wi[0].num_windows; w++) {
632 if (wi[0].grouping[w] != wi[1].grouping[w]) {
633 cpe->common_window = 0;
638 s->cur_channel = start_ch;
// stereo_mode above zero sets every mask bit; a non-zero value that is
// not above zero lets the coder search for M/S when it provides a
// search_for_ms callback
639 if (s->options.stereo_mode && cpe->common_window) {
640 if (s->options.stereo_mode > 0) {
641 IndividualChannelStream *ics = &cpe->ch[0].ics;
642 for (w = 0; w < ics->num_windows; w += ics->group_len[w])
643 for (g = 0; g < ics->num_swb; g++)
644 cpe->ms_mask[w*16+g] = 1;
645 } else if (s->coder->search_for_ms) {
646 s->coder->search_for_ms(s, cpe, s->lambda);
649 adjust_frame_information(cpe, chans);
651 put_bits(&s->pb, 1, cpe->common_window);
652 if (cpe->common_window) {
653 put_ics_info(s, &cpe->ch[0].ics);
654 encode_ms_info(&s->pb, cpe);
655 if (cpe->ms_mode) ms_mode = 1;
658 for (ch = 0; ch < chans; ch++) {
659 s->cur_channel = start_ch + ch;
660 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
// accept the frame when it fits in 6144 bits per channel minus 3 bits;
// the bit reservoir estimate is then the per-channel bit count
665 frame_bits = put_bits_count(&s->pb);
666 if (frame_bits <= 6144 * s->channels - 3) {
667 s->psy.bitres.bits = frame_bits / s->channels;
671 for (i = 0; i < s->chan_map[0]; i++) {
672 // Must restore coeffs
// NOTE(review): tag is not reloaded from s->chan_map[i+1] in this loop, so
// chans follows the last element handled by the previous loop. If that
// element has one channel, the second channel of a pair is not restored.
// Presumably a per-element reload of tag is missing - confirm.
673 chans = tag == TYPE_CPE ? 2 : 1;
675 for (ch = 0; ch < chans; ch++)
676 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
// scale lambda by (bit_rate * 1024 / sample_rate) divided by the bits produced
680 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
684 put_bits(&s->pb, 3, TYPE_END);
685 flush_put_bits(&s->pb);
686 avctx->frame_bits = put_bits_count(&s->pb);
688 // rate control stuff
689 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
690 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
// the line applying ratio is not visible; lambda is capped at 65536
692 s->lambda = FFMIN(s->lambda, 65536.f);
698 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
// packet size in whole bytes
701 avpkt->size = put_bits_count(&s->pb) >> 3;
// Release encoder state: both MDCT contexts, the psy preprocessing
// context, the sample buffer (pointer cleared by av_freep) and the audio
// frame queue. Further cleanup calls and the return statement are not
// visible in this listing.
706 static av_cold int aac_encode_end(AVCodecContext *avctx)
708 AACEncContext *s = avctx->priv_data;
710 ff_mdct_end(&s->mdct1024);
711 ff_mdct_end(&s->mdct128);
714 ff_psy_preprocess_end(s->psypp);
715 av_freep(&s->buffer.samples);
718 ff_af_queue_close(&s->afq);
// Set up the signal processing helpers: allocate the float DSP context
// (passing the bit-exact flag), fill the KBD and sine window tables for
// the 1024 and 128 sizes, and initialise the two MDCT contexts.
// Returns AVERROR(ENOMEM) on the visible error path; the allocation check
// and the remaining returns are not visible in this listing.
722 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
726 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
728 return AVERROR(ENOMEM);
// second argument is presumably the KBD alpha parameter - TODO confirm
731 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
732 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
// presumably log2 of the window length (1024 and 128) - TODO confirm
733 ff_init_ff_sine_windows(10);
734 ff_init_ff_sine_windows(7);
736 if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
738 if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
// Allocate zero-initialised storage: the shared sample buffer, one
// ChannelElement per syntactic element (s->chan_map[0] of them) and
// extradata of 5 bytes plus FF_INPUT_BUFFER_PADDING_SIZE. Each
// s->planar_samples[ch] then points 3 * 1024 samples after the previous
// channel inside the shared buffer. A failed allocation jumps to
// alloc_fail, where AVERROR(ENOMEM) is returned (the label line and the
// success return are not visible in this listing).
744 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
747 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
748 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
749 FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
751 for(ch = 0; ch < s->channels; ch++)
752 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
756 return AVERROR(ENOMEM);
// Encoder initialisation. Visible steps: fix the frame size at 1024
// samples, look up the sample rate index, validate sample rate, channel
// count and profile, clamp the bit rate, select the channel layout,
// initialise DSP helpers and buffers, write the stream configuration to
// extradata, set up the psychoacoustic model and the coder, choose the
// initial lambda, fill ff_aac_pow34sf_tab and set up the frame queue.
// Several declarations, the failure returns and the cleanup label are not
// visible in this listing; aac_encode_end is presumably called on the
// failure path.
759 static av_cold int aac_encode_init(AVCodecContext *avctx)
761 AACEncContext *s = avctx->priv_data;
763 const uint8_t *sizes[2];
764 uint8_t grouping[AAC_MAX_CHANNELS];
767 avctx->frame_size = 1024;
// find the table position of the requested sample rate; the statement
// run on a match (presumably a break) is not visible
769 for (i = 0; i < 16; i++)
770 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
773 s->channels = avctx->channels;
776 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
777 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
778 "Unsupported sample rate %d\n", avctx->sample_rate);
// NOTE(review): only an upper bound on s->channels is visible here, while
// channels - 1 is used below as an array index. Confirm that a lower
// bound is enforced before this point.
779 ERROR_IF(s->channels > AAC_MAX_CHANNELS,
780 "Unsupported number of channels: %d\n", s->channels);
781 ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
782 "Unsupported profile %d\n", avctx->profile);
783 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
784 "Too many bits per frame requested, clamping to max\n");
// cap at 6144 bits per channel per 1024-sample frame; the second FFMIN
// operand is not visible
786 avctx->bit_rate = (int)FFMIN(
787 6144 * s->channels / 1024.0 * avctx->sample_rate,
790 s->samplerate_index = i;
792 s->chan_map = aac_chan_configs[s->channels-1];
794 if ((ret = dsp_init(avctx, s)) < 0)
797 if ((ret = alloc_buffers(avctx, s)) < 0)
800 avctx->extradata_size = 5;
801 put_audio_specific_config(avctx);
// band layouts for the two window sizes at this sample rate index
803 sizes[0] = swb_size_1024[i];
804 sizes[1] = swb_size_128[i];
805 lengths[0] = ff_aac_num_swb_1024[i];
806 lengths[1] = ff_aac_num_swb_128[i];
// grouping[i] is 1 for a channel pair element, 0 otherwise
807 for (i = 0; i < s->chan_map[0]; i++)
808 grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
809 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
810 s->chan_map[0], grouping)) < 0)
812 s->psypp = ff_psy_preprocess_init(avctx);
813 s->coder = &ff_aac_coders[s->options.aac_coder];
816 ff_aac_coder_init_mips(s);
// initial lambda: global_quality when positive, otherwise 120
818 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
// sqrt(x * sqrt(x)) equals x raised to the power 3/4
822 for (i = 0; i < 428; i++)
823 ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
825 avctx->initial_padding = 1024;
826 ff_af_queue_init(avctx, &s->afq);
830 aac_encode_end(avctx);
// Flags shared by every entry of the option table below.
834 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
// Private encoder options. Three integer options are declared
// (stereo_mode in -1..1, aac_coder in 0..AAC_CODER_NB-1, aac_pns in 0..1),
// each followed by the named constants that belong to its unit string.
// The stored values land in AACEncContext.options.
835 static const AVOption aacenc_options[] = {
836 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
837 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
838 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
839 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
840 {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
841 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
842 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
843 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
844 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
845 {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
846 {"disable", "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
847 {"enable", "Enable PNS (Proof of concept)", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
// AVClass record referenced by ff_aac_encoder.priv_class. Only the item
// name callback and the version initialisers are visible in this listing;
// the class name and option table entries are presumably on the missing
// lines.
851 static const AVClass aacenc_class = {
853 av_default_item_name,
855 LIBAVUTIL_VERSION_INT,
858 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
// Sample rates advertised through ff_aac_encoder.supported_samplerates.
// Thirteen rates are visible for an array declared with 16 elements.
860 static const int mpeg4audio_sample_rates[16] = {
861 96000, 88200, 64000, 48000, 44100, 32000,
862 24000, 22050, 16000, 12000, 11025, 8000, 7350
// Codec registration record: ties the init, encode2 and close callbacks
// defined in this file to AV_CODEC_ID_AAC, declares planar float as the
// only accepted sample format and lists the supported sample rates.
// The name field is not visible in this listing.
865 AVCodec ff_aac_encoder = {
867 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
868 .type = AVMEDIA_TYPE_AUDIO,
869 .id = AV_CODEC_ID_AAC,
870 .priv_data_size = sizeof(AACEncContext),
871 .init = aac_encode_init,
872 .encode2 = aac_encode_frame,
873 .close = aac_encode_end,
874 .supported_samplerates = mpeg4audio_sample_rates,
875 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
876 CODEC_CAP_EXPERIMENTAL,
877 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
878 AV_SAMPLE_FMT_NONE },
879 .priv_class = &aacenc_class,