3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
#define AAC_MAX_CHANNELS 6

/**
 * Log an error and return AVERROR(EINVAL) from the enclosing function
 * when cond is true. Requires a variable named avctx in scope.
 * The do/while(0) wrapper makes the macro a single statement, so it is
 * safe inside unbraced if/else constructs.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

/**
 * Log a warning when cond is true. Requires a variable named avctx in scope.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)

/** x^(3/4) of each ff_aac_pow2sf_tab entry; filled in by aac_encode_init(). */
float ff_aac_pow34sf_tab[428];
/*
 * Scalefactor band widths for 1024-coefficient (long) windows, one table
 * per sample-rate class. The widths of every table add up to 1024.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96 /* final band: brings the total to 1024 coefficients */
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/*
 * Long-window band tables indexed by MPEG-4 sample-rate index
 * (96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000,
 *  12000, 11025, 8000, 7350 Hz).
 */
static const uint8_t *swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
    swb_size_1024_8
};
/*
 * Scalefactor band widths for 128-coefficient (short) windows, one table
 * per sample-rate class. The widths of every table add up to 128.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

/* Short-window band tables indexed by MPEG-4 sample-rate index. */
static const uint8_t *swb_size_128[] = {
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8,
    swb_size_128_8
};
144 /** default channel configurations */
145 static const uint8_t aac_chan_configs[6][5] = {
146 {1, TYPE_SCE}, // 1 channel - single channel element
147 {1, TYPE_CPE}, // 2 channels - channel pair
148 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
155 * Table to remap channels from libavcodec's default order to AAC order.
157 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
163 { 2, 0, 1, 4, 5, 3 },
167 * Make AAC audio config object.
168 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
170 static void put_audio_specific_config(AVCodecContext *avctx)
173 AACEncContext *s = avctx->priv_data;
175 init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176 put_bits(&pb, 5, 2); //object type - AAC-LC
177 put_bits(&pb, 4, s->samplerate_index); //sample rate index
178 put_bits(&pb, 4, s->channels);
180 put_bits(&pb, 1, 0); //frame length - 1024 samples
181 put_bits(&pb, 1, 0); //does not depend on core coder
182 put_bits(&pb, 1, 0); //is not extension
184 //Explicitly Mark SBR absent
185 put_bits(&pb, 11, 0x2b7); //sync extension
186 put_bits(&pb, 5, AOT_SBR);
191 #define WINDOW_FUNC(type) \
192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
193 SingleChannelElement *sce, \
196 WINDOW_FUNC(only_long)
198 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200 float *out = sce->ret_buf;
202 fdsp->vector_fmul (out, audio, lwindow, 1024);
203 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
206 WINDOW_FUNC(long_start)
208 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210 float *out = sce->ret_buf;
212 fdsp->vector_fmul(out, audio, lwindow, 1024);
213 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
218 WINDOW_FUNC(long_stop)
220 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222 float *out = sce->ret_buf;
224 memset(out, 0, sizeof(out[0]) * 448);
225 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
230 WINDOW_FUNC(eight_short)
232 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234 const float *in = audio + 448;
235 float *out = sce->ret_buf;
238 for (w = 0; w < 8; w++) {
239 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
242 fdsp->vector_fmul_reverse(out, in, swindow, 128);
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
248 SingleChannelElement *sce,
249 const float *audio) = {
250 [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251 [LONG_START_SEQUENCE] = apply_long_start_window,
252 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253 [LONG_STOP_SEQUENCE] = apply_long_stop_window
256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
260 float *output = sce->ret_buf;
262 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
264 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
265 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
267 for (i = 0; i < 1024; i += 128)
268 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
269 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
270 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
274 * Encode ics_info element.
275 * @see Table 4.6 (syntax of ics_info)
277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
281 put_bits(&s->pb, 1, 0); // ics_reserved bit
282 put_bits(&s->pb, 2, info->window_sequence[0]);
283 put_bits(&s->pb, 1, info->use_kb_window[0]);
284 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
285 put_bits(&s->pb, 6, info->max_sfb);
286 put_bits(&s->pb, 1, 0); // no prediction
288 put_bits(&s->pb, 4, info->max_sfb);
289 for (w = 1; w < 8; w++)
290 put_bits(&s->pb, 1, !info->group_len[w]);
296 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
302 put_bits(pb, 2, cpe->ms_mode);
303 if (cpe->ms_mode == 1)
304 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
310 * Produce integer coefficients from scalefactors provided by the model.
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
316 IndividualChannelStream *ics;
318 if (cpe->common_window) {
319 ics = &cpe->ch[0].ics;
320 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
321 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
322 int start = (w+w2) * 128;
323 for (g = 0; g < ics->num_swb; g++) {
324 //apply Intensity stereo coeffs transformation
325 if (cpe->is_mask[w*16 + g]) {
326 int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
327 float scale = cpe->ch[0].is_ener[w*16+g];
328 for (i = 0; i < ics->swb_sizes[g]; i++) {
329 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
330 cpe->ch[1].coeffs[start+i] = 0.0f;
332 } else if (cpe->ms_mask[w*16 + g] &&
333 cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
334 cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
335 for (i = 0; i < ics->swb_sizes[g]; i++) {
336 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
337 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
340 start += ics->swb_sizes[g];
346 for (ch = 0; ch < chans; ch++) {
347 IndividualChannelStream *ics = &cpe->ch[ch].ics;
349 cpe->ch[ch].pulse.num_pulse = 0;
350 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
351 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
352 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
354 maxsfb = FFMAX(maxsfb, cmaxsfb);
357 ics->max_sfb = maxsfb;
359 //adjust zero bands for window groups
360 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
361 for (g = 0; g < ics->max_sfb; g++) {
363 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
364 if (!cpe->ch[ch].zeroes[w2*16 + g]) {
369 cpe->ch[ch].zeroes[w*16 + g] = i;
374 if (chans > 1 && cpe->common_window) {
375 IndividualChannelStream *ics0 = &cpe->ch[0].ics;
376 IndividualChannelStream *ics1 = &cpe->ch[1].ics;
378 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
379 ics1->max_sfb = ics0->max_sfb;
380 for (w = 0; w < ics0->num_windows*16; w += 16)
381 for (i = 0; i < ics0->max_sfb; i++)
382 if (cpe->ms_mask[w+i])
384 if (msc == 0 || ics0->max_sfb == 0)
387 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
392 * Encode scalefactor band coding type.
394 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
398 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
399 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
403 * Encode scalefactors.
405 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
406 SingleChannelElement *sce)
408 int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
409 int off_is = 0, noise_flag = 1;
412 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
413 for (i = 0; i < sce->ics.max_sfb; i++) {
414 if (!sce->zeroes[w*16 + i]) {
415 if (sce->band_type[w*16 + i] == NOISE_BT) {
416 diff = sce->sf_idx[w*16 + i] - off_pns;
417 off_pns = sce->sf_idx[w*16 + i];
418 if (noise_flag-- > 0) {
419 put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
422 } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
423 sce->band_type[w*16 + i] == INTENSITY_BT2) {
424 diff = sce->sf_idx[w*16 + i] - off_is;
425 off_is = sce->sf_idx[w*16 + i];
427 diff = sce->sf_idx[w*16 + i] - off_sf;
428 off_sf = sce->sf_idx[w*16 + i];
430 diff += SCALE_DIFF_ZERO;
431 av_assert0(diff >= 0 && diff <= 120);
432 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
441 static void encode_pulses(AACEncContext *s, Pulse *pulse)
445 put_bits(&s->pb, 1, !!pulse->num_pulse);
446 if (!pulse->num_pulse)
449 put_bits(&s->pb, 2, pulse->num_pulse - 1);
450 put_bits(&s->pb, 6, pulse->start);
451 for (i = 0; i < pulse->num_pulse; i++) {
452 put_bits(&s->pb, 5, pulse->pos[i]);
453 put_bits(&s->pb, 4, pulse->amp[i]);
458 * Encode spectral coefficients processed by psychoacoustic model.
460 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
464 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
466 for (i = 0; i < sce->ics.max_sfb; i++) {
467 if (sce->zeroes[w*16 + i]) {
468 start += sce->ics.swb_sizes[i];
471 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
472 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
473 sce->ics.swb_sizes[i],
474 sce->sf_idx[w*16 + i],
475 sce->band_type[w*16 + i],
477 start += sce->ics.swb_sizes[i];
483 * Encode one channel of audio data.
485 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
486 SingleChannelElement *sce,
489 put_bits(&s->pb, 8, sce->sf_idx[0]);
491 put_ics_info(s, &sce->ics);
492 encode_band_info(s, sce);
493 encode_scale_factors(avctx, s, sce);
494 encode_pulses(s, &sce->pulse);
495 put_bits(&s->pb, 1, 0); //tns
496 put_bits(&s->pb, 1, 0); //ssr
497 encode_spectral_coeffs(s, sce);
502 * Write some auxiliary information about the created AAC file.
504 static void put_bitstream_info(AACEncContext *s, const char *name)
506 int i, namelen, padbits;
508 namelen = strlen(name) + 2;
509 put_bits(&s->pb, 3, TYPE_FIL);
510 put_bits(&s->pb, 4, FFMIN(namelen, 15));
512 put_bits(&s->pb, 8, namelen - 14);
513 put_bits(&s->pb, 4, 0); //extension type - filler
514 padbits = -put_bits_count(&s->pb) & 7;
515 avpriv_align_put_bits(&s->pb);
516 for (i = 0; i < namelen - 2; i++)
517 put_bits(&s->pb, 8, name[i]);
518 put_bits(&s->pb, 12 - padbits, 0);
522 * Copy input samples.
523 * Channels are reordered from libavcodec's default order to AAC order.
525 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
528 int end = 2048 + (frame ? frame->nb_samples : 0);
529 const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
531 /* copy and remap input samples */
532 for (ch = 0; ch < s->channels; ch++) {
533 /* copy last 1024 samples of previous frame to the start of the current frame */
534 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
536 /* copy new samples and zero any remaining samples */
538 memcpy(&s->planar_samples[ch][2048],
539 frame->extended_data[channel_map[ch]],
540 frame->nb_samples * sizeof(s->planar_samples[0][0]));
542 memset(&s->planar_samples[ch][end], 0,
543 (3072 - end) * sizeof(s->planar_samples[0][0]));
547 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
548 const AVFrame *frame, int *got_packet_ptr)
550 AACEncContext *s = avctx->priv_data;
551 float **samples = s->planar_samples, *samples2, *la, *overlap;
553 int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
554 int chan_el_counter[4];
555 FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
557 if (s->last_frame == 2)
560 /* add current frame to queue */
562 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
566 copy_input_samples(s, frame);
568 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
570 if (!avctx->frame_number)
574 for (i = 0; i < s->chan_map[0]; i++) {
575 FFPsyWindowInfo* wi = windows + start_ch;
576 tag = s->chan_map[i+1];
577 chans = tag == TYPE_CPE ? 2 : 1;
579 for (ch = 0; ch < chans; ch++) {
580 IndividualChannelStream *ics = &cpe->ch[ch].ics;
581 int cur_channel = start_ch + ch;
582 overlap = &samples[cur_channel][0];
583 samples2 = overlap + 1024;
584 la = samples2 + (448+64);
587 if (tag == TYPE_LFE) {
588 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
589 wi[ch].window_shape = 0;
590 wi[ch].num_windows = 1;
591 wi[ch].grouping[0] = 1;
593 /* Only the lowest 12 coefficients are used in a LFE channel.
594 * The expression below results in only the bottom 8 coefficients
595 * being used for 11.025kHz to 16kHz sample rates.
597 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
599 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
600 ics->window_sequence[0]);
602 ics->window_sequence[1] = ics->window_sequence[0];
603 ics->window_sequence[0] = wi[ch].window_type[0];
604 ics->use_kb_window[1] = ics->use_kb_window[0];
605 ics->use_kb_window[0] = wi[ch].window_shape;
606 ics->num_windows = wi[ch].num_windows;
607 ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
608 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
609 for (w = 0; w < ics->num_windows; w++)
610 ics->group_len[w] = wi[ch].grouping[w];
612 apply_window_and_mdct(s, &cpe->ch[ch], overlap);
613 if (isnan(cpe->ch->coeffs[0])) {
614 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
615 return AVERROR(EINVAL);
620 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
625 init_put_bits(&s->pb, avpkt->data, avpkt->size);
627 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
628 put_bitstream_info(s, LIBAVCODEC_IDENT);
630 memset(chan_el_counter, 0, sizeof(chan_el_counter));
631 for (i = 0; i < s->chan_map[0]; i++) {
632 FFPsyWindowInfo* wi = windows + start_ch;
633 const float *coeffs[2];
634 tag = s->chan_map[i+1];
635 chans = tag == TYPE_CPE ? 2 : 1;
637 memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
638 memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
639 put_bits(&s->pb, 3, tag);
640 put_bits(&s->pb, 4, chan_el_counter[tag]++);
641 for (ch = 0; ch < chans; ch++)
642 coeffs[ch] = cpe->ch[ch].coeffs;
643 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
644 for (ch = 0; ch < chans; ch++) {
645 s->cur_channel = start_ch + ch;
646 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
648 cpe->common_window = 0;
650 && wi[0].window_type[0] == wi[1].window_type[0]
651 && wi[0].window_shape == wi[1].window_shape) {
653 cpe->common_window = 1;
654 for (w = 0; w < wi[0].num_windows; w++) {
655 if (wi[0].grouping[w] != wi[1].grouping[w]) {
656 cpe->common_window = 0;
661 if (s->options.pns && s->coder->search_for_pns) {
662 for (ch = 0; ch < chans; ch++) {
663 s->cur_channel = start_ch + ch;
664 s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
667 s->cur_channel = start_ch;
668 if (s->options.stereo_mode && cpe->common_window) {
669 if (s->options.stereo_mode > 0) {
670 IndividualChannelStream *ics = &cpe->ch[0].ics;
671 for (w = 0; w < ics->num_windows; w += ics->group_len[w])
672 for (g = 0; g < ics->num_swb; g++)
673 cpe->ms_mask[w*16+g] = 1;
674 } else if (s->coder->search_for_ms) {
675 s->coder->search_for_ms(s, cpe, s->lambda);
678 if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
679 s->coder->search_for_is(s, avctx, cpe, s->lambda);
680 if (cpe->is_mode) is_mode = 1;
682 if (s->coder->set_special_band_scalefactors)
683 for (ch = 0; ch < chans; ch++)
684 s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
685 adjust_frame_information(cpe, chans);
687 put_bits(&s->pb, 1, cpe->common_window);
688 if (cpe->common_window) {
689 put_ics_info(s, &cpe->ch[0].ics);
690 encode_ms_info(&s->pb, cpe);
691 if (cpe->ms_mode) ms_mode = 1;
694 for (ch = 0; ch < chans; ch++) {
695 s->cur_channel = start_ch + ch;
696 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
701 frame_bits = put_bits_count(&s->pb);
702 if (frame_bits <= 6144 * s->channels - 3) {
703 s->psy.bitres.bits = frame_bits / s->channels;
706 if (is_mode || ms_mode) {
707 for (i = 0; i < s->chan_map[0]; i++) {
708 // Must restore coeffs
709 chans = tag == TYPE_CPE ? 2 : 1;
711 for (ch = 0; ch < chans; ch++)
712 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
716 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
720 put_bits(&s->pb, 3, TYPE_END);
721 flush_put_bits(&s->pb);
722 avctx->frame_bits = put_bits_count(&s->pb);
724 // rate control stuff
725 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
726 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
728 s->lambda = FFMIN(s->lambda, 65536.f);
734 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
737 avpkt->size = put_bits_count(&s->pb) >> 3;
742 static av_cold int aac_encode_end(AVCodecContext *avctx)
744 AACEncContext *s = avctx->priv_data;
746 ff_mdct_end(&s->mdct1024);
747 ff_mdct_end(&s->mdct128);
750 ff_psy_preprocess_end(s->psypp);
751 av_freep(&s->buffer.samples);
754 ff_af_queue_close(&s->afq);
758 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
762 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
764 return AVERROR(ENOMEM);
767 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
768 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
769 ff_init_ff_sine_windows(10);
770 ff_init_ff_sine_windows(7);
772 if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
774 if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
780 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
783 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
784 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
785 FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
787 for(ch = 0; ch < s->channels; ch++)
788 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
792 return AVERROR(ENOMEM);
795 static av_cold int aac_encode_init(AVCodecContext *avctx)
797 AACEncContext *s = avctx->priv_data;
799 const uint8_t *sizes[2];
800 uint8_t grouping[AAC_MAX_CHANNELS];
803 avctx->frame_size = 1024;
805 for (i = 0; i < 16; i++)
806 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
809 s->channels = avctx->channels;
812 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
813 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
814 "Unsupported sample rate %d\n", avctx->sample_rate);
815 ERROR_IF(s->channels > AAC_MAX_CHANNELS,
816 "Unsupported number of channels: %d\n", s->channels);
817 ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
818 "Unsupported profile %d\n", avctx->profile);
819 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
820 "Too many bits per frame requested, clamping to max\n");
822 avctx->bit_rate = (int)FFMIN(
823 6144 * s->channels / 1024.0 * avctx->sample_rate,
826 s->samplerate_index = i;
828 s->chan_map = aac_chan_configs[s->channels-1];
830 if ((ret = dsp_init(avctx, s)) < 0)
833 if ((ret = alloc_buffers(avctx, s)) < 0)
836 avctx->extradata_size = 5;
837 put_audio_specific_config(avctx);
839 sizes[0] = swb_size_1024[i];
840 sizes[1] = swb_size_128[i];
841 lengths[0] = ff_aac_num_swb_1024[i];
842 lengths[1] = ff_aac_num_swb_128[i];
843 for (i = 0; i < s->chan_map[0]; i++)
844 grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
845 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
846 s->chan_map[0], grouping)) < 0)
848 s->psypp = ff_psy_preprocess_init(avctx);
849 s->coder = &ff_aac_coders[s->options.aac_coder];
852 ff_aac_coder_init_mips(s);
854 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
858 for (i = 0; i < 428; i++)
859 ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
861 avctx->initial_padding = 1024;
862 ff_af_queue_init(avctx, &s->afq);
866 aac_encode_end(avctx);
870 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
871 static const AVOption aacenc_options[] = {
872 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
873 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
874 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
875 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
876 {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
877 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
878 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
879 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
880 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
881 {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
882 {"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
883 {"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
884 {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
885 {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
886 {"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
890 static const AVClass aacenc_class = {
892 av_default_item_name,
894 LIBAVUTIL_VERSION_INT,
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 * failures; the unused trailing entries are zero and terminate the list */
static const int mpeg4audio_sample_rates[16] = {
    96000, 88200, 64000, 48000, 44100, 32000,
    24000, 22050, 16000, 12000, 11025, 8000, 7350
};
904 AVCodec ff_aac_encoder = {
906 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
907 .type = AVMEDIA_TYPE_AUDIO,
908 .id = AV_CODEC_ID_AAC,
909 .priv_data_size = sizeof(AACEncContext),
910 .init = aac_encode_init,
911 .encode2 = aac_encode_frame,
912 .close = aac_encode_end,
913 .supported_samplerates = mpeg4audio_sample_rates,
914 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
915 CODEC_CAP_EXPERIMENTAL,
916 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
917 AV_SAMPLE_FMT_NONE },
918 .priv_class = &aacenc_class,