3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
/* Upper bound on the channel count accepted by aac_encode_init(); also the
 * dimension of aac_chan_maps[][] and of the per-channel window-info array
 * used while encoding a frame. */
48 #define AAC_MAX_CHANNELS 6
/* Log an error through av_log() and make the enclosing function return
 * AVERROR(EINVAL). The expansion references a variable named `avctx`, so one
 * must be in scope at the use site.
 * NOTE(review): the line that tests `cond` is not visible in this excerpt;
 * presumably the two statements below only run when `cond` is true. */
50 #define ERROR_IF(cond, ...) \
52 av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
53 return AVERROR(EINVAL); \
/* Log a warning through av_log() without returning. Like ERROR_IF, the
 * expansion references a variable named `avctx` from the use site.
 * NOTE(review): the line that tests `cond` is not visible in this excerpt;
 * presumably the warning is only emitted when `cond` is true. */
56 #define WARN_IF(cond, ...) \
58 av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate indexes 0 and
 * 1. The listed widths add up to 1024. */
62 static const uint8_t swb_size_1024_96[] = {
63 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
64 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
65 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate index 2.
 * The listed widths add up to 1024. */
68 static const uint8_t swb_size_1024_64[] = {
69 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
70 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
71 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate indexes 3 and
 * 4.
 * NOTE(review): the last visible row ends with a comma, so at least one final
 * entry is outside this excerpt; the visible widths add up to 928. */
74 static const uint8_t swb_size_1024_48[] = {
75 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
76 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
77 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate index 5.
 * The listed widths add up to 1024. */
81 static const uint8_t swb_size_1024_32[] = {
82 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
83 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
84 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate indexes 6 and
 * 7. The listed widths add up to 1024. */
87 static const uint8_t swb_size_1024_24[] = {
88 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
89 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
90 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate indexes 8, 9
 * and 10. The listed widths add up to 1024. */
93 static const uint8_t swb_size_1024_16[] = {
94 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
95 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
96 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
/* Scalefactor band widths, in spectral coefficients, for the 1024-coefficient
 * window case. Selected through swb_size_1024[] for sample-rate index 11.
 * The listed widths add up to 1024. */
99 static const uint8_t swb_size_1024_8[] = {
100 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
101 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
102 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
/* Lookup from sample-rate index to the band-width table used for
 * 1024-coefficient windows. aac_encode_init() indexes it with the position of
 * avctx->sample_rate in the MPEG-4 sample-rate list and rejects indexes
 * beyond the end of this array. Neighbouring rates share a table.
 * NOTE(review): any entries after the fourth row are not visible here. */
105 static const uint8_t *swb_size_1024[] = {
106 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
107 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
108 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
109 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
/* Scalefactor band widths, in spectral coefficients, for the 128-coefficient
 * (eight-short-window) case. Selected through swb_size_128[] for sample-rate
 * indexes 0, 1 and 2. The listed widths add up to 128. */
113 static const uint8_t swb_size_128_96[] = {
114 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
/* Scalefactor band widths, in spectral coefficients, for the 128-coefficient
 * (eight-short-window) case. Selected through swb_size_128[] for sample-rate
 * indexes 3, 4 and 5. The listed widths add up to 128. */
117 static const uint8_t swb_size_128_48[] = {
118 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
/* Scalefactor band widths, in spectral coefficients, for the 128-coefficient
 * (eight-short-window) case. Selected through swb_size_128[] for sample-rate
 * indexes 6 and 7. The listed widths add up to 128. */
121 static const uint8_t swb_size_128_24[] = {
122 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
/* Scalefactor band widths, in spectral coefficients, for the 128-coefficient
 * (eight-short-window) case. Selected through swb_size_128[] for sample-rate
 * indexes 8, 9 and 10. The listed widths add up to 128. */
125 static const uint8_t swb_size_128_16[] = {
126 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
/* Scalefactor band widths, in spectral coefficients, for the 128-coefficient
 * (eight-short-window) case. Selected through swb_size_128[] for sample-rate
 * index 11. The listed widths add up to 128. */
129 static const uint8_t swb_size_128_8[] = {
130 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
/* Lookup from sample-rate index to the band-width table used for
 * 128-coefficient windows; the short-window counterpart of swb_size_1024[].
 * aac_encode_init() indexes both arrays with the same index and bounds-checks
 * it against each of them.
 * NOTE(review): any entries after the fourth row are not visible here. */
133 static const uint8_t *swb_size_128[] = {
134 /* the last entry on the following row is swb_size_128_64 but is a
135 duplicate of swb_size_128_96 */
136 swb_size_128_96, swb_size_128_96, swb_size_128_96,
137 swb_size_128_48, swb_size_128_48, swb_size_128_48,
138 swb_size_128_24, swb_size_128_24, swb_size_128_16,
139 swb_size_128_16, swb_size_128_16, swb_size_128_8,
143 /** default channel configurations */
/* One row per supported channel count; aac_encode_init() picks row
 * [channels - 1] and stores it as s->chan_map. Column 0 holds the number of
 * syntactic elements in the row and the following columns hold their types
 * in bitstream order. The encoder reads chan_map[0] as the loop bound and
 * chan_map[i + 1] as the element tag. */
144 static const uint8_t aac_chan_configs[6][5] = {
145 {1, TYPE_SCE}, // 1 channel - single channel element
146 {1, TYPE_CPE}, // 2 channels - channel pair
147 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
148 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
149 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
150 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
154 * Table to remap channels from libavcodec's default order to AAC order.
/* Row [channels - 1] is used by copy_input_samples(): entry [ch] is the index
 * into frame->extended_data[] that supplies the samples for encoder channel
 * ch.
 * NOTE(review): only the 6-channel row is visible in this excerpt; the rows
 * for 1 to 5 channels lie on lines that are not shown. */
156 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
162 { 2, 0, 1, 4, 5, 3 },
166 * Make AAC audio config object.
167 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
/* Serialise the codec configuration into avctx->extradata with a local bit
 * writer. The buffer and avctx->extradata_size must already be set up by the
 * caller; aac_encode_init() allocates the buffer and sets the size to 5 just
 * before calling this.
 * NOTE(review): the declaration of `pb` and the statements after the last
 * put_bits() below are not visible in this excerpt. */
169 static void put_audio_specific_config(AVCodecContext *avctx)
172 AACEncContext *s = avctx->priv_data;
174 init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
175 put_bits(&pb, 5, 2); //object type - AAC-LC
176 put_bits(&pb, 4, s->samplerate_index); //sample rate index
// 4-bit field carrying the raw channel count
177 put_bits(&pb, 4, s->channels);
179 put_bits(&pb, 1, 0); //frame length - 1024 samples
180 put_bits(&pb, 1, 0); //does not depend on core coder
181 put_bits(&pb, 1, 0); //is not extension
183 //Explicitly Mark SBR absent
184 put_bits(&pb, 11, 0x2b7); //sync extension
185 put_bits(&pb, 5, AOT_SBR);
/* Emits the signature of a static windowing function named
 * apply_<type>_window. The function body follows the macro invocation.
 * The generated functions are stored in the apply_window[] table, whose
 * element type is (AVFloatDSPContext *fdsp, SingleChannelElement *sce,
 * const float *audio).
 * NOTE(review): the last parameter line of the macro is not visible in this
 * excerpt. */
190 #define WINDOW_FUNC(type) \
191 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
192 SingleChannelElement *sce, \
/* Window 2048 input samples into sce->ret_buf for a long-only frame.
 * use_kb_window[0] and use_kb_window[1] each choose between the KBD and the
 * sine long window: [0] picks the window for samples 0..1023 and [1] the
 * one for samples 1024..2047, which goes through vector_fmul_reverse. */
195 WINDOW_FUNC(only_long)
197 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
198 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199 float *out = sce->ret_buf;
201 fdsp->vector_fmul (out, audio, lwindow, 1024);
202 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
/* Window 2048 input samples into sce->ret_buf for a long-start frame.
 * Output layout:
 *   [0, 1024)     input multiplied by the long window
 *   [1024, 1472)  input copied unchanged (448 samples)
 *   [1472, 1600)  input passed through vector_fmul_reverse with the short
 *                 window (128 samples)
 *   [1600, 2048)  zeros (448 samples) */
205 WINDOW_FUNC(long_start)
207 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
208 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
209 float *out = sce->ret_buf;
211 fdsp->vector_fmul(out, audio, lwindow, 1024);
212 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
213 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
214 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
/* Window 2048 input samples into sce->ret_buf for a long-stop frame; the
 * mirror image of the long-start layout.
 * Output layout:
 *   [0, 448)      zeros
 *   [448, 576)    input multiplied by the short window (128 samples)
 *   [576, 1024)   input copied unchanged (448 samples)
 *   [1024, 2048)  input passed through vector_fmul_reverse with the long
 *                 window */
217 WINDOW_FUNC(long_stop)
219 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
220 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
221 float *out = sce->ret_buf;
223 memset(out, 0, sizeof(out[0]) * 448);
224 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
225 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
226 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
/* Window the input for an eight-short-window frame. Reading starts 448
 * samples into `audio` and results go to sce->ret_buf. Each of the 8
 * iterations issues one vector_fmul and one vector_fmul_reverse over 128
 * samples. The first iteration uses `swindow` for the vector_fmul and the
 * other seven use `pwindow`; the reverse call always uses `swindow`.
 * NOTE(review): the statements that advance `in` and `out` between the calls
 * are not visible in this excerpt. */
229 WINDOW_FUNC(eight_short)
231 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
232 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
233 const float *in = audio + 448;
234 float *out = sce->ret_buf;
237 for (w = 0; w < 8; w++) {
238 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
241 fdsp->vector_fmul_reverse(out, in, swindow, 128);
/* Dispatch table from window-sequence value to the matching windowing
 * function. apply_window_and_mdct() indexes it with
 * sce->ics.window_sequence[0]. Designated initialisers keep the mapping
 * independent of the enum's numeric order. */
246 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
247 SingleChannelElement *sce,
248 const float *audio) = {
249 [ONLY_LONG_SEQUENCE] = apply_only_long_window,
250 [LONG_START_SEQUENCE] = apply_long_start_window,
251 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
252 [LONG_STOP_SEQUENCE] = apply_long_stop_window
/* Window the channel's input into sce->ret_buf, transform it into
 * sce->coeffs, then prepare state for the next frame.
 * NOTE(review): the remaining parameter line (the `audio` pointer), the
 * declaration of `i` and the `else` that presumably separates the two
 * transform paths are not visible in this excerpt. */
255 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
259 float *output = sce->ret_buf;
// pick the windowing routine for the current window sequence
261 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
// long sequences: a single transform through the mdct1024 context
263 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
264 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
// short sequences: eight transforms through mdct128; each reads at an input
// offset twice its output offset (i*2 vs i)
266 for (i = 0; i < 1024; i += 128)
267 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
// move the upper 1024 samples of the input buffer down to its start
268 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
// keep an untouched copy of the coefficients in pcoeffs
269 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
273 * Encode ics_info element.
274 * @see Table 4.6 (syntax of ics_info)
/* Write the ics_info fields for `info` to s->pb.
 * Long sequences get a 6-bit max_sfb plus a zero "prediction" bit. The
 * eight-short case gets a 4-bit max_sfb plus seven grouping bits.
 * NOTE(review): the `else` between the two layouts and the declaration of
 * `w` are not visible in this excerpt. */
276 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
280 put_bits(&s->pb, 1, 0); // ics_reserved bit
281 put_bits(&s->pb, 2, info->window_sequence[0]);
282 put_bits(&s->pb, 1, info->use_kb_window[0]);
283 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
284 put_bits(&s->pb, 6, info->max_sfb);
285 put_bits(&s->pb, 1, 0); // no prediction
287 put_bits(&s->pb, 4, info->max_sfb);
// one bit per window 1..7: set when group_len[w] is zero
288 for (w = 1; w < 8; w++)
289 put_bits(&s->pb, 1, !info->group_len[w]);
295 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
/* Write the M/S signalling of a channel pair to `pb`: a 2-bit ms_mode, and
 * when ms_mode is 1, one mask bit per band (0..max_sfb-1) for each window
 * group of channel 0. Mask entries are addressed as [w*16 + band].
 * NOTE(review): the declarations of `i` and `w` are not visible here. */
297 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
301 put_bits(pb, 2, cpe->ms_mode);
302 if (cpe->ms_mode == 1)
// step from group start to group start using the group length
303 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
304 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
305 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
309 * Produce integer coefficients from scalefactors provided by the model.
/* Finalise per-element information before the bitstream is written.
 *  1. With a common window, rewrite the coefficients of bands flagged in
 *     is_mask or ms_mask from the saved pcoeffs (stereo transforms).
 *  2. For each channel, reset the pulse count, derive max_sfb from the
 *     highest band not flagged in zeroes[], and fold the per-window zero
 *     flags of each group into the group's first window.
 *  3. For a pair with a common window, give both channels the same max_sfb
 *     and choose ms_mode from the number of set mask entries.
 * NOTE(review): several short lines (local declarations, closing braces, the
 * msc counter update, the ms_mode = 0 branch) are not visible in this
 * excerpt; comments below describe only what the visible lines show. */
311 static void adjust_frame_information(ChannelElement *cpe, int chans)
315 IndividualChannelStream *ics;
317 if (cpe->common_window) {
318 ics = &cpe->ch[0].ics;
319 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
320 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
// each window occupies 128 coefficients
321 int start = (w+w2) * 128;
322 for (g = 0; g < ics->num_swb; g++) {
323 //apply Intensity stereo coeffs transformation
324 if (cpe->is_mask[w*16 + g]) {
// p is -1 when channel 1's band type equals 14 and +1 when it equals 15
325 int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
326 float scale = cpe->ch[0].is_ener[w*16+g];
327 for (i = 0; i < ics->swb_sizes[g]; i++) {
// channel 0 takes the scaled combination, channel 1 is cleared
328 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
329 cpe->ch[1].coeffs[start+i] = 0.0f;
// mid/side only when neither channel's band type is NOISE_BT or above
331 } else if (cpe->ms_mask[w*16 + g] &&
332 cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
333 cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
334 for (i = 0; i < ics->swb_sizes[g]; i++) {
// channel 0 = (L + R) / 2; channel 1 = that mid value minus R
335 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
336 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
339 start += ics->swb_sizes[g];
345 for (ch = 0; ch < chans; ch++) {
346 IndividualChannelStream *ics = &cpe->ch[ch].ics;
// no pulses are ever signalled
348 cpe->ch[ch].pulse.num_pulse = 0;
349 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
350 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
// scan down from the top band to the first one not marked zero
// NOTE(review): the index uses w, not w+w2, so every w2 pass repeats the
// same scan of the group's first window; confirm this is intended
351 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
353 maxsfb = FFMAX(maxsfb, cmaxsfb);
356 ics->max_sfb = maxsfb;
358 //adjust zero bands for window groups
359 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
360 for (g = 0; g < ics->max_sfb; g++) {
362 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
363 if (!cpe->ch[ch].zeroes[w2*16 + g]) {
// the group's first window carries the combined flag for band g
368 cpe->ch[ch].zeroes[w*16 + g] = i;
373 if (chans > 1 && cpe->common_window) {
374 IndividualChannelStream *ics0 = &cpe->ch[0].ics;
375 IndividualChannelStream *ics1 = &cpe->ch[1].ics;
// both channels share the larger max_sfb
377 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
378 ics1->max_sfb = ics0->max_sfb;
// walk every window (stride 16), not just group starts
379 for (w = 0; w < ics0->num_windows*16; w += 16)
380 for (i = 0; i < ics0->max_sfb; i++)
381 if (cpe->ms_mask[w+i])
383 if (msc == 0 || ics0->max_sfb == 0)
// 1 when msc is below max_sfb * num_windows, otherwise 2
386 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
391 * Encode scalefactor band coding type.
/* For every window group of `sce`, call the active coder's
 * encode_window_bands_info hook with the group's first window index, the
 * group length and the current lambda.
 * NOTE(review): the declaration of `w` is not visible in this excerpt. */
393 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
397 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
398 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
402 * Encode scalefactors.
/* Write the scalefactor data of one channel to s->pb. Every band not flagged
 * in zeroes[] is coded as the difference from the previous value of its own
 * kind. Three running references are kept: off_sf for ordinary bands,
 * off_pns for NOISE_BT bands and off_is for the two intensity band types.
 * NOTE(review): some short lines (declarations of `i` and `w`, `else`,
 * closing braces and whatever follows the raw noise write) are not visible
 * in this excerpt. */
404 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
405 SingleChannelElement *sce)
// references start at sf_idx[0] (and sf_idx[0] - NOISE_OFFSET for noise)
407 int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
408 int off_is = 0, noise_flag = 1;
411 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
412 for (i = 0; i < sce->ics.max_sfb; i++) {
413 if (!sce->zeroes[w*16 + i]) {
414 if (sce->band_type[w*16 + i] == NOISE_BT) {
415 diff = sce->sf_idx[w*16 + i] - off_pns;
416 off_pns = sce->sf_idx[w*16 + i];
// only the first noise band is written as a raw NOISE_PRE_BITS-wide field
417 if (noise_flag-- > 0) {
418 put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
421 } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
422 sce->band_type[w*16 + i] == INTENSITY_BT2) {
423 diff = sce->sf_idx[w*16 + i] - off_is;
424 off_is = sce->sf_idx[w*16 + i];
426 diff = sce->sf_idx[w*16 + i] - off_sf;
427 off_sf = sce->sf_idx[w*16 + i];
// bias the difference into the code table's index range; out of range aborts
429 diff += SCALE_DIFF_ZERO;
430 av_assert0(diff >= 0 && diff <= 120);
431 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
/* Write pulse data to s->pb: a presence bit, then, for a non-empty set,
 * a 2-bit (num_pulse - 1), a 6-bit start value, and a 5-bit position plus
 * 4-bit amplitude per pulse.
 * NOTE(review): the statement guarded by `if (!pulse->num_pulse)` is not
 * visible in this excerpt; presumably it returns early. */
440 static void encode_pulses(AACEncContext *s, Pulse *pulse)
444 put_bits(&s->pb, 1, !!pulse->num_pulse);
445 if (!pulse->num_pulse)
448 put_bits(&s->pb, 2, pulse->num_pulse - 1);
449 put_bits(&s->pb, 6, pulse->start);
450 for (i = 0; i < pulse->num_pulse; i++) {
451 put_bits(&s->pb, 5, pulse->pos[i]);
452 put_bits(&s->pb, 4, pulse->amp[i]);
457 * Encode spectral coefficients processed by psychoacoustic model.
/* Quantise and write the spectral data of one channel. For every window
 * group and every band below max_sfb, bands flagged in zeroes[] are skipped.
 * All other bands are handed, once per window of the group, to the coder's
 * quantize_and_encode_band hook. `start` is the running coefficient offset
 * of the current band inside a window; each window adds w2*128 to it.
 * NOTE(review): the local declarations, the reset of `start`, the statement
 * that ends the zero-band branch and the last argument of the hook call are
 * not visible in this excerpt. */
459 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
463 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
465 for (i = 0; i < sce->ics.max_sfb; i++) {
466 if (sce->zeroes[w*16 + i]) {
// zero band: nothing is written, only the offset moves on
467 start += sce->ics.swb_sizes[i];
470 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
471 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
472 sce->ics.swb_sizes[i],
473 sce->sf_idx[w*16 + i],
474 sce->band_type[w*16 + i],
476 start += sce->ics.swb_sizes[i];
482 * Encode one channel of audio data.
/* Write one channel to s->pb in this order: 8-bit sf_idx[0], ics_info, band
 * info, scalefactors, pulse data, a zero TNS bit, a zero SSR bit, and the
 * spectral coefficients.
 * NOTE(review): the last parameter line, any condition in front of the
 * put_ics_info() call and the return statement are not visible in this
 * excerpt. The call site passes cpe->common_window as the last argument. */
484 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
485 SingleChannelElement *sce,
488 put_bits(&s->pb, 8, sce->sf_idx[0]);
490 put_ics_info(s, &sce->ics);
491 encode_band_info(s, sce);
492 encode_scale_factors(avctx, s, sce);
493 encode_pulses(s, &sce->pulse);
494 put_bits(&s->pb, 1, 0); //tns
495 put_bits(&s->pb, 1, 0); //ssr
496 encode_spectral_coeffs(s, sce);
501 * Write some auxiliary information about the created AAC file.
/* Emit a TYPE_FIL element that carries `name` as payload.
 * namelen is the string length plus 2. The 4-bit count field holds
 * min(namelen, 15), and the 8-bit field below holds namelen - 14.
 * NOTE(review): the condition in front of the 8-bit write is not visible in
 * this excerpt; presumably it is only emitted when the 4-bit field
 * saturates. Also confirm namelen - 14 always fits in 8 bits for the
 * strings passed in. */
503 static void put_bitstream_info(AACEncContext *s, const char *name)
505 int i, namelen, padbits;
507 namelen = strlen(name) + 2;
508 put_bits(&s->pb, 3, TYPE_FIL);
509 put_bits(&s->pb, 4, FFMIN(namelen, 15));
511 put_bits(&s->pb, 8, namelen - 14);
512 put_bits(&s->pb, 4, 0); //extension type - filler
// bits needed to reach the next byte boundary (0..7), taken before aligning
513 padbits = -put_bits_count(&s->pb) & 7;
514 avpriv_align_put_bits(&s->pb);
// the name itself, one byte per character, without the terminator
515 for (i = 0; i < namelen - 2; i++)
516 put_bits(&s->pb, 8, name[i]);
// trailing zero bits: 12 minus the padding that the alignment consumed
517 put_bits(&s->pb, 12 - padbits, 0);
521 * Copy input samples.
522 * Channels are reordered from libavcodec's default order to AAC order.
/* Advance each channel's 3072-sample buffer by one frame and append the new
 * input. Samples [2048, 3072) move to [1024, 2048), the frame's samples are
 * written at [2048, 2048 + nb_samples), and everything from `end` up to
 * 3072 is zeroed. Source planes are chosen through aac_chan_maps.
 * NOTE(review): `frame` may be NULL (`end` handles that case); the guard in
 * front of the frame copy is not visible in this excerpt. The memset length
 * assumes nb_samples <= 1024; verify against callers. */
524 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
527 int end = 2048 + (frame ? frame->nb_samples : 0);
528 const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
530 /* copy and remap input samples */
531 for (ch = 0; ch < s->channels; ch++) {
532 /* copy last 1024 samples of previous frame to the start of the current frame */
533 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
535 /* copy new samples and zero any remaining samples */
537 memcpy(&s->planar_samples[ch][2048],
538 frame->extended_data[channel_map[ch]],
539 frame->nb_samples * sizeof(s->planar_samples[0][0]));
541 memset(&s->planar_samples[ch][end], 0,
542 (3072 - end) * sizeof(s->planar_samples[0][0]));
/* Encode one frame of audio into `avpkt`.
 * Visible stages: queue the frame, shift and fill the sample buffers, run
 * the psy preprocessing, decide windows and transform every channel,
 * allocate the packet, write each channel element (quantiser search,
 * optional PNS / M/S / intensity stereo), compare the frame size with a
 * per-channel limit and rescale lambda, write TYPE_END, update rate control
 * and set the packet size.
 * NOTE(review): many short lines (returns, `else`, `break`, closing braces,
 * the `cpe = ...` and `start_ch` updates, the retry loop around the element
 * writer) are not visible in this excerpt, so the comments below only
 * describe what the visible lines establish. */
546 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
547 const AVFrame *frame, int *got_packet_ptr)
549 AACEncContext *s = avctx->priv_data;
550 float **samples = s->planar_samples, *samples2, *la, *overlap;
552 int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
// one counter per element tag, used for the 4-bit instance number
553 int chan_el_counter[4];
554 FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
556 if (s->last_frame == 2)
559 /* add current frame to queue */
561 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
565 copy_input_samples(s, frame);
567 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
569 if (!avctx->frame_number)
// first pass over the elements: window decision and transform per channel
573 for (i = 0; i < s->chan_map[0]; i++) {
574 FFPsyWindowInfo* wi = windows + start_ch;
575 tag = s->chan_map[i+1];
576 chans = tag == TYPE_CPE ? 2 : 1;
578 for (ch = 0; ch < chans; ch++) {
579 IndividualChannelStream *ics = &cpe->ch[ch].ics;
580 int cur_channel = start_ch + ch;
// pointers into the channel buffer: start, +1024, and +1024+512
581 overlap = &samples[cur_channel][0];
582 samples2 = overlap + 1024;
583 la = samples2 + (448+64);
// LFE channels get a fixed long window instead of asking the psy model
586 if (tag == TYPE_LFE) {
587 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
588 wi[ch].window_shape = 0;
589 wi[ch].num_windows = 1;
590 wi[ch].grouping[0] = 1;
592 /* Only the lowest 12 coefficients are used in a LFE channel.
593 * The expression below results in only the bottom 8 coefficients
594 * being used for 11.025kHz to 16kHz sample rates.
596 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
598 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
599 ics->window_sequence[0]);
// slot [1] keeps the previous frame's choice, slot [0] takes the new one
601 ics->window_sequence[1] = ics->window_sequence[0];
602 ics->window_sequence[0] = wi[ch].window_type[0];
603 ics->use_kb_window[1] = ics->use_kb_window[0];
604 ics->use_kb_window[0] = wi[ch].window_shape;
605 ics->num_windows = wi[ch].num_windows;
// index 1 of the psy band tables is used when there are 8 windows
606 ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
607 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
608 for (w = 0; w < ics->num_windows; w++)
609 ics->group_len[w] = wi[ch].grouping[w];
611 apply_window_and_mdct(s, &cpe->ch[ch], overlap);
// NOTE(review): cpe->ch->coeffs is channel 0 of the element regardless of the
// loop variable ch, and only coefficient 0 is tested; presumably this should
// look at cpe->ch[ch] - confirm
612 if (isnan(cpe->ch->coeffs[0])) {
613 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
614 return AVERROR(EINVAL);
// output buffer is sized at 8192 bytes per channel
619 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
624 init_put_bits(&s->pb, avpkt->data, avpkt->size);
// when frame_number's low byte equals 1 and bitexact is off, embed the
// library identification string
626 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
627 put_bitstream_info(s, LIBAVCODEC_IDENT);
629 memset(chan_el_counter, 0, sizeof(chan_el_counter));
// second pass over the elements: analysis, quantisation and bit writing
630 for (i = 0; i < s->chan_map[0]; i++) {
631 FFPsyWindowInfo* wi = windows + start_ch;
632 const float *coeffs[2];
633 tag = s->chan_map[i+1];
634 chans = tag == TYPE_CPE ? 2 : 1;
636 memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
637 memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
// element header: 3-bit tag and 4-bit per-tag instance counter
638 put_bits(&s->pb, 3, tag);
639 put_bits(&s->pb, 4, chan_el_counter[tag]++);
640 for (ch = 0; ch < chans; ch++)
641 coeffs[ch] = cpe->ch[ch].coeffs;
642 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
643 for (ch = 0; ch < chans; ch++) {
644 s->cur_channel = start_ch + ch;
645 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
// common window requires matching window type, shape and grouping
647 cpe->common_window = 0;
649 && wi[0].window_type[0] == wi[1].window_type[0]
650 && wi[0].window_shape == wi[1].window_shape) {
652 cpe->common_window = 1;
653 for (w = 0; w < wi[0].num_windows; w++) {
654 if (wi[0].grouping[w] != wi[1].grouping[w]) {
655 cpe->common_window = 0;
660 if (s->options.pns && s->coder->search_for_pns) {
661 for (ch = 0; ch < chans; ch++) {
662 s->cur_channel = start_ch + ch;
663 s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
666 s->cur_channel = start_ch;
// stereo_mode > 0 forces the mask on for every band; the negative setting
// delegates to the coder's search_for_ms hook when it exists
667 if (s->options.stereo_mode && cpe->common_window) {
668 if (s->options.stereo_mode > 0) {
669 IndividualChannelStream *ics = &cpe->ch[0].ics;
670 for (w = 0; w < ics->num_windows; w += ics->group_len[w])
671 for (g = 0; g < ics->num_swb; g++)
672 cpe->ms_mask[w*16+g] = 1;
673 } else if (s->coder->search_for_ms) {
674 s->coder->search_for_ms(s, cpe, s->lambda);
677 if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
678 s->coder->search_for_is(s, avctx, cpe, s->lambda);
679 if (cpe->is_mode) is_mode = 1;
681 if (s->coder->set_special_band_scalefactors)
682 for (ch = 0; ch < chans; ch++)
683 s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
684 adjust_frame_information(cpe, chans);
686 put_bits(&s->pb, 1, cpe->common_window);
687 if (cpe->common_window) {
688 put_ics_info(s, &cpe->ch[0].ics);
689 encode_ms_info(&s->pb, cpe);
690 if (cpe->ms_mode) ms_mode = 1;
693 for (ch = 0; ch < chans; ch++) {
694 s->cur_channel = start_ch + ch;
695 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
// accept the frame when it fits in 6144 bits per channel, leaving 3 bits spare
700 frame_bits = put_bits_count(&s->pb);
701 if (frame_bits <= 6144 * s->channels - 3) {
702 s->psy.bitres.bits = frame_bits / s->channels;
// stereo tools overwrote coeffs, so put the saved copies back
705 if (is_mode || ms_mode) {
706 for (i = 0; i < s->chan_map[0]; i++) {
707 // Must restore coeffs
// NOTE(review): tag is not reassigned in the visible lines of this loop, so
// chans appears to come from the last element handled above - confirm
708 chans = tag == TYPE_CPE ? 2 : 1;
710 for (ch = 0; ch < chans; ch++)
711 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
// scale lambda by (target bits per frame) / (bits actually produced)
715 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
719 put_bits(&s->pb, 3, TYPE_END);
720 flush_put_bits(&s->pb);
721 avctx->frame_bits = put_bits_count(&s->pb);
723 // rate control stuff
724 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
725 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
// lambda is capped at 65536
727 s->lambda = FFMIN(s->lambda, 65536.f);
733 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
// packet size in whole bytes
736 avpkt->size = put_bits_count(&s->pb) >> 3;
/* Release encoder state: both MDCT contexts, the psy preprocessing context,
 * the sample buffer and the audio frame queue. aac_encode_init() also calls
 * this on its failure path.
 * NOTE(review): further cleanup calls and the return statement sit on lines
 * that are not visible in this excerpt. */
741 static av_cold int aac_encode_end(AVCodecContext *avctx)
743 AACEncContext *s = avctx->priv_data;
745 ff_mdct_end(&s->mdct1024);
746 ff_mdct_end(&s->mdct128);
749 ff_psy_preprocess_end(s->psypp);
750 av_freep(&s->buffer.samples);
753 ff_af_queue_close(&s->afq);
/* Set up the signal-processing helpers: allocate the float DSP context,
 * initialise the KBD and sine window tables, and initialise the two MDCT
 * contexts (ff_mdct_init with 11 and 8 bits, both with scale 32768.0).
 * Returns AVERROR(ENOMEM) or the negative ff_mdct_init() result on failure.
 * NOTE(review): the allocation check, the `return ret` lines and the final
 * return are not visible in this excerpt. */
757 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
761 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
763 return AVERROR(ENOMEM);
// KBD windows: long (1024) with alpha 4.0, short (128) with alpha 6.0
766 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
767 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
// sine windows of size 2^10 and 2^7
768 ff_init_ff_sine_windows(10);
769 ff_init_ff_sine_windows(7);
771 if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
773 if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
/* Allocate zeroed storage for the encoder: one contiguous sample buffer of
 * 3 * 1024 samples per channel, the channel-element array (chan_map[0]
 * entries) and 5 bytes of extradata plus padding. planar_samples[ch] is then
 * pointed at channel ch's slice of the sample buffer. The allocation macros
 * jump to alloc_fail on failure, where AVERROR(ENOMEM) is returned.
 * NOTE(review): the success return and the alloc_fail label line are not
 * visible in this excerpt. */
779 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
782 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
783 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
784 FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
786 for(ch = 0; ch < s->channels; ch++)
787 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
791 return AVERROR(ENOMEM);
/* Encoder init callback.
 * Validates sample rate, channel count and profile, warns about and clamps
 * an excessive bit rate, then sets up DSP helpers, buffers, the codec
 * configuration in extradata, the psychoacoustic model, the coefficient
 * coder, the initial lambda and the audio frame queue. Rejected settings
 * return AVERROR(EINVAL) through ERROR_IF. On the failure path
 * aac_encode_end() is called for cleanup.
 * NOTE(review): several short lines (declarations of `i`, `ret` and
 * `lengths`, `break`, `goto fail`, the first ERROR_IF condition line, the
 * second FFMIN argument, the final returns) are not visible in this
 * excerpt. */
794 static av_cold int aac_encode_init(AVCodecContext *avctx)
796 AACEncContext *s = avctx->priv_data;
798 const uint8_t *sizes[2];
799 uint8_t grouping[AAC_MAX_CHANNELS];
802 avctx->frame_size = 1024;
// look up the sample rate in the 16-entry MPEG-4 rate list
804 for (i = 0; i < 16; i++)
805 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
808 s->channels = avctx->channels;
// the index must also be valid for both band-size lookup tables
811 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
812 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
813 "Unsupported sample rate %d\n", avctx->sample_rate);
814 ERROR_IF(s->channels > AAC_MAX_CHANNELS,
815 "Unsupported number of channels: %d\n", s->channels);
816 ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
817 "Unsupported profile %d\n", avctx->profile);
// 6144 bits per channel per 1024-sample frame is the limit applied here
818 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
819 "Too many bits per frame requested, clamping to max\n");
821 avctx->bit_rate = (int)FFMIN(
822 6144 * s->channels / 1024.0 * avctx->sample_rate,
825 s->samplerate_index = i;
// element layout for this channel count
827 s->chan_map = aac_chan_configs[s->channels-1];
829 if ((ret = dsp_init(avctx, s)) < 0)
832 if ((ret = alloc_buffers(avctx, s)) < 0)
// must match the 5 bytes allocated for extradata in alloc_buffers()
835 avctx->extradata_size = 5;
836 put_audio_specific_config(avctx);
// entry 0: tables for 1024-coefficient windows, entry 1: 128-coefficient
838 sizes[0] = swb_size_1024[i];
839 sizes[1] = swb_size_128[i];
840 lengths[0] = ff_aac_num_swb_1024[i];
841 lengths[1] = ff_aac_num_swb_128[i];
// `i` is reused as the element index from here on; grouping[i] is 1 for
// channel pairs and 0 otherwise
842 for (i = 0; i < s->chan_map[0]; i++)
843 grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
844 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
845 s->chan_map[0], grouping)) < 0)
847 s->psypp = ff_psy_preprocess_init(avctx);
848 s->coder = &ff_aac_coders[s->options.aac_coder];
851 ff_aac_coder_init_mips(s);
// starting lambda: global_quality when positive, otherwise 120
853 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
857 avctx->initial_padding = 1024;
858 ff_af_queue_init(avctx, &s->afq);
862 aac_encode_end(avctx);
/* Private encoder options. Each integer option (stereo_mode, aac_coder,
 * aac_pns, aac_is) is followed by the named constants of its unit; the unit
 * string in the last column ties a constant to its option. Defaults:
 * stereo_mode 0 (ms_off), aac_coder AAC_CODER_TWOLOOP, aac_pns 0, aac_is 0.
 * NOTE(review): the AACENC_FLAGS body is not parenthesised. That is harmless
 * in the initialisers below but would misbehave inside a larger expression. */
866 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
867 static const AVOption aacenc_options[] = {
868 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
869 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
870 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
871 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
872 {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
873 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
874 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
875 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
876 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
877 {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
878 {"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
879 {"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
880 {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
881 {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
882 {"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
/* AVClass descriptor referenced by ff_aac_encoder.priv_class.
 * NOTE(review): the initialiser is positional and two of its entries
 * (presumably the class name and the option table) are on lines that are
 * not visible in this excerpt. */
886 static const AVClass aacenc_class = {
888 av_default_item_name,
890 LIBAVUTIL_VERSION_INT,
893 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
895 static const int mpeg4audio_sample_rates[16] = {
// 13 rates are listed; the remaining slots of the 16-entry array are
// zero-initialised. The order lines up with the entries of the
// swb_size_1024 and swb_size_128 lookup tables.
896 96000, 88200, 64000, 48000, 44100, 32000,
897 24000, 22050, 16000, 12000, 11025, 8000, 7350
/* Codec registration entry for the AAC encoder. It wires up the init,
 * encode and close callbacks defined in this file, advertises the supported
 * sample rates and planar-float input, and flags the encoder as experimental,
 * delayed and able to take a short last frame.
 * NOTE(review): the .name field is on a line that is not visible in this
 * excerpt. */
900 AVCodec ff_aac_encoder = {
902 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
903 .type = AVMEDIA_TYPE_AUDIO,
904 .id = AV_CODEC_ID_AAC,
905 .priv_data_size = sizeof(AACEncContext),
906 .init = aac_encode_init,
907 .encode2 = aac_encode_frame,
908 .close = aac_encode_end,
909 .supported_samplerates = mpeg4audio_sample_rates,
910 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
911 CODEC_CAP_EXPERIMENTAL,
912 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
913 AV_SAMPLE_FMT_NONE },
914 .priv_class = &aacenc_class,