3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
70 * N (planned) Parametric Stereo
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aac_parser.h"
101 # include "arm/aac.h"
// File-scope Huffman (VLC) tables: one for scalefactors, eleven for the spectral codebooks.
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
// Shared error text logged by the parsers below when a read would pass the end of the input.
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
// Resolve a (type, elem_id) tag read from the bitstream to its ChannelElement.
// Order of attempts: step past ids already seen in this frame, return an existing
// tag_che_map entry, then fall back to assigning an element from ac->che based on
// m4ac.chan_config and the number of tags mapped so far.
// NOTE(review): the while condition reads tags_seen_this_frame[type][elem_id]
// before testing elem_id < MAX_ELEM_ID. If that dimension is MAX_ELEM_ID (the
// declaration is not visible here) the final iteration reads one slot past the
// end; testing the bound first would avoid it.
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
121 while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
// For chan_config 2 the CPE must be the first tag mapped (tags_mapped == 0);
// for every other layout it must be the second (tags_mapped == 1).
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
// An SCE arriving before anything else is mapped takes SCE[0].
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
// Allocate (or release) the channel element at [type][id] depending on whether
// che_pos marks that position as used, and append its output buffers to
// ac->output_data while advancing *channels.
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
// Allocate only when no element exists yet; an existing one is kept.
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
// TYPE_CCE elements get no output_data slot; a CPE contributes two, others one.
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE) {
204 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
// Presumably the else branch for an unused position (the else line is not
// visible here): close the SBR context if an element exists, then free it.
208 if (ac->che[type][id])
209 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
210 av_freep(&ac->che[type][id]);
216 * Configure output channel order based on the current program configuration element.
218 * @param che_pos current channel position configuration
219 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
221 * @return Returns error status. 0 - OK, !0 - error
// Apply a new channel position layout: copy it into che_pos, (de)allocate the
// channel elements through che_configure, then publish the channel count,
// channel layout and configuration status on the codec context.
223 static av_cold int output_configure(AACContext *ac,
224 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
225 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
226 int channel_config, enum OCStatus oc_type)
228 AVCodecContext *avctx = ac->avctx;
229 int i, type, channels = 0, ret;
231 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
// Non-zero channel_config: configure only the elements listed for that default
// layout, in the order given by aac_channel_layout_map, and clear tag_che_map.
233 if (channel_config) {
234 for (i = 0; i < tags_per_config[channel_config]; i++) {
235 if ((ret = che_configure(ac, che_pos,
236 aac_channel_layout_map[channel_config - 1][i][0],
237 aac_channel_layout_map[channel_config - 1][i][1],
242 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
245 avctx->channel_layout = aac_channel_layout[channel_config - 1];
247 /* Allocate or free elements depending on if they are in the
248 * current program configuration.
250 * Set up default 1:1 output mapping.
252 * For a 5.1 stream the output order will be:
253 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
256 for (i = 0; i < MAX_ELEM_ID; i++) {
257 for (type = 0; type < 4; type++) {
258 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
// Every tag is pre-mapped 1:1 to its element, so tags_mapped is set to the full table size.
263 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
264 ac->tags_mapped = 4 * MAX_ELEM_ID;
266 avctx->channel_layout = 0;
269 avctx->channels = channels;
271 ac->output_configured = oc_type;
277 * Decode an array of 4-bit element IDs, optionally interleaved with a stereo/mono switching bit.
279 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
280 * @param sce_map mono (Single Channel Element) map
281 * @param type speaker type/position for these channels
// Read channel map entries from gb and record the speaker position `type`
// at the decoded element id in either the stereo or the mono map.
283 static void decode_channel_map(enum ChannelPosition *cpe_map,
284 enum ChannelPosition *sce_map,
285 enum ChannelPosition type,
286 GetBitContext *gb, int n)
// The stereo/mono bit is only read when cpe_map is non-NULL (short-circuit &&).
289 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
290 map[get_bits(gb, 4)] = type;
295 * Decode program configuration element; reference: table 4.2.
297 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
299 * @return Returns error status. 0 - OK, !0 - error
// Parse a program configuration element from gb and fill new_che_pos with the
// speaker position of every front/side/back/LFE/coupling element it lists.
301 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
304 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
307 skip_bits(gb, 2); // object_type
// A sample rate index mismatch with the container is only warned about, not treated as an error.
309 sampling_index = get_bits(gb, 4);
310 if (ac->m4ac.sampling_index != sampling_index)
311 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
313 num_front = get_bits(gb, 4);
314 num_side = get_bits(gb, 4);
315 num_back = get_bits(gb, 4);
316 num_lfe = get_bits(gb, 2);
317 num_assoc_data = get_bits(gb, 3);
318 num_cc = get_bits(gb, 4);
// NOTE(review): the three skips below are presumably each guarded by a presence
// bit read on lines not visible here - confirm against the full file.
321 skip_bits(gb, 4); // mono_mixdown_tag
323 skip_bits(gb, 4); // stereo_mixdown_tag
326 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
328 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
329 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
// LFE entries carry no stereo bit, hence the NULL cpe_map.
331 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
// Associated data elements are skipped, 4 bits each.
333 skip_bits_long(gb, 4 * num_assoc_data);
335 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
339 /* comment field, first byte is length */
// comment_len is in bits (length byte times 8); it is checked against the bits left before skipping.
340 comment_len = get_bits(gb, 8) * 8;
341 if (get_bits_left(gb) < comment_len) {
342 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
345 skip_bits_long(gb, comment_len);
350 * Set up channel positions based on a default channel configuration
351 * as specified in table 1.17.
353 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
355 * @return Returns error status. 0 - OK, !0 - error
// Fill new_che_pos for one of the predefined channel configurations 1..7;
// any other value is logged as invalid.
357 static av_cold int set_default_channel_config(AACContext *ac,
358 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
361 if (channel_config < 1 || channel_config > 7) {
362 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
367 /* default channel configurations:
369 * 1ch : front center (mono)
370 * 2ch : L + R (stereo)
371 * 3ch : front center + L + R
372 * 4ch : front center + L + R + back center
373 * 5ch : front center + L + R + back stereo
374 * 6ch : front center + L + R + back stereo + LFE
375 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
378 if (channel_config != 2)
379 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
380 if (channel_config > 1)
381 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
382 if (channel_config == 4)
383 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
// The back pair is CPE[1], except for configuration 7 where CPE[1] holds the
// outer front pair (set below) and the back pair moves to CPE[2].
384 if (channel_config > 4)
385 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
386 = AAC_CHANNEL_BACK; // back stereo
387 if (channel_config > 5)
388 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
389 if (channel_config == 7)
390 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
396 * Decode GA "General Audio" specific configuration; reference: table 4.1.
398 * @return Returns error status. 0 - OK, !0 - error
// Parse the GA specific config from gb, build the channel position table (from
// a PCE when channel_config is 0, otherwise from the default layout) and apply
// it through output_configure with status OC_GLOBAL_HDR.
400 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
403 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
404 int extension_flag, ret;
// A set frameLengthFlag is reported as a missing feature.
406 if (get_bits1(gb)) { // frameLengthFlag
407 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
411 if (get_bits1(gb)) // dependsOnCoreCoder
412 skip_bits(gb, 14); // coreCoderDelay
413 extension_flag = get_bits1(gb);
415 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
416 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
417 skip_bits(gb, 3); // layerNr
// Start from an all-zero table so only positions set below are marked as used.
419 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
420 if (channel_config == 0) {
421 skip_bits(gb, 4); // element_instance_tag
422 if ((ret = decode_pce(ac, new_che_pos, gb)))
425 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
428 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
// Extension fields are read and discarded; which ones exist depends on the object type.
431 if (extension_flag) {
432 switch (ac->m4ac.object_type) {
434 skip_bits(gb, 5); // numOfSubFrame
435 skip_bits(gb, 11); // layer_length
439 case AOT_ER_AAC_SCALABLE:
441 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
442 * aacScalefactorDataResilienceFlag
443 * aacSpectralDataResilienceFlag
447 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
453 * Decode audio specific configuration; reference: table 1.13.
455 * @param data pointer to AVCodecContext extradata
456 * @param data_size size of AVCodecContext extradata
458 * @return Returns error status. 0 - OK, !0 - error
// Parse the audio specific config held in data: the generic MPEG-4 audio
// header goes through ff_mpeg4audio_get_config, then the object type selects
// the GA specific parser or the unsupported-type error.
460 static int decode_audio_specific_config(AACContext *ac, void *data,
466 init_get_bits(&gb, data, data_size * 8);
// The non-negative return value i is used below as a bit count to skip,
// presumably the number of bits the generic header consumed - TODO confirm.
468 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
// Indices above 12 are rejected here; several tables in this file are indexed by sampling_index.
470 if (ac->m4ac.sampling_index > 12) {
471 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
475 skip_bits_long(&gb, i);
477 switch (ac->m4ac.object_type) {
480 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
484 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
485 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
492 * linear congruential pseudorandom number generator
494 * @param previous_val the current state of the generator (passed by value)
496 * @return Returns a 32-bit pseudorandom integer
// One step of a linear congruential generator: next = prev * 1664525 + 1013904223.
// NOTE(review): the arithmetic is done in signed int, so wrap-around is signed
// overflow (undefined behaviour in C). Computing in unsigned and converting the
// result back would keep the same sequence on two's complement targets.
498 static av_always_inline int lcg_random(int previous_val)
500 return previous_val * 1664525 + 1013904223;
// Reset a single PredictorState; the body is not visible in this extract, so
// the exact reset values cannot be documented from here.
503 static av_always_inline void reset_predict_state(PredictorState *ps)
// Reset every one of the MAX_PREDICTORS predictor states in the array ps.
513 static void reset_all_predictors(PredictorState *ps)
516 for (i = 0; i < MAX_PREDICTORS; i++)
517 reset_predict_state(&ps[i]);
// Reset the predictors belonging to one reset group. group_num is 1-based and
// the groups are interleaved: group g owns indices g-1, g-1+30, g-1+60, ...
520 static void reset_predictor_group(PredictorState *ps, int group_num)
523 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
524 reset_predict_state(&ps[i]);
// Decoder initialisation: parse extradata when present, set the output sample
// format, build the static VLC tables, choose the output scaling, and set up
// the MDCT contexts and window tables.
527 static av_cold int aac_decode_init(AVCodecContext *avctx)
529 AACContext *ac = avctx->priv_data;
533 ac->m4ac.sample_rate = avctx->sample_rate;
535 if (avctx->extradata_size > 0) {
536 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
540 avctx->sample_fmt = SAMPLE_FMT_S16;
// One static table per spectral codebook index 0..10; the second argument is
// presumably the static table size passed to the VLC initialiser - TODO confirm.
542 AAC_INIT_VLC_STATIC( 0, 304);
543 AAC_INIT_VLC_STATIC( 1, 270);
544 AAC_INIT_VLC_STATIC( 2, 550);
545 AAC_INIT_VLC_STATIC( 3, 300);
546 AAC_INIT_VLC_STATIC( 4, 328);
547 AAC_INIT_VLC_STATIC( 5, 294);
548 AAC_INIT_VLC_STATIC( 6, 306);
549 AAC_INIT_VLC_STATIC( 7, 268);
550 AAC_INIT_VLC_STATIC( 8, 510);
551 AAC_INIT_VLC_STATIC( 9, 366);
552 AAC_INIT_VLC_STATIC(10, 462);
556 dsputil_init(&ac->dsp, avctx);
// Fixed seed for the noise generator (see lcg_random), so output is repeatable.
558 ac->random_state = 0x1f2e3d4c;
560 // -1024 - Compensate wrong IMDCT method.
561 // 32768 - Required to scale values to the correct range for the bias method
562 // for float to int16 conversion.
// Only the C float-to-int16 routine gets the bias and the extra 32768 factor;
// the other branch (partly not visible here) uses the plain -1024 scale.
564 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
565 ac->add_bias = 385.0f;
566 ac->sf_scale = 1. / (-1024. * 32768.);
570 ac->sf_scale = 1. / -1024.;
// Runtime fill of the 428-entry table of 2^((i - 200) / 4) when tables are not hardcoded.
574 #if !CONFIG_HARDCODED_TABLES
575 for (i = 0; i < 428; i++)
576 ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
577 #endif /* CONFIG_HARDCODED_TABLES */
579 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
580 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
581 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
// Two transforms: one for long windows and a small one for short windows.
// The 11 and 8 are presumably log2 transform sizes - TODO confirm.
584 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
585 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
586 // window initialization
587 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
588 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
589 ff_init_ff_sine_windows(10);
590 ff_init_ff_sine_windows( 7);
598 * Skip data_stream_element; reference: table 4.10.
// Skip over a data stream element: read its byte count, verify that many bytes
// remain in gb, then skip them.
600 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
602 int byte_align = get_bits1(gb);
603 int count = get_bits(gb, 8);
// Extended count byte; the condition guarding this read is on a line not
// visible here (presumably count == 255) - TODO confirm.
605 count += get_bits(gb, 8);
609 if (get_bits_left(gb) < 8 * count) {
610 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
613 skip_bits_long(gb, 8 * count);
// Read frequency domain prediction side info into ics: the reset group number
// and one prediction_used flag per scalefactor band.
617 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
// The read of the reset group is presumably conditional on a reset flag read
// on a line not visible here. Valid group numbers are 1..30.
622 ics->predictor_reset_group = get_bits(gb, 5);
623 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
624 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
// Flags exist only for bands below both max_sfb and the per-sample-rate prediction limit.
628 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
629 ics->prediction_used[sfb] = get_bits1(gb);
635 * Decode Individual Channel Stream info; reference: table 4.6.
637 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
// Parse ics_info into *ics: window sequence and shape, window grouping,
// max_sfb, the per-sample-rate band tables and the prediction side info.
// Every visible error path zeroes the whole structure.
639 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
640 GetBitContext *gb, int common_window)
643 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
644 memset(ics, 0, sizeof(IndividualChannelStream));
// Slot [1] keeps the previous value, slot [0] receives the newly read one.
647 ics->window_sequence[1] = ics->window_sequence[0];
648 ics->window_sequence[0] = get_bits(gb, 2);
649 ics->use_kb_window[1] = ics->use_kb_window[0];
650 ics->use_kb_window[0] = get_bits1(gb);
651 ics->num_window_groups = 1;
652 ics->group_len[0] = 1;
// Short windows: 4-bit max_sfb, then 7 grouping steps. Each step either extends
// the current group or opens a new one; the bit that decides is read on a line
// not visible here.
653 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
655 ics->max_sfb = get_bits(gb, 4);
656 for (i = 0; i < 7; i++) {
658 ics->group_len[ics->num_window_groups - 1]++;
660 ics->num_window_groups++;
661 ics->group_len[ics->num_window_groups - 1] = 1;
664 ics->num_windows = 8;
665 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
666 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
667 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
668 ics->predictor_present = 0;
// Long windows: 6-bit max_sfb, a single window, and optional prediction data.
670 ics->max_sfb = get_bits(gb, 6);
671 ics->num_windows = 1;
672 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
673 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
674 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
675 ics->predictor_present = get_bits1(gb);
676 ics->predictor_reset_group = 0;
// Prediction data is parsed for AAC Main only; it is an error for AAC-LC and
// reported as a missing feature for the remaining object types.
677 if (ics->predictor_present) {
678 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
679 if (decode_prediction(ac, ics, gb)) {
680 memset(ics, 0, sizeof(IndividualChannelStream));
683 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
684 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
685 memset(ics, 0, sizeof(IndividualChannelStream));
688 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
689 memset(ics, 0, sizeof(IndividualChannelStream));
// max_sfb must not exceed the band count selected above for this window type.
695 if (ics->max_sfb > ics->num_swb) {
696 av_log(ac->avctx, AV_LOG_ERROR,
697 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
698 ics->max_sfb, ics->num_swb);
699 memset(ics, 0, sizeof(IndividualChannelStream));
707 * Decode band types (section_data payload); reference: table 4.46.
709 * @param band_type array of the used band type
710 * @param band_type_run_end array of the last scalefactor band of a band type run
712 * @return Returns error status. 0 - OK, !0 - error
// Parse section data: for each window group, read runs of (band type, length)
// and expand them into per-band band_type[] and band_type_run_end[] entries.
// NOTE(review): sect_end is a uint8_t, so repeated escape increments can wrap
// it past 255 and slip under the sect_end > max_sfb check. The over-read check
// also runs only after the escape loop has finished. A wider type with the
// bounds tested inside the loop would be safer on corrupt input.
714 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
715 int band_type_run_end[120], GetBitContext *gb,
716 IndividualChannelStream *ics)
// Section length fields are 3 bits wide for short windows and 5 bits otherwise.
719 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
720 for (g = 0; g < ics->num_window_groups; g++) {
722 while (k < ics->max_sfb) {
723 uint8_t sect_end = k;
725 int sect_band_type = get_bits(gb, 4);
726 if (sect_band_type == 12) {
727 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
// An all-ones length field is an escape: add it and keep reading; the first
// field that is not all ones ends the run length.
730 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
731 sect_end += sect_len_incr;
732 sect_end += sect_len_incr;
733 if (get_bits_left(gb) < 0) {
734 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
737 if (sect_end > ics->max_sfb) {
738 av_log(ac->avctx, AV_LOG_ERROR,
739 "Number of bands (%d) exceeds limit (%d).\n",
740 sect_end, ics->max_sfb);
// Every band of the run stores the same type and the index where the run ends.
743 for (; k < sect_end; k++) {
744 band_type [idx] = sect_band_type;
745 band_type_run_end[idx++] = sect_end;
753 * Decode scalefactors; reference: table 4.47.
755 * @param global_gain first scalefactor value as scalefactors are differentially coded
756 * @param band_type array of the used band type
757 * @param band_type_run_end array of the last scalefactor band of a band type run
758 * @param sf array of scalefactors or intensity stereo positions
760 * @return Returns error status. 0 - OK, !0 - error
// Decode the differentially coded scalefactor data into sf[]: one entry per
// band, filled run by run according to band_type and band_type_run_end.
762 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
763 unsigned int global_gain,
764 IndividualChannelStream *ics,
765 enum BandType band_type[120],
766 int band_type_run_end[120])
// Short windows use a table offset that is 12 larger.
768 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
// Running values of the three differentially coded kinds:
// [0] scalefactor, [1] noise energy, [2] intensity stereo position.
770 int offset[3] = { global_gain, global_gain - 90, 100 };
772 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
773 for (g = 0; g < ics->num_window_groups; g++) {
774 for (i = 0; i < ics->max_sfb;) {
775 int run_end = band_type_run_end[idx];
776 if (band_type[idx] == ZERO_BT) {
777 for (; i < run_end; i++, idx++)
779 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
780 for (; i < run_end; i++, idx++) {
781 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
// Comparing against 255U converts offset to unsigned, so negative values are rejected too.
782 if (offset[2] > 255U) {
783 av_log(ac->avctx, AV_LOG_ERROR,
784 "%s (%d) out of range.\n", sf_str[2], offset[2]);
787 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
789 } else if (band_type[idx] == NOISE_BT) {
790 for (; i < run_end; i++, idx++) {
// noise_flag is declared on a line not visible here; presumably it starts at 1
// so only the first noise band reads the 9-bit start value - TODO confirm.
791 if (noise_flag-- > 0)
792 offset[1] += get_bits(gb, 9) - 256;
794 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
795 if (offset[1] > 255U) {
796 av_log(ac->avctx, AV_LOG_ERROR,
797 "%s (%d) out of range.\n", sf_str[1], offset[1]);
800 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
803 for (; i < run_end; i++, idx++) {
804 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
805 if (offset[0] > 255U) {
806 av_log(ac->avctx, AV_LOG_ERROR,
807 "%s (%d) out of range.\n", sf_str[0], offset[0]);
810 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
819 * Decode pulse data; reference: table 4.7.
// Read pulse data into *pulse: 1 to 4 pulses, each with an absolute position
// and a 4-bit amplitude. Positions above 1023 and a start band outside num_swb
// are rejected (the return statements are on lines not visible here).
821 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
822 const uint16_t *swb_offset, int num_swb)
825 pulse->num_pulse = get_bits(gb, 2) + 1;
826 pulse_swb = get_bits(gb, 6);
827 if (pulse_swb >= num_swb)
// The first position is the start of the chosen band plus a 5-bit offset.
829 pulse->pos[0] = swb_offset[pulse_swb];
830 pulse->pos[0] += get_bits(gb, 5);
831 if (pulse->pos[0] > 1023)
833 pulse->amp[0] = get_bits(gb, 4);
// Later positions are coded as 5-bit offsets from the previous pulse.
834 for (i = 1; i < pulse->num_pulse; i++) {
835 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
836 if (pulse->pos[i] > 1023)
838 pulse->amp[i] = get_bits(gb, 4);
844 * Decode Temporal Noise Shaping data; reference: table 4.48.
846 * @return Returns error status. 0 - OK, !0 - error
// Read Temporal Noise Shaping data into *tns: per window, the number of
// filters and for each filter its length, order, direction and coefficients.
848 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
849 GetBitContext *gb, const IndividualChannelStream *ics)
851 int w, filt, i, coef_len, coef_res, coef_compress;
// Field widths shrink for short windows: is8 is subtracted (once or twice) from each bit count.
852 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
// Maximum filter order: 7 for short windows, 20 for AAC Main, 12 otherwise.
853 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
854 for (w = 0; w < ics->num_windows; w++) {
855 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
856 coef_res = get_bits1(gb);
858 for (filt = 0; filt < tns->n_filt[w]; filt++) {
860 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
// An order above the maximum is logged and reset to 0.
862 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
863 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
864 tns->order[w][filt], tns_max_order);
865 tns->order[w][filt] = 0;
868 if (tns->order[w][filt]) {
869 tns->direction[w][filt] = get_bits1(gb);
870 coef_compress = get_bits1(gb);
// Coefficients are 3 or 4 bits wide (coef_res), minus one when compressed;
// the (compress, res) pair also selects one of four lookup tables.
871 coef_len = coef_res + 3 - coef_compress;
872 tmp2_idx = 2 * coef_compress + coef_res;
874 for (i = 0; i < tns->order[w][filt]; i++)
875 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
884 * Decode Mid/Side data; reference: table 4.54.
886 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
887 * [1] mask is decoded from bitstream; [2] mask is all 1s;
888 * [3] reserved for scalable AAC
// Fill cpe->ms_mask for the first num_window_groups * max_sfb bands of
// channel 0: read one bit per band when ms_present is 1, set every entry to 1
// when it is 2. Other values leave the mask untouched in the visible code.
890 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
894 if (ms_present == 1) {
895 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
896 cpe->ms_mask[idx] = get_bits1(gb);
897 } else if (ms_present == 2) {
898 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
// Write two scaled codebook values to dst. idx packs two 4-bit indices into v
// (bits 0-3 and 4-7). s is set on a line not visible here, presumably from the
// scale argument - TODO confirm.
903 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
907 *dst++ = v[idx & 15] * s;
908 *dst++ = v[idx>>4 & 15] * s;
// Write four scaled codebook values to dst. idx packs four 2-bit indices into v
// (bits 0-1, 2-3, 4-5 and 6-7). s is set on a line not visible here, presumably
// from the scale argument - TODO confirm.
914 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
918 *dst++ = v[idx & 3] * s;
919 *dst++ = v[idx>>2 & 3] * s;
920 *dst++ = v[idx>>4 & 3] * s;
921 *dst++ = v[idx>>6 & 3] * s;
// Signed variant of VMUL2: the sign bits are applied by XOR-ing them into the
// top bit of a copy of the scale before multiplying.
927 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
928 unsigned sign, const float *scale)
930 union float754 s0, s1;
932 s0.f = s1.f = *scale;
// Bit 1 of sign is moved to bit 31 for the first value; the matching update of
// s1 is on a line not visible here.
933 s0.i ^= sign >> 1 << 31;
936 *dst++ = v[idx & 15] * s0.f;
937 *dst++ = v[idx>>4 & 15] * s1.f;
// Signed variant of VMUL4: each output takes its sign from bit 31 of sign,
// XOR-ed into the top bit of the scale.
944 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
945 unsigned sign, const float *scale)
// Bits 12 and up of idx hold one flag per coefficient.
947 unsigned nz = idx >> 12;
948 union float754 s = { .f = *scale };
951 t.i = s.i ^ (sign & 1<<31);
952 *dst++ = v[idx & 3] * t.f;
// sign advances by one bit only when the flag of the coefficient just written
// is set, so a coefficient with a clear flag does not consume a sign bit.
954 sign <<= nz & 1; nz >>= 1;
955 t.i = s.i ^ (sign & 1<<31);
956 *dst++ = v[idx>>2 & 3] * t.f;
958 sign <<= nz & 1; nz >>= 1;
959 t.i = s.i ^ (sign & 1<<31);
960 *dst++ = v[idx>>4 & 3] * t.f;
962 sign <<= nz & 1; nz >>= 1;
963 t.i = s.i ^ (sign & 1<<31);
964 *dst++ = v[idx>>6 & 3] * t.f;
971 * Decode spectral data; reference: table 4.50.
972 * Dequantize and scale spectral data; reference: 4.6.3.3.
974 * @param coef array of dequantized, scaled spectral data
975 * @param sf array of scalefactors or intensity stereo positions
976 * @param pulse_present set if pulses are present
977 * @param pulse pointer to pulse data struct
978 * @param band_type array of the used band type
980 * @return Returns error status. 0 - OK, !0 - error
// Decode the spectral coefficients of one channel into coef[], band by band and
// group by group, choosing the path from band_type: zero fill, generated noise,
// or Huffman-coded values scaled by sf[]. Pulse data is added afterwards.
// NOTE(review): the error message at the end indexes ff_aac_spectral_sizes with
// band_type[idx], while the decode path indexes the same table with
// band_type - 1 (cbt_m1). The reported limit is therefore taken from the wrong
// entry, and for the highest codebook the index may be past the end of the table.
982 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
983 GetBitContext *gb, const float sf[120],
984 int pulse_present, const Pulse *pulse,
985 const IndividualChannelStream *ics,
986 enum BandType band_type[120])
988 int i, k, g, idx = 0;
// c is the number of coefficients per window (1024 or 128).
989 const int c = 1024 / ics->num_windows;
990 const uint16_t *offsets = ics->swb_offset;
991 float *coef_base = coef;
// Zero everything above the last coded band in each window.
994 for (g = 0; g < ics->num_windows; g++)
995 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
997 for (g = 0; g < ics->num_window_groups; g++) {
998 unsigned g_len = ics->group_len[g];
1000 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Unsigned band type minus one: ZERO_BT wraps to a huge value, so the single
// comparison below catches both the zero band and the intensity band types.
1001 const unsigned cbt_m1 = band_type[idx] - 1;
1002 float *cfo = coef + offsets[i];
1003 int off_len = offsets[i + 1] - offsets[i];
1006 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1007 for (group = 0; group < g_len; group++, cfo+=128) {
1008 memset(cfo, 0, off_len * sizeof(float));
1010 } else if (cbt_m1 == NOISE_BT - 1) {
1011 for (group = 0; group < g_len; group++, cfo+=128) {
// Noise band: fill with pseudo-random values, then rescale by
// sf[idx] / sqrt(band energy).
1015 for (k = 0; k < off_len; k++) {
1016 ac->random_state = lcg_random(ac->random_state);
1017 cfo[k] = ac->random_state;
1020 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1021 scale = sf[idx] / sqrtf(band_energy);
1022 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
// Huffman-coded band: pick the value table, index table and VLC for this codebook.
1025 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1026 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1027 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1028 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1029 OPEN_READER(re, gb);
// Codebooks are handled in pairs; the case labels are on lines not visible
// here. The five loops below use VMUL4, VMUL4S, VMUL2, VMUL2S and, last, the
// path with escape sequences.
1031 switch (cbt_m1 >> 1) {
1033 for (group = 0; group < g_len; group++, cfo+=128) {
1041 UPDATE_CACHE(re, gb);
1042 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1044 if (code >= cb_size) {
1046 goto err_cb_overflow;
1049 cb_idx = cb_vector_idx[code];
1050 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1056 for (group = 0; group < g_len; group++, cfo+=128) {
1066 UPDATE_CACHE(re, gb);
1067 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1069 if (code >= cb_size) {
1071 goto err_cb_overflow;
1074 #if MIN_CACHE_BITS < 20
1075 UPDATE_CACHE(re, gb);
1077 cb_idx = cb_vector_idx[code];
// Bits 8-11 of the index entry give the number of sign bits to read.
1078 nnz = cb_idx >> 8 & 15;
1079 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1080 LAST_SKIP_BITS(re, gb, nnz);
1081 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1087 for (group = 0; group < g_len; group++, cfo+=128) {
1095 UPDATE_CACHE(re, gb);
1096 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1098 if (code >= cb_size) {
1100 goto err_cb_overflow;
1103 cb_idx = cb_vector_idx[code];
1104 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1111 for (group = 0; group < g_len; group++, cfo+=128) {
1121 UPDATE_CACHE(re, gb);
1122 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1124 if (code >= cb_size) {
1126 goto err_cb_overflow;
1129 cb_idx = cb_vector_idx[code];
1130 nnz = cb_idx >> 8 & 15;
1131 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1132 LAST_SKIP_BITS(re, gb, nnz);
1133 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1139 for (group = 0; group < g_len; group++, cfo+=128) {
// Values are written as raw 32-bit patterns through icf so the sign bit can be
// OR-ed in; the whole band is scaled by sf[idx] afterwards.
1141 uint32_t *icf = (uint32_t *) cf;
1151 UPDATE_CACHE(re, gb);
1152 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1160 if (code >= cb_size) {
1162 goto err_cb_overflow;
1165 cb_idx = cb_vector_idx[code];
1168 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1169 LAST_SKIP_BITS(re, gb, nnz);
1171 for (j = 0; j < 2; j++) {
1175 /* The total length of escape_sequence must be < 22 bits according
1176 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1177 UPDATE_CACHE(re, gb);
1178 b = GET_CACHE(re, gb);
// b becomes the number of leading one bits in the cache.
1179 b = 31 - av_log2(~b);
1182 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1186 #if MIN_CACHE_BITS < 21
1187 LAST_SKIP_BITS(re, gb, b + 1);
1188 UPDATE_CACHE(re, gb);
1190 SKIP_BITS(re, gb, b + 1);
// Escape value is 2^b plus the next b bits (b may be adjusted on a line not
// visible here); it is looked up in cbrt_tab and given its sign bit.
1193 n = (1 << b) + SHOW_UBITS(re, gb, b);
1194 LAST_SKIP_BITS(re, gb, b);
1195 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1198 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1199 *icf++ = (bits & 1<<31) | v;
1206 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1210 CLOSE_READER(re, gb);
// Pulse data: find the band holding each pulse position and adjust that
// coefficient, skipping noise bands and bands whose scalefactor is zero.
1216 if (pulse_present) {
1218 for (i = 0; i < pulse->num_pulse; i++) {
1219 float co = coef_base[ pulse->pos[i] ];
1220 while (offsets[idx + 1] <= pulse->pos[i])
1222 if (band_type[idx] != NOISE_BT && sf[idx]) {
1223 float ico = -pulse->amp[i];
1226 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1228 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1235 av_log(ac->avctx, AV_LOG_ERROR,
1236 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1237 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
// Round pf to the upper 16 bits of its 32-bit pattern: add half of the dropped
// range, then clear the low 16 bits. Assumes tmp.i aliases the bits of pf (the
// declaration is on a line not visible here).
1241 static av_always_inline float flt16_round(float pf)
1245 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
// Round pf to the upper 16 bits of its 32-bit pattern with a tie-break term.
// NOTE(review): >> binds tighter than &, so (tmp.i & 0x00010000U >> 16) is
// evaluated as tmp.i & 1 and tests bit 0, not bit 16. If round-half-to-even on
// the kept mantissa was intended it would read (tmp.i & 0x00010000U) >> 16.
// Changing it alters decoded output, so check against the reference decoder first.
1249 static av_always_inline float flt16_even(float pf)
1253 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
// Truncate pf to the upper 16 bits of its 32-bit pattern by clearing the low
// 16 bits. Assumes pun.i aliases the bits of pf (the declaration is on a line
// not visible here).
1257 static av_always_inline float flt16_trunc(float pf)
1261 pun.i &= 0xFFFF0000U;
// Run one step of the second-order backward-adaptive predictor for a single
// coefficient: compute the prediction from the stored state, add it to *coef,
// then update correlations, variances and the r0/r1 values in *ps.
1265 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1268 const float a = 0.953125; // 61.0 / 64
1269 const float alpha = 0.90625; // 29.0 / 32
// A coefficient is only formed when the variance exceeds 1; otherwise it is 0.
1274 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1275 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1277 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
// The addition is presumably guarded by the output enable argument on a line
// not visible here - TODO confirm. sf_scale converts between the predictor
// domain and the scaled coefficient domain.
1279 *coef += pv * ac->sf_scale;
1281 e0 = *coef / ac->sf_scale;
1282 e1 = e0 - k1 * ps->r0;
1284 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1285 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1286 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1287 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
// r1 must be computed from the old r0 before r0 is overwritten.
1289 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1290 ps->r0 = flt16_trunc(a * e0);
1294 * Apply AAC-Main style frequency domain prediction.
// Run the per-coefficient predictors of one channel (sce) over its spectrum.
1296 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
// First use of this channel: reset every predictor state and remember that it was done.
1300 if (!sce->ics.predictor_initialized) {
1301 reset_all_predictors(sce->predictor_state);
1302 sce->ics.predictor_initialized = 1;
// Prediction is run only when the current window sequence is not EIGHT_SHORT_SEQUENCE.
1305 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
// The number of predicted bands comes from the table ff_aac_pred_sfb_max, indexed by sampling index.
1306 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1307 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
// The last argument is nonzero only when prediction is present in the frame and used for this band.
1308 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1309 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
// A nonzero predictor_reset_group selects one group of predictors to reset.
1312 if (sce->ics.predictor_reset_group)
1313 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
// NOTE(review): presumably the alternative branch for EIGHT_SHORT_SEQUENCE - confirm.
1315 reset_all_predictors(sce->predictor_state);
1319 * Decode an individual_channel_stream payload; reference: table 4.44.
1321 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1322 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1324 * @return Returns error status. 0 - OK, !0 - error
// Parse one individual channel stream from gb into sce: global gain, optional ICS info,
// band types, scalefactors, pulse data, TNS data and finally the dequantized spectrum.
1326 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1327 GetBitContext *gb, int common_window, int scale_flag)
1330 TemporalNoiseShaping *tns = &sce->tns;
1331 IndividualChannelStream *ics = &sce->ics;
// Spectral coefficients are written straight into the channel coefficient buffer.
1332 float *out = sce->coeffs;
1333 int global_gain, pulse_present = 0;
1335 /* This assignment is to silence a GCC warning about the variable being used
1336 * uninitialized when in fact it always is.
1338 pulse.num_pulse = 0;
// 8-bit global gain, later handed to decode_scalefactors.
1340 global_gain = get_bits(gb, 8);
// ICS info is read here only when it is neither shared via a common window nor scalable.
1342 if (!common_window && !scale_flag) {
1343 if (decode_ics_info(ac, ics, gb, 0) < 0)
1347 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1349 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
// One bit signals pulse data; it is rejected for eight-short windows.
1354 if ((pulse_present = get_bits1(gb))) {
1355 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1356 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1359 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1360 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
// One bit signals TNS; decode_tns is called only when that bit is set.
1364 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
// A set bit here means gain control (SSR) data follows, which is reported as unsupported.
1366 if (get_bits1(gb)) {
1367 av_log_missing_feature(ac->avctx, "SSR", 1);
1372 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
// For AAC Main without a common window, prediction is applied here; decode_cpe handles the
// common-window case itself.
1375 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1376 apply_prediction(ac, sce);
1382 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
// Apply mid/side butterflies in place to both channels of cpe for every band whose ms_mask is set.
1384 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
// Window grouping and band offsets are taken from channel 0.
1386 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1387 float *ch0 = cpe->ch[0].coeffs;
1388 float *ch1 = cpe->ch[1].coeffs;
// idx runs over (group, band) pairs and indexes ms_mask and band_type.
1389 int g, i, group, idx = 0;
1390 const uint16_t *offsets = ics->swb_offset;
1391 for (g = 0; g < ics->num_window_groups; g++) {
1392 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Skip bands whose band type is NOISE_BT or above in either channel.
1393 if (cpe->ms_mask[idx] &&
1394 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
// Each window of the group occupies 128 coefficients.
1395 for (group = 0; group < ics->group_len[g]; group++) {
1396 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1397 ch1 + group * 128 + offsets[i],
1398 offsets[i+1] - offsets[i]);
// Advance both channel pointers past all windows of this group.
1402 ch0 += ics->group_len[g] * 128;
1403 ch1 += ics->group_len[g] * 128;
1408 * intensity stereo decoding; reference: 4.6.8.2.3
1410 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1411 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1412 * [3] reserved for scalable AAC
// Rebuild channel 1 coefficients from channel 0 in bands coded as intensity stereo.
1414 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
// Band types, run ends and scalefactors are taken from channel 1.
1416 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1417 SingleChannelElement *sce1 = &cpe->ch[1];
1418 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1419 const uint16_t *offsets = ics->swb_offset;
1420 int g, group, i, k, idx = 0;
1423 for (g = 0; g < ics->num_window_groups; g++) {
// i is advanced inside the body, one band-type run at a time.
1424 for (i = 0; i < ics->max_sfb;) {
1425 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1426 const int bt_run_end = sce1->band_type_run_end[idx];
1427 for (; i < bt_run_end; i++, idx++) {
// Sign from the band type: -1 when band_type is 14, +1 when it is 15.
1428 c = -1 + 2 * (sce1->band_type[idx] - 14);
// Flip the sign where ms_mask is 1.
// NOTE(review): presumably guarded by ms_present - confirm.
1430 c *= 1 - 2 * cpe->ms_mask[idx];
1431 scale = c * sce1->sf[idx];
// Channel 1 becomes a scaled copy of channel 0 in every window of the group.
1432 for (group = 0; group < ics->group_len[g]; group++)
1433 for (k = offsets[i]; k < offsets[i + 1]; k++)
1434 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
// Non-intensity run: idx is moved by the remaining length of the run.
1437 int bt_run_end = sce1->band_type_run_end[idx];
1438 idx += bt_run_end - i;
1442 coef0 += ics->group_len[g] * 128;
1443 coef1 += ics->group_len[g] * 128;
1448 * Decode a channel_pair_element; reference: table 4.4.
1450 * @param cpe Channel pair element whose two channels (ch[0], ch[1]) are filled from the bitstream.
1452 * @return Returns error status. 0 - OK, !0 - error
// Parse a channel pair element from gb into cpe, then apply the stereo tools
// (mid/side, prediction for AAC Main, intensity stereo) to the decoded spectra.
1454 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1456 int i, ret, common_window, ms_present = 0;
1458 common_window = get_bits1(gb);
// With a common window, ICS info is read once into channel 0 and copied to channel 1.
1459 if (common_window) {
1460 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
// Save the current window-shape flag of channel 1 before the struct copy overwrites it,
// then store it as the previous-window flag (index 1) of the copied info.
1462 i = cpe->ch[1].ics.use_kb_window[0];
1463 cpe->ch[1].ics = cpe->ch[0].ics;
1464 cpe->ch[1].ics.use_kb_window[1] = i;
// 2-bit mid/side mode; value 3 is logged as reserved, other nonzero values set up the mask.
1465 ms_present = get_bits(gb, 2);
1466 if (ms_present == 3) {
1467 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1469 } else if (ms_present)
1470 decode_mid_side_stereo(cpe, gb, ms_present);
// Decode both channels; a nonzero result is captured in ret.
1472 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1474 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1477 if (common_window) {
// NOTE(review): presumably only when ms_present is nonzero - confirm.
1479 apply_mid_side_stereo(ac, cpe);
// decode_ics skips prediction when common_window is set, so it is applied here instead.
1480 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1481 apply_prediction(ac, &cpe->ch[0]);
1482 apply_prediction(ac, &cpe->ch[1]);
1486 apply_intensity_stereo(cpe, ms_present);
1491 * Decode coupling_channel_element; reference: table 4.8.
1493 * @param che Coupling channel element to fill; its ch[0] stream and coup data are written.
1495 * @return Returns error status. 0 - OK, !0 - error
// Parse a coupling channel element from gb: the list of coupled targets, the coupling
// channel stream itself (che->ch[0]) and the gain values stored in che->coup.gain.
1497 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1503 SingleChannelElement *sce = &che->ch[0];
1504 ChannelCoupling *coup = &che->coup;
// First bit contributes 0 or 2 to the coupling point; a further term is added below.
1506 coup->coupling_point = 2 * get_bits1(gb);
// 3-bit count; the loop below is inclusive, so num_coupled + 1 targets are read.
1507 coup->num_coupled = get_bits(gb, 3);
1508 for (c = 0; c <= coup->num_coupled; c++) {
// Per target: 1 bit selects CPE or SCE, followed by a 4-bit element id.
1510 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1511 coup->id_select[c] = get_bits(gb, 4);
// CPE targets carry a 2-bit channel selector.
1512 if (coup->type[c] == TYPE_CPE) {
1513 coup->ch_select[c] = get_bits(gb, 2);
1514 if (coup->ch_select[c] == 3)
// NOTE(review): presumably the non-CPE branch, not the body of the test above - confirm.
1517 coup->ch_select[c] = 2;
// Adds 1 when the next bit is set or when the coupling point is already 2 or more.
1519 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1521 sign = get_bits(gb, 1);
// scale = 2^(2^(n - 3)) with n the 2-bit field just read.
1522 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
// The coupling channel carries an ordinary individual channel stream.
1524 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
// One gain list per coupled output channel.
1527 for (c = 0; c < num_gain; c++) {
1531 float gain_cache = 1.;
// The common-gain flag is forced to 1 for AFTER_IMDCT coupling, otherwise read from the stream.
1533 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
// Common gain: scalefactor VLC value minus 60, or 0 when no common gain is sent.
1534 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1535 gain_cache = pow(scale, -gain);
// AFTER_IMDCT coupling keeps a single gain in slot 0.
1537 if (coup->coupling_point == AFTER_IMDCT) {
1538 coup->gain[c][0] = gain_cache;
// Otherwise one gain is stored per (group, band) index for non-zero bands.
1540 for (g = 0; g < sce->ics.num_window_groups; g++) {
1541 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1542 if (sce->band_type[idx] != ZERO_BT) {
1544 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
// s is set on lines not shown here; NOTE(review): presumably a sign factor - confirm.
1552 gain_cache = pow(scale, -t) * s;
1555 coup->gain[c][idx] = gain_cache;
1565 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1567 * @return Returns number of bytes consumed.
// Read channel exclusion flags for dynamic range control into che_drc->exclude_mask.
// The parameter list continues past the first line; gb is the bit reader used below.
1569 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1573 int num_excl_chan = 0;
// Flags arrive in batches of 7 one-bit entries.
1576 for (i = 0; i < 7; i++)
1577 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
// Another batch follows while room remains below MAX_CHANNELS and the continuation bit is set.
1578 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
// Each batch is 7 flag bits plus 1 continuation bit, so the batch count is a byte count.
1580 return num_excl_chan / 7;
1584 * Decode dynamic range information; reference: table 4.52.
1586 * @param cnt length of TYPE_FIL syntactic element in bytes
1588 * @return Returns number of bytes consumed.
// Parse a dynamic range control payload from gb into che_drc.
// Every optional section is introduced by a one-bit presence flag.
1590 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1591 GetBitContext *gb, int cnt)
// A single band is assumed unless band information is present.
1594 int drc_num_bands = 1;
1597 /* pce_tag_present? */
1598 if (get_bits1(gb)) {
1599 che_drc->pce_instance_tag = get_bits(gb, 4);
1600 skip_bits(gb, 4); // tag_reserved_bits
1604 /* excluded_chns_present? */
1605 if (get_bits1(gb)) {
// n accumulates the return value of the exclusion parser.
1606 n += decode_drc_channel_exclusions(che_drc, gb);
1609 /* drc_bands_present? */
1610 if (get_bits1(gb)) {
1611 che_drc->band_incr = get_bits(gb, 4);
1612 che_drc->interpolation_scheme = get_bits(gb, 4);
// band_incr is a 4-bit field, so drc_num_bands is at most 16.
1614 drc_num_bands += che_drc->band_incr;
1615 for (i = 0; i < drc_num_bands; i++) {
1616 che_drc->band_top[i] = get_bits(gb, 8);
1621 /* prog_ref_level_present? */
1622 if (get_bits1(gb)) {
1623 che_drc->prog_ref_level = get_bits(gb, 7);
1624 skip_bits1(gb); // prog_ref_level_reserved_bits
// Per band: a 1-bit sign and a 7-bit control value.
1628 for (i = 0; i < drc_num_bands; i++) {
1629 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1630 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1638 * Decode extension data (incomplete); reference: table 4.51.
1640 * @param cnt length of TYPE_FIL syntactic element in bytes
1642 * @return Returns number of bytes consumed
// Dispatch one extension payload of cnt bytes according to its 4-bit extension type.
// che and elem_type are handed to the SBR parser.
1644 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1645 ChannelElement *che, enum RawDataBlockType elem_type)
1649 switch (get_bits(gb, 4)) { // extension type
1650 case EXT_SBR_DATA_CRC:
// SBR data is refused, with a log message, in the three situations reported below.
1654 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1656 } else if (!ac->m4ac.sbr) {
1657 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
// Skip the rest of the payload: cnt bytes minus the 4 type bits already consumed.
1658 skip_bits_long(gb, 8 * cnt - 4);
1660 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1661 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1662 skip_bits_long(gb, 8 * cnt - 4);
1667 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1669 case EXT_DYNAMIC_RANGE:
// Dynamic range data is stored in the decoder-wide che_drc state.
1670 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1674 case EXT_DATA_ELEMENT:
// Payload skipped: cnt bytes minus the 4 type bits already consumed.
1676 skip_bits_long(gb, 8 * cnt - 4);
1683 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1685 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1686 * @param coef spectral coefficients
// Apply the filters described by tns in place to the spectral coefficients coef.
1688 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1689 IndividualChannelStream *ics, int decode)
// Filters never reach beyond the smaller of tns_max_bands and max_sfb.
1691 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1693 int bottom, top, order, start, end, size, inc;
1694 float lpc[TNS_MAX_ORDER];
1696 for (w = 0; w < ics->num_windows; w++) {
// bottom starts at the highest band; each filter then covers a range ending at or above band 0.
1697 bottom = ics->num_swb;
1698 for (filt = 0; filt < tns->n_filt[w]; filt++) {
// top is assigned on a line not shown here.
1700 bottom = FFMAX(0, top - tns->length[w][filt]);
1701 order = tns->order[w][filt];
// Convert the stored filter coefficients into the lpc array used by the filter loop.
1706 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
// Band indices are clamped to mmm before being turned into coefficient offsets.
1708 start = ics->swb_offset[FFMIN(bottom, mmm)];
1709 end = ics->swb_offset[FFMIN( top, mmm)];
1710 if ((size = end - start) <= 0)
// direction selects the filtering direction; inc and start are set accordingly on lines not shown here.
1712 if (tns->direction[w][filt]) {
// Recursive filter: each sample subtracts up to order previously filtered samples weighted by lpc.
1721 for (m = 0; m < size; m++, start += inc)
1722 for (i = 1; i <= FFMIN(m, order); i++)
1723 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1729 * Conduct IMDCT and windowing.
// Inverse-transform the coefficients of one channel, overlap them with the saved tail of
// the previous frame into sce->ret, and store the new tail in sce->saved.
// bias is added to the output samples.
1731 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1733 IndividualChannelStream *ics = &sce->ics;
1734 float *in = sce->coeffs;
1735 float *out = sce->ret;
1736 float *saved = sce->saved;
// use_kb_window[0] selects the current window shape, use_kb_window[1] that of the previous frame.
1737 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1738 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1739 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1740 float *buf = ac->buf_mdct;
1741 float *temp = ac->temp;
// Eight short windows: eight half-IMDCTs of 128 coefficients each with the small transform.
1745 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1746 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1747 av_log(ac->avctx, AV_LOG_WARNING,
1748 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1749 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1750 for (i = 0; i < 1024; i += 128)
1751 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
// NOTE(review): presumably the alternative branch, one long transform over the whole frame - confirm.
1753 ff_imdct_half(&ac->mdct, buf, in);
1755 /* window overlapping
1756 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1757 * and long to short transitions are considered to be short to short
1758 * transitions. This leaves just two cases (long to long and short to short)
1759 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
// Long to long: one 512-sample windowed overlap using the previous long window.
1761 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1762 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1763 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
// Otherwise the first 448 output samples are the saved samples plus bias.
1765 for (i = 0; i < 448; i++)
1766 out[i] = saved[i] + bias;
// Eight short: overlap consecutive short blocks; the fifth overlap goes through temp
// because only its first 64 samples belong to this frame.
1768 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1769 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1770 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1771 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1772 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
1773 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1774 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
// Not eight short: one short overlap at offset 448, then buf samples plus bias fill 576..1023.
1776 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1777 for (i = 576; i < 1024; i++)
1778 out[i] = buf[i-512] + bias;
// Build the saved buffer for the next frame.
1783 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
// The second half of temp was computed with bias added, so bias is removed before saving.
1784 for (i = 0; i < 64; i++)
1785 saved[i] = temp[64 + i] - bias;
1786 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1787 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1788 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1789 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1790 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1791 memcpy( saved, buf + 512, 448 * sizeof(float));
1792 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1793 } else { // LONG_STOP or ONLY_LONG
1794 memcpy( saved, buf + 512, 512 * sizeof(float));
1799 * Apply dependent channel coupling (applied before IMDCT).
1801 * @param index index into coupling gain array
// Add the coupling channel spectrum (cce->ch[0].coeffs), scaled per band by
// cce->coup.gain[index], onto the target channel coefficients.
1803 static void apply_dependent_coupling(AACContext *ac,
1804 SingleChannelElement *target,
1805 ChannelElement *cce, int index)
// Band layout and grouping come from the coupling channel, not from the target.
1807 IndividualChannelStream *ics = &cce->ch[0].ics;
1808 const uint16_t *offsets = ics->swb_offset;
1809 float *dest = target->coeffs;
1810 const float *src = cce->ch[0].coeffs;
1811 int g, i, group, k, idx = 0;
// This combination is reported as unsupported.
1812 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1813 av_log(ac->avctx, AV_LOG_ERROR,
1814 "Dependent coupling is not supported together with LTP\n");
1817 for (g = 0; g < ics->num_window_groups; g++) {
1818 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Bands of type ZERO_BT in the coupling channel contribute nothing.
1819 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1820 const float gain = cce->coup.gain[index][idx];
// Each window of the group occupies 128 coefficients.
1821 for (group = 0; group < ics->group_len[g]; group++) {
1822 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1824 dest[group * 128 + k] += gain * src[group * 128 + k];
// Advance both pointers past all windows of this group.
1829 dest += ics->group_len[g] * 128;
1830 src += ics->group_len[g] * 128;
1835 * Apply independent channel coupling (applied after IMDCT).
1837 * @param index index into coupling gain array
// Add the coupling channel output samples (cce->ch[0].ret), scaled by one gain,
// onto the target channel output samples.
1839 static void apply_independent_coupling(AACContext *ac,
1840 SingleChannelElement *target,
1841 ChannelElement *cce, int index)
// Only slot 0 of the gain list is used here.
1844 const float gain = cce->coup.gain[index][0];
1845 const float bias = ac->add_bias;
1846 const float *src = cce->ch[0].ret;
1847 float *dest = target->ret;
// 1024 samples, doubled to 2048 when m4ac.sbr equals 1.
1848 const int len = 1024 << (ac->m4ac.sbr == 1);
// The bias is removed from the source before scaling.
1850 for (i = 0; i < len; i++)
1851 dest[i] += gain * (src[i] - bias);
1855 * channel coupling transformation interface
1857 * @param index index into coupling gain array
1858 * @param apply_coupling_method pointer to (in)dependent coupling function
// For the channel element cc identified by (type, elem_id), find every coupling channel
// element with the given coupling_point that targets it and apply apply_coupling_method.
1860 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1861 enum RawDataBlockType type, int elem_id,
1862 enum CouplingPoint coupling_point,
1863 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
// Scan every possible coupling channel element slot.
1867 for (i = 0; i < MAX_ELEM_ID; i++) {
1868 ChannelElement *cce = ac->che[TYPE_CCE][i];
1871 if (cce && cce->coup.coupling_point == coupling_point) {
1872 ChannelCoupling *coup = &cce->coup;
// num_coupled is inclusive: num_coupled + 1 target entries exist.
1874 for (c = 0; c <= coup->num_coupled; c++) {
1875 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
// ch_select other than 1 couples into channel 0; other than 2 couples into channel 1.
// index selects the gain list and is advanced as lists are consumed.
1876 if (coup->ch_select[c] != 1) {
1877 apply_coupling_method(ac, &cc->ch[0], cce, index);
1878 if (coup->ch_select[c] != 0)
1881 if (coup->ch_select[c] != 2)
1882 apply_coupling_method(ac, &cc->ch[1], cce, index++);
// NOTE(review): presumably the non-matching branch - skip this target's gain lists
// (two when ch_select is 3, otherwise one). Confirm.
1884 index += 1 + (coup->ch_select[c] == 3);
1891 * Convert spectral data to float samples, applying all supported tools as appropriate.
// Turn the decoded spectra of every allocated channel element into output samples,
// running coupling, TNS, IMDCT/windowing and SBR in that order.
1893 static void spectral_to_sample(AACContext *ac)
// The IMDCT adds the bias only when m4ac.sbr is not positive.
1896 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
// Element types are visited from 3 down to 0.
1897 for (type = 3; type >= 0; type--) {
1898 for (i = 0; i < MAX_ELEM_ID; i++) {
1899 ChannelElement *che = ac->che[type][i];
// Dependent coupling before TNS, for element types up to TYPE_CPE.
1901 if (type <= TYPE_CPE)
1902 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1903 if (che->ch[0].tns.present)
1904 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1905 if (che->ch[1].tns.present)
1906 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
// Dependent coupling between TNS and the IMDCT.
1907 if (type <= TYPE_CPE)
1908 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
// Coupling channel elements are transformed only when they couple after the IMDCT.
1909 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1910 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1911 if (type == TYPE_CPE) {
1912 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1914 if (ac->m4ac.sbr > 0) {
1915 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
// Independent coupling operates on output samples, after the IMDCT.
1918 if (type <= TYPE_CCE)
1919 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
// Parse an ADTS frame header from gb and update the decoder configuration from it.
1925 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1928 AACADTSHeaderInfo hdr_info;
1930 size = ff_aac_parse_header(gb, &hdr_info);
// While the output layout is not locked, a nonzero chan_config selects a default layout.
1932 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1933 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1934 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1935 ac->m4ac.chan_config = hdr_info.chan_config;
1936 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1938 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
// chan_config of 0 while not locked: the output is marked as not configured.
1940 } else if (ac->output_configured != OC_LOCKED) {
1941 ac->output_configured = OC_NONE;
1943 if (ac->output_configured != OC_LOCKED)
// Copy stream parameters from the header into the decoder state.
1945 ac->m4ac.sample_rate = hdr_info.sample_rate;
1946 ac->m4ac.sampling_index = hdr_info.sampling_index;
1947 ac->m4ac.object_type = hdr_info.object_type;
// The codec context sample rate is only filled in when it is still zero.
1948 if (!ac->avctx->sample_rate)
1949 ac->avctx->sample_rate = hdr_info.sample_rate;
// Only a single AAC frame per ADTS frame is handled; more is reported as a missing feature.
1950 if (hdr_info.num_aac_frames == 1) {
1951 if (!hdr_info.crc_absent)
1954 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
// Decode one packet: parse an optional ADTS header, decode syntactic elements until
// TYPE_END, convert the spectra to samples and write interleaved 16-bit output to data.
// *data_size holds the output buffer size on entry and the bytes written on success.
1961 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1962 int *data_size, AVPacket *avpkt)
1964 const uint8_t *buf = avpkt->data;
1965 int buf_size = avpkt->size;
1966 AACContext *ac = avctx->priv_data;
// che_prev and elem_type_prev identify the element an extension payload belongs to.
1967 ChannelElement *che = NULL, *che_prev = NULL;
1969 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1970 int err, elem_id, data_size_tmp;
1972 int samples = 1024, multiplier;
1975 init_get_bits(&gb, buf, buf_size * 8);
// A 12-bit 0xfff pattern at the start is treated as an ADTS sync word.
1977 if (show_bits(&gb, 12) == 0xfff) {
1978 if (parse_adts_frame_header(ac, &gb) < 0) {
1979 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
// Sampling indices above 12 are rejected.
1982 if (ac->m4ac.sampling_index > 12) {
1983 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1988 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
// Element loop: 3-bit type then 4-bit id, until TYPE_END.
1990 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1991 elem_id = get_bits(&gb, 4);
// Types below TYPE_DSE need an allocated channel element.
1993 if (elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) {
1994 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
// The case labels of this switch are on lines not shown here.
1998 switch (elem_type) {
2001 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2005 err = decode_cpe(ac, &gb, che);
2009 err = decode_cce(ac, &gb, che);
2013 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2017 err = skip_data_stream_element(ac, &gb);
// Program config element: parsed into a fresh position table, then applied as a trial
// configuration unless the log message below is emitted instead.
2021 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2022 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2023 if ((err = decode_pce(ac, new_che_pos, &gb)))
2025 if (ac->output_configured > OC_TRIAL_PCE)
2026 av_log(avctx, AV_LOG_ERROR,
2027 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2029 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
// Fill element: elem_id is reused as the payload byte count, extended by an 8-bit field.
// NOTE(review): the extension is presumably conditional on the 4-bit count - confirm.
2035 elem_id += get_bits(&gb, 8) - 1;
2036 if (get_bits_left(&gb) < 8 * elem_id) {
// NOTE(review): overread_err is used as the format string; it is defined elsewhere in the file.
2037 av_log(avctx, AV_LOG_ERROR, overread_err);
// Each payload reports how much it consumed, which is subtracted from the remaining count.
2041 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2042 err = 0; /* FIXME */
2046 err = -1; /* should not happen, but keeps compiler happy */
2051 elem_type_prev = elem_type;
// Fewer than 3 bits left means the next element type cannot be read.
2056 if (get_bits_left(&gb) < 3) {
2057 av_log(avctx, AV_LOG_ERROR, overread_err);
2062 spectral_to_sample(ac);
// multiplier is 1 only when m4ac.sbr is 1 and the extension sample rate exceeds the core rate.
2064 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2065 samples <<= multiplier;
// Until the configuration is locked, output parameters follow the stream.
2066 if (ac->output_configured < OC_LOCKED) {
2067 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2068 avctx->frame_size = samples;
// Output is 16-bit interleaved: samples * channels * 2 bytes.
2071 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2072 if (*data_size < data_size_tmp) {
2073 av_log(avctx, AV_LOG_ERROR,
2074 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2075 *data_size, data_size_tmp);
2078 *data_size = data_size_tmp;
2080 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
// Any non-NONE configuration becomes locked after a frame has been output.
2082 if (ac->output_configured)
2083 ac->output_configured = OC_LOCKED;
// Bytes consumed, rounded up; then scan the remaining bytes for a nonzero one.
2085 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2086 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2087 if (buf[buf_offset])
// Returns the whole buffer size when the scan reached the end, else the consumed byte count.
2090 return buf_size > buf_offset ? buf_consumed : buf_size;
// Release everything owned by the decoder context: per-element SBR state, the channel
// elements themselves and both MDCT contexts.
2093 static av_cold int aac_decode_close(AVCodecContext *avctx)
2095 AACContext *ac = avctx->priv_data;
2098 for (i = 0; i < MAX_ELEM_ID; i++) {
2099 for (type = 0; type < 4; type++) {
// SBR state is closed only for slots that hold an element; av_freep runs for every slot.
2100 if (ac->che[type][i])
2101 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2102 av_freep(&ac->che[type][i]);
2106 ff_mdct_end(&ac->mdct);
2107 ff_mdct_end(&ac->mdct_small);
2111 AVCodec aac_decoder = {
2120 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2121 .sample_fmts = (const enum SampleFormat[]) {
2122 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2124 .channel_layouts = aac_channel_layout,