3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
70 * N (planned) Parametric Stereo
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and Parametric Stereo.
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aac_parser.h"
101 # include "arm/aac.h"
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
// Look up the channel element for the given element type and instance tag.
// A cached entry in ac->tag_che_map is returned when present; otherwise the
// switch on ac->m4ac.chan_config below picks an element from ac->che based on
// how many tags have been mapped so far and stores it in the map.
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
// NOTE(review): the array is indexed with elem_id before elem_id is compared
// against MAX_ELEM_ID. The equality test after the loop suggests elem_id can
// reach MAX_ELEM_ID here, in which case the entry one past the end of the row
// is read (presumably the row holds MAX_ELEM_ID entries - confirm). Testing
// the bound first would avoid that read.
121 while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
// Mark this (type, elem_id) pair as used for the current frame.
130 ac->tags_seen_this_frame[type][elem_id] = 1;
// Fast path: the tag was already mapped to an element.
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
// No more elements are expected for this channel configuration.
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
// Position-based mapping: each branch assigns the next expected element of
// the configuration to the tag that was just seen.
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
// The comparison yields 0 for chan_config 2 and 1 otherwise, so the first CPE
// is expected as the first tag in stereo and as the second tag elsewhere.
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE) {
204 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
208 if (ac->che[type][id])
209 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
210 av_freep(&ac->che[type][id]);
216 * Configure output channel order based on the current program configuration element.
218 * @param che_pos current channel position configuration
219 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
221 * @return Returns error status. 0 - OK, !0 - error
223 static av_cold int output_configure(AACContext *ac,
224 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
225 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
226 int channel_config, enum OCStatus oc_type)
228 AVCodecContext *avctx = ac->avctx;
229 int i, type, channels = 0, ret;
231 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
233 if (channel_config) {
234 for (i = 0; i < tags_per_config[channel_config]; i++) {
235 if ((ret = che_configure(ac, che_pos,
236 aac_channel_layout_map[channel_config - 1][i][0],
237 aac_channel_layout_map[channel_config - 1][i][1],
242 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
245 avctx->channel_layout = aac_channel_layout[channel_config - 1];
247 /* Allocate or free elements depending on if they are in the
248 * current program configuration.
250 * Set up default 1:1 output mapping.
252 * For a 5.1 stream the output order will be:
253 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
256 for (i = 0; i < MAX_ELEM_ID; i++) {
257 for (type = 0; type < 4; type++) {
258 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
263 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
264 ac->tags_mapped = 4 * MAX_ELEM_ID;
266 avctx->channel_layout = 0;
269 avctx->channels = channels;
271 ac->output_configured = oc_type;
277 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
279 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
280 * @param sce_map mono (Single Channel Element) map
281 * @param type speaker type/position for these channels
283 static void decode_channel_map(enum ChannelPosition *cpe_map,
284 enum ChannelPosition *sce_map,
285 enum ChannelPosition type,
286 GetBitContext *gb, int n)
289 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
290 map[get_bits(gb, 4)] = type;
295 * Decode program configuration element; reference: table 4.2.
297 * @param new_che_pos channel position configuration to fill in; entries are set from the element maps read from the bitstream.
299 * @return Returns error status. 0 - OK, !0 - error
301 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
304 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
307 skip_bits(gb, 2); // object_type
309 sampling_index = get_bits(gb, 4);
310 if (ac->m4ac.sampling_index != sampling_index)
311 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
313 num_front = get_bits(gb, 4);
314 num_side = get_bits(gb, 4);
315 num_back = get_bits(gb, 4);
316 num_lfe = get_bits(gb, 2);
317 num_assoc_data = get_bits(gb, 3);
318 num_cc = get_bits(gb, 4);
321 skip_bits(gb, 4); // mono_mixdown_tag
323 skip_bits(gb, 4); // stereo_mixdown_tag
326 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
328 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
329 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
331 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
333 skip_bits_long(gb, 4 * num_assoc_data);
335 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
339 /* comment field, first byte is length */
340 comment_len = get_bits(gb, 8) * 8;
341 if (get_bits_left(gb) < comment_len) {
342 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
345 skip_bits_long(gb, comment_len);
350 * Set up channel positions based on a default channel configuration
351 * as specified in table 1.17.
353 * @param new_che_pos channel position configuration to fill in according to channel_config.
355 * @return Returns error status. 0 - OK, !0 - error
357 static av_cold int set_default_channel_config(AACContext *ac,
358 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
361 if (channel_config < 1 || channel_config > 7) {
362 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
367 /* default channel configurations:
369 * 1ch : front center (mono)
370 * 2ch : L + R (stereo)
371 * 3ch : front center + L + R
372 * 4ch : front center + L + R + back center
373 * 5ch : front center + L + R + back stereo
374 * 6ch : front center + L + R + back stereo + LFE
375 * 8ch (channel_config 7) : front center + L + R + outer front left + outer front right + back stereo + LFE
378 if (channel_config != 2)
379 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
380 if (channel_config > 1)
381 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
382 if (channel_config == 4)
383 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
384 if (channel_config > 4)
385 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
386 = AAC_CHANNEL_BACK; // back stereo
387 if (channel_config > 5)
388 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
389 if (channel_config == 7)
390 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
396 * Decode GA "General Audio" specific configuration; reference: table 4.1.
398 * @return Returns error status. 0 - OK, !0 - error
400 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
403 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
404 int extension_flag, ret;
406 if (get_bits1(gb)) { // frameLengthFlag
407 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
411 if (get_bits1(gb)) // dependsOnCoreCoder
412 skip_bits(gb, 14); // coreCoderDelay
413 extension_flag = get_bits1(gb);
415 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
416 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
417 skip_bits(gb, 3); // layerNr
419 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
420 if (channel_config == 0) {
421 skip_bits(gb, 4); // element_instance_tag
422 if ((ret = decode_pce(ac, new_che_pos, gb)))
425 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
428 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
431 if (extension_flag) {
432 switch (ac->m4ac.object_type) {
434 skip_bits(gb, 5); // numOfSubFrame
435 skip_bits(gb, 11); // layer_length
439 case AOT_ER_AAC_SCALABLE:
441 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
442 * aacScalefactorDataResilienceFlag
443 * aacSpectralDataResilienceFlag
447 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
453 * Decode audio specific configuration; reference: table 1.13.
455 * @param data pointer to AVCodecContext extradata
456 * @param data_size size of AVCodecContext extradata
458 * @return Returns error status. 0 - OK, !0 - error
460 static int decode_audio_specific_config(AACContext *ac, void *data,
466 init_get_bits(&gb, data, data_size * 8);
468 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
470 if (ac->m4ac.sampling_index > 12) {
471 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
475 skip_bits_long(&gb, i);
477 switch (ac->m4ac.object_type) {
480 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
484 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
485 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
/**
 * Linear congruential pseudorandom number generator.
 *
 * @param   previous_val    current state of the generator (passed by value)
 *
 * @return  Returns the next 32-bit pseudorandom integer
 */
static av_always_inline int lcg_random(int previous_val)
{
    /* Do the multiply-add in unsigned arithmetic, which wraps modulo 2^32;
     * overflowing a signed int is undefined behavior. The union hands the
     * bit pattern back as an int without an out-of-range conversion. */
    union { unsigned u; int s; } v = { (unsigned)previous_val * 1664525u + 1013904223u };
    return v.s;
}
503 static av_always_inline void reset_predict_state(PredictorState *ps)
513 static void reset_all_predictors(PredictorState *ps)
516 for (i = 0; i < MAX_PREDICTORS; i++)
517 reset_predict_state(&ps[i]);
520 static void reset_predictor_group(PredictorState *ps, int group_num)
523 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
524 reset_predict_state(&ps[i]);
527 #define AAC_INIT_VLC_STATIC(num, size) \
528 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
529 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
530 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
533 static av_cold int aac_decode_init(AVCodecContext *avctx)
535 AACContext *ac = avctx->priv_data;
539 ac->m4ac.sample_rate = avctx->sample_rate;
541 if (avctx->extradata_size > 0) {
542 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
546 avctx->sample_fmt = SAMPLE_FMT_S16;
548 AAC_INIT_VLC_STATIC( 0, 304);
549 AAC_INIT_VLC_STATIC( 1, 270);
550 AAC_INIT_VLC_STATIC( 2, 550);
551 AAC_INIT_VLC_STATIC( 3, 300);
552 AAC_INIT_VLC_STATIC( 4, 328);
553 AAC_INIT_VLC_STATIC( 5, 294);
554 AAC_INIT_VLC_STATIC( 6, 306);
555 AAC_INIT_VLC_STATIC( 7, 268);
556 AAC_INIT_VLC_STATIC( 8, 510);
557 AAC_INIT_VLC_STATIC( 9, 366);
558 AAC_INIT_VLC_STATIC(10, 462);
562 dsputil_init(&ac->dsp, avctx);
564 ac->random_state = 0x1f2e3d4c;
566 // -1024 - Compensate wrong IMDCT method.
567 // 32768 - Required to scale values to the correct range for the bias method
568 // for float to int16 conversion.
570 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
571 ac->add_bias = 385.0f;
572 ac->sf_scale = 1. / (-1024. * 32768.);
576 ac->sf_scale = 1. / -1024.;
580 #if !CONFIG_HARDCODED_TABLES
581 for (i = 0; i < 428; i++)
582 ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
583 #endif /* CONFIG_HARDCODED_TABLES */
585 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
586 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
587 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
590 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
591 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
592 // window initialization
593 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
594 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
595 ff_init_ff_sine_windows(10);
596 ff_init_ff_sine_windows( 7);
604 * Skip data_stream_element; reference: table 4.10.
606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
608 int byte_align = get_bits1(gb);
609 int count = get_bits(gb, 8);
611 count += get_bits(gb, 8);
615 if (get_bits_left(gb) < 8 * count) {
616 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
619 skip_bits_long(gb, 8 * count);
623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
628 ics->predictor_reset_group = get_bits(gb, 5);
629 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
630 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
634 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
635 ics->prediction_used[sfb] = get_bits1(gb);
641 * Decode Individual Channel Stream info; reference: table 4.6.
643 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
646 GetBitContext *gb, int common_window)
649 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 memset(ics, 0, sizeof(IndividualChannelStream));
653 ics->window_sequence[1] = ics->window_sequence[0];
654 ics->window_sequence[0] = get_bits(gb, 2);
655 ics->use_kb_window[1] = ics->use_kb_window[0];
656 ics->use_kb_window[0] = get_bits1(gb);
657 ics->num_window_groups = 1;
658 ics->group_len[0] = 1;
659 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
661 ics->max_sfb = get_bits(gb, 4);
662 for (i = 0; i < 7; i++) {
664 ics->group_len[ics->num_window_groups - 1]++;
666 ics->num_window_groups++;
667 ics->group_len[ics->num_window_groups - 1] = 1;
670 ics->num_windows = 8;
671 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
672 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
673 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674 ics->predictor_present = 0;
676 ics->max_sfb = get_bits(gb, 6);
677 ics->num_windows = 1;
678 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
679 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
680 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
681 ics->predictor_present = get_bits1(gb);
682 ics->predictor_reset_group = 0;
683 if (ics->predictor_present) {
684 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
685 if (decode_prediction(ac, ics, gb)) {
686 memset(ics, 0, sizeof(IndividualChannelStream));
689 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 memset(ics, 0, sizeof(IndividualChannelStream));
694 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 memset(ics, 0, sizeof(IndividualChannelStream));
701 if (ics->max_sfb > ics->num_swb) {
702 av_log(ac->avctx, AV_LOG_ERROR,
703 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
704 ics->max_sfb, ics->num_swb);
705 memset(ics, 0, sizeof(IndividualChannelStream));
713 * Decode band types (section_data payload); reference: table 4.46.
715 * @param band_type array of the used band type
716 * @param band_type_run_end array of the last scalefactor band of a band type run
718 * @return Returns error status. 0 - OK, !0 - error
720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
721 int band_type_run_end[120], GetBitContext *gb,
722 IndividualChannelStream *ics)
// Section lengths are coded in 3-bit units for eight-short windows and in
// 5-bit units otherwise.
725 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
726 for (g = 0; g < ics->num_window_groups; g++) {
728 while (k < ics->max_sfb) {
// NOTE(review): sect_end is only 8 bits wide, yet the escape loop below adds
// to it an unbounded number of times before the max_sfb check runs. Enough
// escape codes make it wrap modulo 256, so an oversized section length can
// slip past the range check. A wider type, or checking inside the loop,
// would close that gap.
729 uint8_t sect_end = k;
731 int sect_band_type = get_bits(gb, 4);
732 if (sect_band_type == 12) {
733 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
// An all-ones length field is an escape: add it and keep reading. The first
// field that is not all ones terminates the length and is added as well.
736 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 sect_end += sect_len_incr;
738 sect_end += sect_len_incr;
739 if (get_bits_left(gb) < 0) {
740 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
743 if (sect_end > ics->max_sfb) {
744 av_log(ac->avctx, AV_LOG_ERROR,
745 "Number of bands (%d) exceeds limit (%d).\n",
746 sect_end, ics->max_sfb);
// Record the band type and the end of its run for every band in the section.
749 for (; k < sect_end; k++) {
750 band_type [idx] = sect_band_type;
751 band_type_run_end[idx++] = sect_end;
759 * Decode scalefactors; reference: table 4.47.
761 * @param global_gain first scalefactor value as scalefactors are differentially coded
762 * @param band_type array of the used band type
763 * @param band_type_run_end array of the last scalefactor band of a band type run
764 * @param sf array of scalefactors or intensity stereo positions
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
769 unsigned int global_gain,
770 IndividualChannelStream *ics,
771 enum BandType band_type[120],
772 int band_type_run_end[120])
774 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
776 int offset[3] = { global_gain, global_gain - 90, 100 };
778 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
779 for (g = 0; g < ics->num_window_groups; g++) {
780 for (i = 0; i < ics->max_sfb;) {
781 int run_end = band_type_run_end[idx];
782 if (band_type[idx] == ZERO_BT) {
783 for (; i < run_end; i++, idx++)
785 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
786 for (; i < run_end; i++, idx++) {
787 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788 if (offset[2] > 255U) {
789 av_log(ac->avctx, AV_LOG_ERROR,
790 "%s (%d) out of range.\n", sf_str[2], offset[2]);
793 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
795 } else if (band_type[idx] == NOISE_BT) {
796 for (; i < run_end; i++, idx++) {
797 if (noise_flag-- > 0)
798 offset[1] += get_bits(gb, 9) - 256;
800 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801 if (offset[1] > 255U) {
802 av_log(ac->avctx, AV_LOG_ERROR,
803 "%s (%d) out of range.\n", sf_str[1], offset[1]);
806 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
809 for (; i < run_end; i++, idx++) {
810 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811 if (offset[0] > 255U) {
812 av_log(ac->avctx, AV_LOG_ERROR,
813 "%s (%d) out of range.\n", sf_str[0], offset[0]);
816 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
825 * Decode pulse data; reference: table 4.7.
827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
828 const uint16_t *swb_offset, int num_swb)
831 pulse->num_pulse = get_bits(gb, 2) + 1;
832 pulse_swb = get_bits(gb, 6);
833 if (pulse_swb >= num_swb)
835 pulse->pos[0] = swb_offset[pulse_swb];
836 pulse->pos[0] += get_bits(gb, 5);
837 if (pulse->pos[0] > 1023)
839 pulse->amp[0] = get_bits(gb, 4);
840 for (i = 1; i < pulse->num_pulse; i++) {
841 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 if (pulse->pos[i] > 1023)
844 pulse->amp[i] = get_bits(gb, 4);
850 * Decode Temporal Noise Shaping data; reference: table 4.48.
852 * @return Returns error status. 0 - OK, !0 - error
854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
855 GetBitContext *gb, const IndividualChannelStream *ics)
857 int w, filt, i, coef_len, coef_res, coef_compress;
858 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
859 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
860 for (w = 0; w < ics->num_windows; w++) {
861 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 coef_res = get_bits1(gb);
864 for (filt = 0; filt < tns->n_filt[w]; filt++) {
866 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
868 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 tns->order[w][filt], tns_max_order);
871 tns->order[w][filt] = 0;
874 if (tns->order[w][filt]) {
875 tns->direction[w][filt] = get_bits1(gb);
876 coef_compress = get_bits1(gb);
877 coef_len = coef_res + 3 - coef_compress;
878 tmp2_idx = 2 * coef_compress + coef_res;
880 for (i = 0; i < tns->order[w][filt]; i++)
881 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
890 * Decode Mid/Side data; reference: table 4.54.
892 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
893 * [1] mask is decoded from bitstream; [2] mask is all 1s;
894 * [3] reserved for scalable AAC
896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
900 if (ms_present == 1) {
901 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
902 cpe->ms_mask[idx] = get_bits1(gb);
903 } else if (ms_present == 2) {
904 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
913 *dst++ = v[idx & 15] * s;
914 *dst++ = v[idx>>4 & 15] * s;
920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
924 *dst++ = v[idx & 3] * s;
925 *dst++ = v[idx>>2 & 3] * s;
926 *dst++ = v[idx>>4 & 3] * s;
927 *dst++ = v[idx>>6 & 3] * s;
933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
934 unsigned sign, const float *scale)
936 union float754 s0, s1;
938 s0.f = s1.f = *scale;
939 s0.i ^= sign >> 1 << 31;
942 *dst++ = v[idx & 15] * s0.f;
943 *dst++ = v[idx>>4 & 15] * s1.f;
// Write four scaled values to dst. Each value is v[] indexed by one 2-bit
// field of idx, multiplied by *scale with its sign bit optionally flipped.
// The flip for each output is taken from the top bit of sign.
950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
951 unsigned sign, const float *scale)
// nz comes from the bits of idx above bit 11. Its low bit decides whether
// sign advances by one position before the next output.
953 unsigned nz = idx >> 12;
954 union float754 s = { .f = *scale };
// NOTE(review): 1<<31 shifts a signed int into its sign bit, which the C
// standard leaves undefined when int is 32 bits wide. 1U<<31 expresses the
// same mask without that. The same expression recurs three more times below.
957 t.i = s.i ^ (sign & 1<<31);
958 *dst++ = v[idx & 3] * t.f;
960 sign <<= nz & 1; nz >>= 1;
961 t.i = s.i ^ (sign & 1<<31);
962 *dst++ = v[idx>>2 & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1<<31);
966 *dst++ = v[idx>>4 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1<<31);
970 *dst++ = v[idx>>6 & 3] * t.f;
977 * Decode spectral data; reference: table 4.50.
978 * Dequantize and scale spectral data; reference: 4.6.3.3.
980 * @param coef array of dequantized, scaled spectral data
981 * @param sf array of scalefactors or intensity stereo positions
982 * @param pulse_present set if pulses are present
983 * @param pulse pointer to pulse data struct
984 * @param band_type array of the used band type
986 * @return Returns error status. 0 - OK, !0 - error
988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989 GetBitContext *gb, const float sf[120],
990 int pulse_present, const Pulse *pulse,
991 const IndividualChannelStream *ics,
992 enum BandType band_type[120])
994 int i, k, g, idx = 0;
// c is the number of coefficients per window.
995 const int c = 1024 / ics->num_windows;
996 const uint16_t *offsets = ics->swb_offset;
997 float *coef_base = coef;
// Clear everything above the last coded scalefactor band in every window.
1000 for (g = 0; g < ics->num_windows; g++)
1001 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1003 for (g = 0; g < ics->num_window_groups; g++) {
1004 unsigned g_len = ics->group_len[g];
1006 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Unsigned on purpose: a band type of 0 wraps to a huge value and therefore
// takes the first branch below, together with the intensity band types.
1007 const unsigned cbt_m1 = band_type[idx] - 1;
1008 float *cfo = coef + offsets[i];
1009 int off_len = offsets[i + 1] - offsets[i];
// These bands carry no spectral data of their own: zero them.
1012 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1013 for (group = 0; group < g_len; group++, cfo+=128) {
1014 memset(cfo, 0, off_len * sizeof(float));
// Noise bands: fill with pseudorandom values, then rescale the band by
// sf[idx] divided by the square root of its energy.
1016 } else if (cbt_m1 == NOISE_BT - 1) {
1017 for (group = 0; group < g_len; group++, cfo+=128) {
1021 for (k = 0; k < off_len; k++) {
1022 ac->random_state = lcg_random(ac->random_state);
1023 cfo[k] = ac->random_state;
1026 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1027 scale = sf[idx] / sqrtf(band_energy);
1028 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
// Codebook-coded bands: all per-codebook tables are indexed by cbt_m1.
1031 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1032 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1033 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1034 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1035 OPEN_READER(re, gb);
// Codebooks are handled in pairs, hence the shift by one.
1037 switch (cbt_m1 >> 1) {
1039 for (group = 0; group < g_len; group++, cfo+=128) {
1047 UPDATE_CACHE(re, gb);
1048 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1050 if (code >= cb_size) {
1052 goto err_cb_overflow;
1055 cb_idx = cb_vector_idx[code];
1056 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1062 for (group = 0; group < g_len; group++, cfo+=128) {
1072 UPDATE_CACHE(re, gb);
1073 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1075 if (code >= cb_size) {
1077 goto err_cb_overflow;
1080 #if MIN_CACHE_BITS < 20
1081 UPDATE_CACHE(re, gb);
1083 cb_idx = cb_vector_idx[code];
// Bits 8..11 of cb_idx give the number of sign bits that follow the code.
// They are left-aligned in a 32-bit word before being handed to VMUL4S.
1084 nnz = cb_idx >> 8 & 15;
1085 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086 LAST_SKIP_BITS(re, gb, nnz);
1087 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1093 for (group = 0; group < g_len; group++, cfo+=128) {
1101 UPDATE_CACHE(re, gb);
1102 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1104 if (code >= cb_size) {
1106 goto err_cb_overflow;
1109 cb_idx = cb_vector_idx[code];
1110 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1117 for (group = 0; group < g_len; group++, cfo+=128) {
1127 UPDATE_CACHE(re, gb);
1128 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1130 if (code >= cb_size) {
1132 goto err_cb_overflow;
1135 cb_idx = cb_vector_idx[code];
1136 nnz = cb_idx >> 8 & 15;
1137 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1138 LAST_SKIP_BITS(re, gb, nnz);
1139 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1145 for (group = 0; group < g_len; group++, cfo+=128) {
// This path builds the float bit patterns directly through an integer view
// of the output; the band is scaled by sf[idx] afterwards.
1147 uint32_t *icf = (uint32_t *) cf;
1157 UPDATE_CACHE(re, gb);
1158 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1166 if (code >= cb_size) {
1168 goto err_cb_overflow;
1171 cb_idx = cb_vector_idx[code];
1174 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1175 LAST_SKIP_BITS(re, gb, nnz);
1177 for (j = 0; j < 2; j++) {
1181 /* The total length of escape_sequence must be < 22 bits according
1182 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1183 UPDATE_CACHE(re, gb);
1184 b = GET_CACHE(re, gb);
// Count the leading one bits of the cache: that is the escape prefix length.
1185 b = 31 - av_log2(~b);
1188 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1192 #if MIN_CACHE_BITS < 21
1193 LAST_SKIP_BITS(re, gb, b + 1);
1194 UPDATE_CACHE(re, gb);
1196 SKIP_BITS(re, gb, b + 1);
1199 n = (1 << b) + SHOW_UBITS(re, gb, b);
1200 LAST_SKIP_BITS(re, gb, b);
// NOTE(review): 1<<31 shifts a signed int into its sign bit, which is
// undefined for a 32-bit int. 1U<<31 gives the intended mask. The same
// applies two lines further down.
1201 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1204 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1205 *icf++ = (bits & 1<<31) | v;
1212 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1216 CLOSE_READER(re, gb);
// Pulse data is added on top of the already dequantized coefficients.
1222 if (pulse_present) {
1224 for (i = 0; i < pulse->num_pulse; i++) {
1225 float co = coef_base[ pulse->pos[i] ];
1226 while (offsets[idx + 1] <= pulse->pos[i])
1228 if (band_type[idx] != NOISE_BT && sf[idx]) {
1229 float ico = -pulse->amp[i];
1232 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1234 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
// NOTE(review): the limit printed here is looked up with band_type[idx], but
// the check that failed used cb_size, which is ff_aac_spectral_sizes indexed
// by band_type[idx] - 1. The message therefore reports the size of the next
// codebook. For the highest codebook it presumably reads one entry past the
// end of the table (assuming it has 11 entries like vlc_spectral - confirm).
1241 av_log(ac->avctx, AV_LOG_ERROR,
1242 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1243 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1247 static av_always_inline float flt16_round(float pf)
1251 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
// Round a float to its upper 16 bits: a bias is added to the raw bit
// pattern, then the low 16 bits are masked off.
1255 static av_always_inline float flt16_even(float pf)
// NOTE(review): >> binds tighter than &, so the inner term evaluates as
// tmp.i & (0x00010000U >> 16), that is tmp.i & 1. It tests bit 0 of the
// pattern, not bit 16. If round-half-to-even on the kept 16 bits is what
// was intended, the term would be (tmp.i & 0x00010000U) >> 16. Changing it
// alters decoder output, so confirm against the reference before touching.
1259 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1263 static av_always_inline float flt16_trunc(float pf)
1267 pun.i &= 0xFFFF0000U;
// Runs one predictor step on *coef using the state in ps, then updates that
// state (cor0/cor1, var0/var1, r0/r1) from the resulting coefficient.
// NOTE(review): the remainder of the parameter list and the local declarations
// are not visible in this excerpt.
1271 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1274 const float a = 0.953125; // 61.0 / 64
1275 const float alpha = 0.90625; // 29.0 / 32
// k1 and k2 are forced to 0 unless the matching variance is greater than 1.
1280 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1281 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
// Predicted value built from the two stored r values.
1283 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
// The prediction is scaled by ac->sf_scale before being added.
// NOTE(review): presumably guarded by a condition on a line not shown here - confirm.
1285 *coef += pv * ac->sf_scale;
// e0 is the coefficient with sf_scale divided back out; e1 additionally
// removes the k1 * r0 contribution.
1287 e0 = *coef / ac->sf_scale;
1288 e1 = e0 - k1 * ps->r0;
// Correlations and variances are scaled by alpha, accumulate the new terms and
// are passed through flt16_trunc.
1290 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1291 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1292 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1293 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
// r1 must be computed before r0 is overwritten, since it reads the old r0.
1295 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1296 ps->r0 = flt16_trunc(a * e0);
1300 * Apply AAC-Main style frequency domain prediction.
// Calls predict() for every coefficient of the scalefactor bands covered by
// prediction in one channel, and performs the predictor resets.
1302 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
// First use of this channel: reset every predictor once and remember that.
1306 if (!sce->ics.predictor_initialized) {
1307 reset_all_predictors(sce->predictor_state);
1308 sce->ics.predictor_initialized = 1;
// Anything other than eight short windows: walk the bands up to the limit
// given by ff_aac_pred_sfb_max for the current sampling index.
1311 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1312 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1313 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
// The last argument is nonzero only when prediction is flagged as present and
// used for this band; predict() is still called for every coefficient.
1314 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1315 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
// A nonzero predictor_reset_group selects one group of predictors to reset.
1318 if (sce->ics.predictor_reset_group)
1319 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
// NOTE(review): presumably the branch taken for EIGHT_SHORT_SEQUENCE; the else
// line is not visible in this excerpt - confirm.
1321 reset_all_predictors(sce->predictor_state);
1325 * Decode an individual_channel_stream payload; reference: table 4.44.
1327 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1328 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1330 * @return Returns error status. 0 - OK, !0 - error
// Parses one individual channel stream from gb into sce: global gain, optional
// ics_info, band types, scalefactors, pulse data, TNS data, the gain control
// flag and finally the spectral coefficients.
// NOTE(review): several lines (a declaration of pulse, error returns, closing
// braces) are not visible in this excerpt.
1332 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1333 GetBitContext *gb, int common_window, int scale_flag)
1336 TemporalNoiseShaping *tns = &sce->tns;
1337 IndividualChannelStream *ics = &sce->ics;
1338 float *out = sce->coeffs;
1339 int global_gain, pulse_present = 0;
1341 /* This assignment is to silence a GCC warning about the variable being used
1342 * uninitialized when in fact it always is.
1344 pulse.num_pulse = 0;
1346 global_gain = get_bits(gb, 8);
// ics_info is read here only when there is no common window and the stream is
// not scalable.
1348 if (!common_window && !scale_flag) {
1349 if (decode_ics_info(ac, ics, gb, 0) < 0)
1353 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1355 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
// A set pulse flag is an error for eight short windows; otherwise the pulse
// data is parsed and a nonzero result is reported as corrupt.
1360 if ((pulse_present = get_bits1(gb))) {
1361 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1362 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1365 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1366 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
// TNS data is parsed only when its presence bit is set.
1370 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
// A set bit here is reported through av_log_missing_feature as SSR.
1372 if (get_bits1(gb)) {
1373 av_log_missing_feature(ac->avctx, "SSR", 1);
1378 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
// For AOT_AAC_MAIN without a common window, prediction runs right after the
// spectrum is decoded.
1381 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1382 apply_prediction(ac, sce);
1388 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
// Runs the butterflies_float DSP routine on the two channels of cpe for every
// band whose ms_mask entry is set and whose band type is below NOISE_BT in
// both channels.
1390 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1392 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1393 float *ch0 = cpe->ch[0].coeffs;
1394 float *ch1 = cpe->ch[1].coeffs;
1395 int g, i, group, idx = 0;
1396 const uint16_t *offsets = ics->swb_offset;
// idx keeps counting across window groups while i restarts for each group.
1397 for (g = 0; g < ics->num_window_groups; g++) {
1398 for (i = 0; i < ics->max_sfb; i++, idx++) {
1399 if (cpe->ms_mask[idx] &&
1400 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
// Windows inside a group are addressed 128 coefficients apart.
1401 for (group = 0; group < ics->group_len[g]; group++) {
1402 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1403 ch1 + group * 128 + offsets[i],
1404 offsets[i+1] - offsets[i]);
// Advance both channel pointers past all windows of this group.
1408 ch0 += ics->group_len[g] * 128;
1409 ch1 += ics->group_len[g] * 128;
1414 * intensity stereo decoding; reference: 4.6.8.2.3
1416 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1417 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1418 * [3] reserved for scalable AAC
// For bands of channel 1 whose type is INTENSITY_BT or INTENSITY_BT2, writes
// channel 1 coefficients as a scaled copy of the channel 0 coefficients.
1420 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
// Band layout, band types and scalefactors are all taken from channel 1.
1422 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1423 SingleChannelElement *sce1 = &cpe->ch[1];
1424 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1425 const uint16_t *offsets = ics->swb_offset;
1426 int g, group, i, k, idx = 0;
1429 for (g = 0; g < ics->num_window_groups; g++) {
// No increment in the loop header: i advances inside the body, one run of
// bands at a time.
1430 for (i = 0; i < ics->max_sfb;) {
1431 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1432 const int bt_run_end = sce1->band_type_run_end[idx];
1433 for (; i < bt_run_end; i++, idx++) {
// c evaluates to -1 for band type value 14 and to +1 for value 15.
1434 c = -1 + 2 * (sce1->band_type[idx] - 14);
// A set ms_mask entry flips the sign of c.
// NOTE(review): presumably conditional on ms_present; the guarding line is not
// visible in this excerpt - confirm.
1436 c *= 1 - 2 * cpe->ms_mask[idx];
1437 scale = c * sce1->sf[idx];
1438 for (group = 0; group < ics->group_len[g]; group++)
1439 for (k = offsets[i]; k < offsets[i + 1]; k++)
1440 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
// Other band types: idx jumps to the end of the run.
// NOTE(review): the matching update of i is not visible in this excerpt.
1443 int bt_run_end = sce1->band_type_run_end[idx];
1444 idx += bt_run_end - i;
// Advance both coefficient pointers past all windows of this group.
1448 coef0 += ics->group_len[g] * 128;
1449 coef1 += ics->group_len[g] * 128;
1454 * Decode a channel_pair_element; reference: table 4.4.
1456 * @param cpe channel pair element whose two channels are decoded
1458 * @return Returns error status. 0 - OK, !0 - error
// Decodes a channel pair: optional shared ics_info and mid/side mask, then the
// two channel streams, then the stereo tools.
// NOTE(review): error returns and several braces are not visible in this excerpt.
1460 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1462 int i, ret, common_window, ms_present = 0;
1464 common_window = get_bits1(gb);
1465 if (common_window) {
1466 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
// Channel 1 receives a copy of the channel 0 ics, except that the value its
// own use_kb_window[0] had before the copy ends up in use_kb_window[1].
1468 i = cpe->ch[1].ics.use_kb_window[0];
1469 cpe->ch[1].ics = cpe->ch[0].ics;
1470 cpe->ch[1].ics.use_kb_window[1] = i;
// ms_present is a 2-bit field; value 3 is rejected as reserved and any other
// nonzero value triggers decoding of the mid/side mask.
1471 ms_present = get_bits(gb, 2);
1472 if (ms_present == 3) {
1473 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1475 } else if (ms_present)
1476 decode_mid_side_stereo(cpe, gb, ms_present);
1478 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1480 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
// Stereo tools for the common window case; for AOT_AAC_MAIN prediction is
// applied here to both channels.
// NOTE(review): the condition guarding the mid/side call is not visible in
// this excerpt.
1483 if (common_window) {
1485 apply_mid_side_stereo(ac, cpe);
1486 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1487 apply_prediction(ac, &cpe->ch[0]);
1488 apply_prediction(ac, &cpe->ch[1]);
1492 apply_intensity_stereo(cpe, ms_present);
1497 * Decode coupling_channel_element; reference: table 4.8.
1499 * @param che coupling channel element to fill in (its first channel and coupling data)
1501 * @return Returns error status. 0 - OK, !0 - error
// Parses a coupling channel element: the list of coupling targets, the
// coupling point, the gain scale, the embedded channel stream and the gain
// lists stored in coup->gain.
// NOTE(review): local declarations, the num_gain bookkeeping and several
// branches are not visible in this excerpt.
1503 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1509 SingleChannelElement *sce = &che->ch[0];
1510 ChannelCoupling *coup = &che->coup;
// The first bit yields 0 or 2; a later bit is added to refine the value.
1512 coup->coupling_point = 2 * get_bits1(gb);
1513 coup->num_coupled = get_bits(gb, 3);
// The loop condition is <=, so num_coupled + 1 targets are read.
1514 for (c = 0; c <= coup->num_coupled; c++) {
1516 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1517 coup->id_select[c] = get_bits(gb, 4);
1518 if (coup->type[c] == TYPE_CPE) {
1519 coup->ch_select[c] = get_bits(gb, 2);
// NOTE(review): the statement executed when ch_select is 3 is not visible here.
1520 if (coup->ch_select[c] == 3)
// NOTE(review): presumably the branch for targets that are not TYPE_CPE - confirm.
1523 coup->ch_select[c] = 2;
// Adds 1 when the next bit is set or when coupling_point is already 2 or more.
1525 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1527 sign = get_bits(gb, 1);
// scale is 2 raised to the power 2^(n - 3), where n is the 2-bit field read here.
1528 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1530 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1533 for (c = 0; c < num_gain; c++) {
1537 float gain_cache = 1.;
// With AFTER_IMDCT the common gain flag is taken as 1 without reading a bit.
1539 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
// The decoded value is offset by 60; without a common gain it is 0.
1540 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1541 gain_cache = pow(scale, -gain);
// AFTER_IMDCT stores a single gain per list at index 0.
1543 if (coup->coupling_point == AFTER_IMDCT) {
1544 coup->gain[c][0] = gain_cache;
// Otherwise one gain per (group, sfb) pair; bands of type ZERO_BT read nothing.
1546 for (g = 0; g < sce->ics.num_window_groups; g++) {
1547 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1548 if (sce->band_type[idx] != ZERO_BT) {
1550 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
// NOTE(review): the lines that derive s and adjust t are not visible here.
1558 gain_cache = pow(scale, -t) * s;
1561 coup->gain[c][idx] = gain_cache;
1571 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1573 * @return Returns number of bytes consumed.
// Reads exclusion flags into che_drc->exclude_mask in batches of seven; one
// further bit after each batch tells whether another batch follows.
1575 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1579 int num_excl_chan = 0;
1582 for (i = 0; i < 7; i++)
1583 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
// The continuation bit is only read while num_excl_chan is below
// MAX_CHANNELS - 7, so a further batch of seven stays inside that bound.
1584 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
// Each batch is seven flags plus one continuation bit, so the batch count
// equals the number of bytes read.
1586 return num_excl_chan / 7;
1590 * Decode dynamic range information; reference: table 4.52.
1592 * @param cnt length of TYPE_FIL syntactic element in bytes
1594 * @return Returns number of bytes consumed.
// Parses the dynamic range fields from gb into che_drc. Each optional group of
// fields is preceded by a one-bit presence flag.
// NOTE(review): the declaration and remaining updates of n, and the return
// statement, are not visible in this excerpt.
1596 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1597 GetBitContext *gb, int cnt)
// One band unless band information is present in the stream.
1600 int drc_num_bands = 1;
1603 /* pce_tag_present? */
1604 if (get_bits1(gb)) {
1605 che_drc->pce_instance_tag = get_bits(gb, 4);
1606 skip_bits(gb, 4); // tag_reserved_bits
1610 /* excluded_chns_present? */
1611 if (get_bits1(gb)) {
1612 n += decode_drc_channel_exclusions(che_drc, gb);
1615 /* drc_bands_present? */
1616 if (get_bits1(gb)) {
1617 che_drc->band_incr = get_bits(gb, 4);
1618 che_drc->interpolation_scheme = get_bits(gb, 4);
// band_incr is a 4-bit value added to the initial count of 1.
1620 drc_num_bands += che_drc->band_incr;
1621 for (i = 0; i < drc_num_bands; i++) {
1622 che_drc->band_top[i] = get_bits(gb, 8);
1627 /* prog_ref_level_present? */
1628 if (get_bits1(gb)) {
1629 che_drc->prog_ref_level = get_bits(gb, 7);
1630 skip_bits1(gb); // prog_ref_level_reserved_bits
// One sign bit and one 7-bit control value per band.
1634 for (i = 0; i < drc_num_bands; i++) {
1635 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1636 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1644 * Decode extension data (incomplete); reference: table 4.51.
1646 * @param cnt length of TYPE_FIL syntactic element in bytes
1648 * @return Returns number of bytes consumed
// Dispatches on the 4-bit extension type: SBR data goes to
// ff_decode_sbr_extension, dynamic range data to decode_dynamic_range, and
// other payloads are skipped.
// NOTE(review): several case labels, the crc_flag handling, break statements
// and the return are not visible in this excerpt.
1650 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1651 ChannelElement *che, enum RawDataBlockType elem_type)
1655 switch (get_bits(gb, 4)) { // extension type
1656 case EXT_SBR_DATA_CRC:
// SBR data is refused in the three situations logged below.
1660 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1662 } else if (!ac->m4ac.sbr) {
1663 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
// 4 bits of the payload were already consumed by the extension type above.
1664 skip_bits_long(gb, 8 * cnt - 4);
1666 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1667 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1668 skip_bits_long(gb, 8 * cnt - 4);
1673 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1675 case EXT_DYNAMIC_RANGE:
1676 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1680 case EXT_DATA_ELEMENT:
// Skip the rest of the payload.
1682 skip_bits_long(gb, 8 * cnt - 4);
1689 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1691 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1692 * @param coef spectral coefficients
// For every window and every TNS filter in it, converts the stored filter
// coefficients to LPC form and filters the covered part of coef in place.
// NOTE(review): some declarations, the assignment of top, the handling of a
// zero order and the setup of inc/start per direction are not visible here.
1694 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1695 IndividualChannelStream *ics, int decode)
// Band indices are clipped to the smaller of tns_max_bands and max_sfb.
1697 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1699 int bottom, top, order, start, end, size, inc;
1700 float lpc[TNS_MAX_ORDER];
1702 for (w = 0; w < ics->num_windows; w++) {
// Each window starts at num_swb and every filter moves bottom further down by
// its length, never below 0.
1703 bottom = ics->num_swb;
1704 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1706 bottom = FFMAX(0, top - tns->length[w][filt]);
1707 order = tns->order[w][filt];
1712 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1714 start = ics->swb_offset[FFMIN(bottom, mmm)];
1715 end = ics->swb_offset[FFMIN( top, mmm)];
// An empty range after clipping is tested for here.
1716 if ((size = end - start) <= 0)
1718 if (tns->direction[w][filt]) {
// In-place recursion: sample m subtracts up to min(m, order) neighbours that
// were already filtered earlier in this loop, weighted by lpc.
1727 for (m = 0; m < size; m++, start += inc)
1728 for (i = 1; i <= FFMIN(m, order); i++)
1729 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1735 * Conduct IMDCT and windowing.
// Transforms sce->coeffs with the half IMDCT, combines the result with
// sce->saved through vector_fmul_window into sce->ret, and stores the data
// needed for the next frame back into sce->saved.
// NOTE(review): else lines and closing braces are not visible in this excerpt,
// so branch boundaries below are inferred from the conditions that are shown.
1737 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1739 IndividualChannelStream *ics = &sce->ics;
1740 float *in = sce->coeffs;
1741 float *out = sce->ret;
1742 float *saved = sce->saved;
// Index 0 of use_kb_window picks the current short window; index 1 picks the
// windows named _prev.
1743 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1744 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1745 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1746 float *buf = ac->buf_mdct;
1747 float *temp = ac->temp;
// Eight short windows: one small transform per block of 128 coefficients.
1751 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1752 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1753 av_log(ac->avctx, AV_LOG_WARNING,
1754 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1755 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1756 for (i = 0; i < 1024; i += 128)
1757 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
// NOTE(review): presumably the single long transform of the other branch.
1759 ff_imdct_half(&ac->mdct, buf, in);
1761 /* window overlapping
1762 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1763 * and long to short transitions are considered to be short to short
1764 * transitions. This leaves just two cases (long to long and short to short)
1765 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
// Long to long: one windowed overlap over 512 samples with the previous long window.
1767 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1768 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1769 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
// Other transitions: the first 448 output samples are the saved samples plus bias.
1771 for (i = 0; i < 448; i++)
1772 out[i] = saved[i] + bias;
// Eight short: chain of 64-sample overlaps between consecutive short blocks;
// the fifth overlap goes through temp and only its first 64 values are copied
// to out here.
1774 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1775 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1776 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1777 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1778 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
1779 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1780 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
// Not eight short: one short overlap at offset 448, then samples 576..1023
// come straight from buf plus bias.
1782 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1783 for (i = 576; i < 1024; i++)
1784 out[i] = buf[i-512] + bias;
// Refill saved for the next frame. In the eight short case the second half of
// temp has the bias removed again, and the following overlaps are computed
// with a bias argument of 0.
1789 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1790 for (i = 0; i < 64; i++)
1791 saved[i] = temp[64 + i] - bias;
1792 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1793 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1794 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1795 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1796 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1797 memcpy( saved, buf + 512, 448 * sizeof(float));
1798 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1799 } else { // LONG_STOP or ONLY_LONG
1800 memcpy( saved, buf + 512, 512 * sizeof(float));
1805 * Apply dependent channel coupling (applied before IMDCT).
1807 * @param index index into coupling gain array
// Adds the coupling channel coefficients, weighted by the per-band gain from
// cce->coup.gain[index], onto the coefficients of target.
1809 static void apply_dependent_coupling(AACContext *ac,
1810 SingleChannelElement *target,
1811 ChannelElement *cce, int index)
// Band layout comes from the coupling channel, not from the target.
1813 IndividualChannelStream *ics = &cce->ch[0].ics;
1814 const uint16_t *offsets = ics->swb_offset;
1815 float *dest = target->coeffs;
1816 const float *src = cce->ch[0].coeffs;
1817 int g, i, group, k, idx = 0;
// The combination with AOT_AAC_LTP is reported as unsupported.
// NOTE(review): what follows the log call (presumably an early return) is not
// visible in this excerpt.
1818 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1819 av_log(ac->avctx, AV_LOG_ERROR,
1820 "Dependent coupling is not supported together with LTP\n");
1823 for (g = 0; g < ics->num_window_groups; g++) {
1824 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Bands of type ZERO_BT in the coupling channel contribute nothing.
1825 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1826 const float gain = cce->coup.gain[index][idx];
1827 for (group = 0; group < ics->group_len[g]; group++) {
1828 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1830 dest[group * 128 + k] += gain * src[group * 128 + k];
// Advance both pointers past all windows of this group.
1835 dest += ics->group_len[g] * 128;
1836 src += ics->group_len[g] * 128;
1841 * Apply independent channel coupling (applied after IMDCT).
1843 * @param index index into coupling gain array
// Adds the coupling channel output samples, weighted by a single gain, onto
// the output samples of target.
1845 static void apply_independent_coupling(AACContext *ac,
1846 SingleChannelElement *target,
1847 ChannelElement *cce, int index)
// Only entry 0 of the gain list is used here.
1850 const float gain = cce->coup.gain[index][0];
1851 const float bias = ac->add_bias;
1852 const float *src = cce->ch[0].ret;
1853 float *dest = target->ret;
// 1024 samples, doubled to 2048 when m4ac.sbr equals 1.
1854 const int len = 1024 << (ac->m4ac.sbr == 1);
// add_bias is subtracted from the source sample before the gain is applied.
1856 for (i = 0; i < len; i++)
1857 dest[i] += gain * (src[i] - bias);
1861 * channel coupling transformation interface
1863 * @param index index into coupling gain array
1864 * @param apply_coupling_method pointer to (in)dependent coupling function
// Looks through all coupling channel elements with the requested coupling
// point and, for each target entry matching (type, elem_id), calls
// apply_coupling_method on the selected channels of cc.
// NOTE(review): the declaration and reset of index are not visible here.
1866 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1867 enum RawDataBlockType type, int elem_id,
1868 enum CouplingPoint coupling_point,
1869 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1873 for (i = 0; i < MAX_ELEM_ID; i++) {
1874 ChannelElement *cce = ac->che[TYPE_CCE][i];
1877 if (cce && cce->coup.coupling_point == coupling_point) {
1878 ChannelCoupling *coup = &cce->coup;
1880 for (c = 0; c <= coup->num_coupled; c++) {
1881 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
// Channel 0 is coupled unless ch_select is 1; channel 1 is coupled unless
// ch_select is 2.
1882 if (coup->ch_select[c] != 1) {
1883 apply_coupling_method(ac, &cc->ch[0], cce, index);
// NOTE(review): the statement guarded by this test is not visible here.
1884 if (coup->ch_select[c] != 0)
1887 if (coup->ch_select[c] != 2)
1888 apply_coupling_method(ac, &cc->ch[1], cce, index++);
// Advances index by one, or by two when ch_select is 3.
// NOTE(review): presumably the else branch for entries that do not match.
1890 index += 1 + (coup->ch_select[c] == 3);
1897 * Convert spectral data to float samples, applying all supported tools as appropriate.
// Walks all channel elements and runs, in order: dependent coupling before
// TNS, TNS, dependent coupling between TNS and IMDCT, IMDCT with windowing,
// SBR and finally independent coupling after the IMDCT.
1899 static void spectral_to_sample(AACContext *ac)
// add_bias is handed to the IMDCT stage only when m4ac.sbr is zero or negative.
1902 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
// Element types are visited from 3 down to 0.
1903 for (type = 3; type >= 0; type--) {
1904 for (i = 0; i < MAX_ELEM_ID; i++) {
// NOTE(review): presumably followed by a check that che is not NULL; that line
// is not visible in this excerpt.
1905 ChannelElement *che = ac->che[type][i];
1907 if (type <= TYPE_CPE)
1908 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1909 if (che->ch[0].tns.present)
1910 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1911 if (che->ch[1].tns.present)
1912 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1913 if (type <= TYPE_CPE)
1914 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
// Coupling channels are transformed only when they couple after the IMDCT.
1915 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1916 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1917 if (type == TYPE_CPE) {
1918 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1920 if (ac->m4ac.sbr > 0) {
1921 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1924 if (type <= TYPE_CCE)
1925 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
// Parses an ADTS header from gb and copies sample rate, sampling index and
// object type into ac->m4ac; it may also set up the output configuration.
// NOTE(review): the test on size, the error returns and the final return are
// not visible in this excerpt.
1931 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1934 AACADTSHeaderInfo hdr_info;
1936 size = ff_aac_parse_header(gb, &hdr_info);
// While the output configuration is not locked, a nonzero chan_config from the
// header is turned into a default channel layout and configured as a trial.
1938 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1939 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1940 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1941 ac->m4ac.chan_config = hdr_info.chan_config;
1942 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1944 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
// chan_config is zero: the state drops back to OC_NONE unless it is locked.
1946 } else if (ac->output_configured != OC_LOCKED) {
1947 ac->output_configured = OC_NONE;
// NOTE(review): the statement guarded by this test is not visible here.
1949 if (ac->output_configured != OC_LOCKED)
1951 ac->m4ac.sample_rate = hdr_info.sample_rate;
1952 ac->m4ac.sampling_index = hdr_info.sampling_index;
1953 ac->m4ac.object_type = hdr_info.object_type;
// The codec context sample rate is filled in only while it is still zero.
1954 if (!ac->avctx->sample_rate)
1955 ac->avctx->sample_rate = hdr_info.sample_rate;
// A single AAC frame per ADTS frame is handled; the other case is reported
// through av_log_missing_feature.
1956 if (hdr_info.num_aac_frames == 1) {
1957 if (!hdr_info.crc_absent)
1960 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
// Decodes one packet: optional ADTS header, then syntactic elements until
// TYPE_END, then conversion to samples and interleaved 16-bit output.
// NOTE(review): case labels, break and return statements, some declarations
// and closing braces are not visible in this excerpt.
1967 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1968 int *data_size, AVPacket *avpkt)
1970 const uint8_t *buf = avpkt->data;
1971 int buf_size = avpkt->size;
1972 AACContext *ac = avctx->priv_data;
1973 ChannelElement *che = NULL, *che_prev = NULL;
1975 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1976 int err, elem_id, data_size_tmp;
1978 int samples = 1024, multiplier;
1981 init_get_bits(&gb, buf, buf_size * 8);
// A 12-bit value of 0xfff at the start is taken as an ADTS header.
1983 if (show_bits(&gb, 12) == 0xfff) {
1984 if (parse_adts_frame_header(ac, &gb) < 0) {
1985 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
// Sampling index values above 12 are rejected.
1988 if (ac->m4ac.sampling_index > 12) {
1989 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1994 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
// Every element starts with a 3-bit type and a 4-bit id.
1996 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1997 elem_id = get_bits(&gb, 4);
// Element types below TYPE_DSE need an allocated channel element.
1999 if (elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) {
2000 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
2004 switch (elem_type) {
2007 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2011 err = decode_cpe(ac, &gb, che);
2015 err = decode_cce(ac, &gb, che);
2019 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2023 err = skip_data_stream_element(ac, &gb);
// A program config element is parsed into a scratch position table first.
2027 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2028 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2029 if ((err = decode_pce(ac, new_che_pos, &gb)))
2031 if (ac->output_configured > OC_TRIAL_PCE)
2032 av_log(avctx, AV_LOG_ERROR,
2033 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2035 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
// Here elem_id is reused as a byte count that an extra 8-bit field can extend.
// NOTE(review): the condition guarding the extension is not visible here.
2041 elem_id += get_bits(&gb, 8) - 1;
2042 if (get_bits_left(&gb) < 8 * elem_id) {
2043 av_log(avctx, AV_LOG_ERROR, overread_err);
// The payload decoder returns the number of bytes it consumed.
2047 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2048 err = 0; /* FIXME */
2052 err = -1; /* should not happen, but keeps compiler happy */
2057 elem_type_prev = elem_type;
// Fewer than 3 bits left means the next element type cannot be read.
2062 if (get_bits_left(&gb) < 3) {
2063 av_log(avctx, AV_LOG_ERROR, overread_err);
2068 spectral_to_sample(ac);
// multiplier is 1 only when m4ac.sbr is 1 and the extension sample rate is
// higher than the base rate; it doubles the sample count and the sample rate.
2070 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2071 samples <<= multiplier;
2072 if (ac->output_configured < OC_LOCKED) {
2073 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2074 avctx->frame_size = samples;
// The caller buffer must hold samples * channels 16-bit values.
2077 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2078 if (*data_size < data_size_tmp) {
2079 av_log(avctx, AV_LOG_ERROR,
2080 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2081 *data_size, data_size_tmp);
2084 *data_size = data_size_tmp;
2086 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
// Any nonzero configuration state is promoted to OC_LOCKED after output.
2088 if (ac->output_configured)
2089 ac->output_configured = OC_LOCKED;
// Bits consumed, rounded up to whole bytes. The scan looks for a nonzero byte
// after the consumed data; if buf_offset reaches buf_size the whole buffer is
// reported as consumed.
// NOTE(review): the statement executed on a nonzero byte (presumably a break)
// is not visible in this excerpt.
2091 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2092 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2093 if (buf[buf_offset])
2096 return buf_size > buf_offset ? buf_consumed : buf_size;
// Releases every channel element slot together with its SBR context, then
// shuts down both MDCT contexts.
// NOTE(review): the return statement is not visible in this excerpt.
2099 static av_cold int aac_decode_close(AVCodecContext *avctx)
2101 AACContext *ac = avctx->priv_data;
2104 for (i = 0; i < MAX_ELEM_ID; i++) {
2105 for (type = 0; type < 4; type++) {
// The SBR context is closed only for slots that hold an element, while
// av_freep runs for every slot.
2106 if (ac->che[type][i])
2107 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2108 av_freep(&ac->che[type][i]);
2112 ff_mdct_end(&ac->mdct);
2113 ff_mdct_end(&ac->mdct_small);
2117 AVCodec aac_decoder = {
2126 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2127 .sample_fmts = (const enum SampleFormat[]) {
2128 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2130 .channel_layouts = aac_channel_layout,