3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aac_parser.h"
101 # include "arm/aac.h"
// VLC table read through get_vlc2 when decoding scalefactor deltas.
109 static VLC vlc_scalefactors;
// One VLC table per spectral codebook, filled by AAC_INIT_VLC_STATIC(0) .. (10).
110 static VLC vlc_spectral[11];
// Shared log text for reads that would run past the available input bits.
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
// Look up the channel element that the syntactic element (type, elem_id) of the
// current frame decodes into. Duplicate tags within a frame are remapped to the
// next unseen id; when ac->tag_che_map has no entry yet, the element is picked
// from ac->che according to ac->m4ac.chan_config and ac->tags_mapped.
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
// Test the bound before the array read so that tags_seen_this_frame is never
// indexed at MAX_ELEM_ID once the search runs off the end.
121 while (elem_id < MAX_ELEM_ID && ac->tags_seen_this_frame[type][elem_id]) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
// An existing mapping always wins over the chan_config based defaults below.
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
// (chan_config != 2) is 0 for plain stereo and 1 otherwise: the first CPE is the
// first mapped tag for stereo and the second one for every other configuration.
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
// Bring ac->che[type][id] in line with che_pos[type][id]: a used position gets a
// zero-initialised ChannelElement (allocated on demand) with its SBR context
// initialised and, unless it is a CCE, its output buffer(s) appended to
// ac->output_data while *channels is advanced. The tail closes the SBR context
// and frees the element.
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
// Allocate only when no element exists yet; a failed av_mallocz yields ENOMEM.
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
// A CPE, or an SCE while m4ac.ps == 1 (presumably parametric stereo, confirm),
// contributes a second output channel.
203 if (type == TYPE_CPE ||
204 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
// The SBR context is closed only when an element exists; av_freep also resets the slot pointer.
209 if (ac->che[type][id])
210 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211 av_freep(&ac->che[type][id]);
217 * Configure output channel order based on the current program configuration element.
219 * @param che_pos current channel position configuration
220 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
222 * @return Returns error status. 0 - OK, !0 - error
// Apply a channel position configuration: copy new_che_pos over che_pos when
// they are different arrays, configure the channel elements through
// che_configure, then publish the channel count on avctx and store oc_type in
// ac->output_configured.
224 static av_cold int output_configure(AACContext *ac,
225 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
226 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227 int channel_config, enum OCStatus oc_type)
229 AVCodecContext *avctx = ac->avctx;
230 int i, type, channels = 0, ret;
232 if (new_che_pos != che_pos)
233 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
// Non-zero channel_config: configure only the elements that
// aac_channel_layout_map lists for this configuration, in that order.
235 if (channel_config) {
236 for (i = 0; i < tags_per_config[channel_config]; i++) {
237 if ((ret = che_configure(ac, che_pos,
238 aac_channel_layout_map[channel_config - 1][i][0],
239 aac_channel_layout_map[channel_config - 1][i][1],
// NOTE(review): the byte count is derived from ac->che, not from
// ac->tag_che_map; this is only right if both arrays share element type and
// dimensions - confirm against the AACContext declaration.
244 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
247 avctx->channel_layout = aac_channel_layout[channel_config - 1];
249 /* Allocate or free elements depending on if they are in the
250 * current program configuration.
252 * Set up default 1:1 output mapping.
254 * For a 5.1 stream the output order will be:
255 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
258 for (i = 0; i < MAX_ELEM_ID; i++) {
259 for (type = 0; type < 4; type++) {
260 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
// On this path the tag map mirrors ac->che and every tag counts as already mapped.
265 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266 ac->tags_mapped = 4 * MAX_ELEM_ID;
268 avctx->channel_layout = 0;
271 avctx->channels = channels;
273 ac->output_configured = oc_type;
279 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
281 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
282 * @param sce_map mono (Single Channel Element) map
283 * @param type speaker type/position for these channels
// Read channel map entries from gb. Each entry is a selector bit (read only
// when cpe_map is non-NULL) that chooses cpe_map over sce_map, followed by a
// 4-bit element id; the chosen map gets type stored at that id.
// n is presumably the number of entries to read - confirm.
285 static void decode_channel_map(enum ChannelPosition *cpe_map,
286 enum ChannelPosition *sce_map,
287 enum ChannelPosition type,
288 GetBitContext *gb, int n)
291 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
292 map[get_bits(gb, 4)] = type;
297 * Decode program configuration element; reference: table 4.2.
299 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
301 * @return Returns error status. 0 - OK, !0 - error
// Parse a program config element from gb and record the position of every
// listed element in new_che_pos. Mixdown fields and associated data tags are
// skipped; the trailing comment field is bounds-checked before being skipped.
303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
306 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
309 skip_bits(gb, 2); // object_type
311 sampling_index = get_bits(gb, 4);
// A mismatch with the configured sampling index only produces a warning.
312 if (ac->m4ac.sampling_index != sampling_index)
313 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
315 num_front = get_bits(gb, 4);
316 num_side = get_bits(gb, 4);
317 num_back = get_bits(gb, 4);
318 num_lfe = get_bits(gb, 2);
319 num_assoc_data = get_bits(gb, 3);
320 num_cc = get_bits(gb, 4);
323 skip_bits(gb, 4); // mono_mixdown_tag
325 skip_bits(gb, 4); // stereo_mixdown_tag
328 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
// Front, side and back entries carry a CPE/SCE selector bit; LFE entries do not
// (NULL cpe_map), and CC entries store into the CCE map whatever the bit says.
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
331 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
332 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
333 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
// Associated data element tags are 4 bits each and are not used.
335 skip_bits_long(gb, 4 * num_assoc_data);
337 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
341 /* comment field, first byte is length */
// The length byte counts bytes; it is converted to bits before the check.
342 comment_len = get_bits(gb, 8) * 8;
343 if (get_bits_left(gb) < comment_len) {
344 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
347 skip_bits_long(gb, comment_len);
352 * Set up channel positions based on a default channel configuration
353 * as specified in table 1.17.
355 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
357 * @return Returns error status. 0 - OK, !0 - error
// Fill new_che_pos for one of the predefined channel configurations 1..7.
// Values outside that range are reported through av_log as invalid.
359 static av_cold int set_default_channel_config(AACContext *ac,
360 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
363 if (channel_config < 1 || channel_config > 7) {
364 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
369 /* default channel configurations:
371 * 1ch : front center (mono)
372 * 2ch : L + R (stereo)
373 * 3ch : front center + L + R
374 * 4ch : front center + L + R + back center
375 * 5ch : front center + L + R + back stereo
376 * 6ch : front center + L + R + back stereo + LFE
377 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
380 if (channel_config != 2)
381 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382 if (channel_config > 1)
383 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384 if (channel_config == 4)
385 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
// (channel_config == 7) + 1 selects CPE index 2 for configuration 7 and index 1
// otherwise, which leaves CPE[1] free for the outer front pair set below.
386 if (channel_config > 4)
387 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 = AAC_CHANNEL_BACK; // back stereo
389 if (channel_config > 5)
390 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
391 if (channel_config == 7)
392 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
398 * Decode GA "General Audio" specific configuration; reference: table 4.1.
400 * @return Returns error status. 0 - OK, !0 - error
// Parse the GA specific config from gb and configure the output channels.
// channel_config 0 reads a program config element; the other call fills
// new_che_pos from the default configuration table. The result is applied
// through output_configure with OC_GLOBAL_HDR.
402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
405 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
406 int extension_flag, ret;
// A set frameLengthFlag is reported through av_log_missing_feature.
408 if (get_bits1(gb)) { // frameLengthFlag
409 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
413 if (get_bits1(gb)) // dependsOnCoreCoder
414 skip_bits(gb, 14); // coreCoderDelay
415 extension_flag = get_bits1(gb);
417 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
418 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 skip_bits(gb, 3); // layerNr
// Start from an all-zero position table so that unused slots stay unset.
421 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
422 if (channel_config == 0) {
423 skip_bits(gb, 4); // element_instance_tag
424 if ((ret = decode_pce(ac, new_che_pos, gb)))
427 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
430 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
// The extension fields are read only to advance the bit position.
433 if (extension_flag) {
434 switch (ac->m4ac.object_type) {
436 skip_bits(gb, 5); // numOfSubFrame
437 skip_bits(gb, 11); // layer_length
441 case AOT_ER_AAC_SCALABLE:
443 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
444 * aacScalefactorDataResilienceFlag
445 * aacSpectralDataResilienceFlag
449 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
455 * Decode audio specific configuration; reference: table 1.13.
457 * @param data pointer to AVCodecContext extradata
458 * @param data_size size of AVCodecContext extradata
460 * @return Returns error status. 0 - OK, !0 - error
// Parse the audio specific config held in data. ff_mpeg4audio_get_config fills
// ac->m4ac; a negative return is treated as failure, otherwise the value is
// used as the number of bits to skip in gb before the object type specific part.
462 static int decode_audio_specific_config(AACContext *ac, void *data,
468 init_get_bits(&gb, data, data_size * 8);
470 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
// Sampling rate indexes above 12 are reported as invalid.
472 if (ac->m4ac.sampling_index > 12) {
473 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
476 if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
479 skip_bits_long(&gb, i);
481 switch (ac->m4ac.object_type) {
484 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
488 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
496 * linear congruential pseudorandom number generator
498 * @param previous_val the current state of the generator
500 * @return Returns a 32-bit pseudorandom integer
// One step of a linear congruential generator: the next state is
// previous_val * 1664525 + 1013904223.
// NOTE(review): the arithmetic is done on signed int, so the intended
// wraparound relies on overflow behaviour that the C standard leaves undefined;
// unsigned arithmetic would make it well defined.
502 static av_always_inline int lcg_random(int previous_val)
504 return previous_val * 1664525 + 1013904223;
507 static av_always_inline void reset_predict_state(PredictorState *ps)
// Reset every predictor state ps[0] .. ps[MAX_PREDICTORS - 1].
517 static void reset_all_predictors(PredictorState *ps)
520 for (i = 0; i < MAX_PREDICTORS; i++)
521 reset_predict_state(&ps[i]);
// Reset the predictors of one reset group: indexes group_num - 1,
// group_num - 1 + 30, ... below MAX_PREDICTORS. group_num is 1-based
// (decode_prediction only accepts values 1..30).
524 static void reset_predictor_group(PredictorState *ps, int group_num)
527 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
528 reset_predict_state(&ps[i]);
// Initialise vlc_spectral[num] from the ff_aac_spectral_sizes, _bits and _codes
// tables of codebook num, using the element size of each table as both wrap and
// size argument. The size parameter is presumably the static table size - confirm.
531 #define AAC_INIT_VLC_STATIC(num, size) \
532 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
533 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
534 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
537 static av_cold int aac_decode_init(AVCodecContext *avctx)
// Decoder initialisation: parse extradata when present, select 16-bit sample
// output, build the static VLC tables, choose the output bias and scale, and
// set up the MDCT contexts and window tables.
539 AACContext *ac = avctx->priv_data;
543 ac->m4ac.sample_rate = avctx->sample_rate;
// Without extradata the stream configuration is not parsed here.
545 if (avctx->extradata_size > 0) {
546 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
550 avctx->sample_fmt = SAMPLE_FMT_S16;
// One static VLC per spectral codebook; the second argument is passed through to the macro as size.
552 AAC_INIT_VLC_STATIC( 0, 304);
553 AAC_INIT_VLC_STATIC( 1, 270);
554 AAC_INIT_VLC_STATIC( 2, 550);
555 AAC_INIT_VLC_STATIC( 3, 300);
556 AAC_INIT_VLC_STATIC( 4, 328);
557 AAC_INIT_VLC_STATIC( 5, 294);
558 AAC_INIT_VLC_STATIC( 6, 306);
559 AAC_INIT_VLC_STATIC( 7, 268);
560 AAC_INIT_VLC_STATIC( 8, 510);
561 AAC_INIT_VLC_STATIC( 9, 366);
562 AAC_INIT_VLC_STATIC(10, 462);
566 dsputil_init(&ac->dsp, avctx);
// Fixed seed for the noise generator state that lcg_random advances.
568 ac->random_state = 0x1f2e3d4c;
570 // -1024 - Compensate wrong IMDCT method.
571 // 32768 - Required to scale values to the correct range for the bias method
572 // for float to int16 conversion.
// With the C conversion routine the bias is 385.0 and sf_scale carries the
// extra 1/32768 factor; the other assignment uses 1 / -1024 alone.
574 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
575 ac->add_bias = 385.0f;
576 ac->sf_scale = 1. / (-1024. * 32768.);
580 ac->sf_scale = 1. / -1024.;
// Runtime table: ff_aac_pow2sf_tab[i] = 2^((i - 200) / 4) for i in 0..427,
// skipped when the tables are hardcoded.
584 #if !CONFIG_HARDCODED_TABLES
585 for (i = 0; i < 428; i++)
586 ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
587 #endif /* CONFIG_HARDCODED_TABLES */
589 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
590 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
591 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
// Two transform contexts, initialised with 11 and 8 as the size argument
// (presumably the log2 transform size - confirm against ff_mdct_init).
594 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
595 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
596 // window initialization
597 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
598 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
599 ff_init_ff_sine_windows(10);
600 ff_init_ff_sine_windows( 7);
608 * Skip data_stream_element; reference: table 4.10.
// Skip a data stream element: read the byte-align flag and an 8-bit byte count
// (a further 8-bit value can be added to it), check that 8 * count bits are
// still available, then skip them.
610 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
612 int byte_align = get_bits1(gb);
613 int count = get_bits(gb, 8);
615 count += get_bits(gb, 8);
619 if (get_bits_left(gb) < 8 * count) {
620 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
623 skip_bits_long(gb, 8 * count);
// Read prediction side info (called for AOT_AAC_MAIN by decode_ics_info): a
// 5-bit predictor reset group, valid in 1..30, and one prediction_used flag for
// each scalefactor band below min(max_sfb, ff_aac_pred_sfb_max[sampling_index]).
627 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
632 ics->predictor_reset_group = get_bits(gb, 5);
633 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
634 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
638 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
639 ics->prediction_used[sfb] = get_bits1(gb);
645 * Decode Individual Channel Stream info; reference: table 4.6.
647 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
// Parse ics_info from gb into ics. Every error path visible here zeroes the
// whole IndividualChannelStream after logging.
649 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
650 GetBitContext *gb, int common_window)
653 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
654 memset(ics, 0, sizeof(IndividualChannelStream));
// Keep the previous window sequence and window shape in slot [1] before
// reading the new values into slot [0].
657 ics->window_sequence[1] = ics->window_sequence[0];
658 ics->window_sequence[0] = get_bits(gb, 2);
659 ics->use_kb_window[1] = ics->use_kb_window[0];
660 ics->use_kb_window[0] = get_bits1(gb);
661 ics->num_window_groups = 1;
662 ics->group_len[0] = 1;
// Eight short windows: 4-bit max_sfb, then 7 grouping positions that either
// lengthen the current group or open a new group of length 1.
663 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
665 ics->max_sfb = get_bits(gb, 4);
666 for (i = 0; i < 7; i++) {
668 ics->group_len[ics->num_window_groups - 1]++;
670 ics->num_window_groups++;
671 ics->group_len[ics->num_window_groups - 1] = 1;
674 ics->num_windows = 8;
675 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
676 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
677 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
678 ics->predictor_present = 0;
// Single window: 6-bit max_sfb, the 1024-sample band tables, and an optional predictor.
680 ics->max_sfb = get_bits(gb, 6);
681 ics->num_windows = 1;
682 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
683 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
684 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
685 ics->predictor_present = get_bits1(gb);
686 ics->predictor_reset_group = 0;
// The predictor bit is only decodable for AAC Main; it is an error for AAC-LC
// and reported as a missing feature (LTP) for the remaining object types.
687 if (ics->predictor_present) {
688 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
689 if (decode_prediction(ac, ics, gb)) {
690 memset(ics, 0, sizeof(IndividualChannelStream));
693 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
694 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
695 memset(ics, 0, sizeof(IndividualChannelStream));
698 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
699 memset(ics, 0, sizeof(IndividualChannelStream));
// max_sfb may not exceed the band count for this window type and sampling index.
705 if (ics->max_sfb > ics->num_swb) {
706 av_log(ac->avctx, AV_LOG_ERROR,
707 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
708 ics->max_sfb, ics->num_swb);
709 memset(ics, 0, sizeof(IndividualChannelStream));
717 * Decode band types (section_data payload); reference: table 4.46.
719 * @param band_type array of the used band type
720 * @param band_type_run_end array of the last scalefactor band of a band type run
722 * @return Returns error status. 0 - OK, !0 - error
// Read section data. For every window group, consecutive sections each carry a
// 4-bit band type and an escape-coded length; band_type[] and
// band_type_run_end[] are filled for every scalefactor band the section covers.
724 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
725 int band_type_run_end[120], GetBitContext *gb,
726 IndividualChannelStream *ics)
// Length increments are 3 bits wide for eight-short sequences and 5 bits otherwise.
729 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
730 for (g = 0; g < ics->num_window_groups; g++) {
732 while (k < ics->max_sfb) {
// NOTE(review): sect_end is uint8_t while the escape loop below can add
// (1 << bits) - 1 any number of times, so the sum can wrap modulo 256 before
// the max_sfb check - confirm whether a wider type is intended.
733 uint8_t sect_end = k;
735 int sect_band_type = get_bits(gb, 4);
736 if (sect_band_type == 12) {
737 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
// An all-ones increment means that another increment follows.
740 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
741 sect_end += sect_len_incr;
742 sect_end += sect_len_incr;
// The overread check runs only after the whole escape sequence has been read.
743 if (get_bits_left(gb) < 0) {
744 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
747 if (sect_end > ics->max_sfb) {
748 av_log(ac->avctx, AV_LOG_ERROR,
749 "Number of bands (%d) exceeds limit (%d).\n",
750 sect_end, ics->max_sfb);
753 for (; k < sect_end; k++) {
754 band_type [idx] = sect_band_type;
755 band_type_run_end[idx++] = sect_end;
763 * Decode scalefactors; reference: table 4.47.
765 * @param global_gain first scalefactor value as scalefactors are differentially coded
766 * @param band_type array of the used band type
767 * @param band_type_run_end array of the last scalefactor band of a band type run
768 * @param sf array of scalefactors or intensity stereo positions
770 * @return Returns error status. 0 - OK, !0 - error
// Decode the differentially coded scalefactors into sf[]. Scalefactors, noise
// gains and intensity stereo positions each keep their own running value, and
// every updated value is range-checked before it indexes ff_aac_pow2sf_tab.
772 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
773 unsigned int global_gain,
774 IndividualChannelStream *ics,
775 enum BandType band_type[120],
776 int band_type_run_end[120])
// Eight-short sequences use a table offset that is 12 entries higher.
778 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
// Running values: [0] scalefactor, [1] noise gain, [2] intensity stereo
// position (same order as sf_str).
780 int offset[3] = { global_gain, global_gain - 90, 100 };
782 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
783 for (g = 0; g < ics->num_window_groups; g++) {
784 for (i = 0; i < ics->max_sfb;) {
785 int run_end = band_type_run_end[idx];
786 if (band_type[idx] == ZERO_BT) {
787 for (; i < run_end; i++, idx++)
789 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
790 for (; i < run_end; i++, idx++) {
791 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
// Comparing with 255U converts the int to unsigned, so negative values are
// rejected together with values above 255.
792 if (offset[2] > 255U) {
793 av_log(ac->avctx, AV_LOG_ERROR,
794 "%s (%d) out of range.\n", sf_str[2], offset[2]);
797 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
799 } else if (band_type[idx] == NOISE_BT) {
800 for (; i < run_end; i++, idx++) {
// While noise_flag is still positive the value is read as 9 raw bits minus
// 256; the initial value of noise_flag is set outside the lines shown here.
801 if (noise_flag-- > 0)
802 offset[1] += get_bits(gb, 9) - 256;
804 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
805 if (offset[1] > 255U) {
806 av_log(ac->avctx, AV_LOG_ERROR,
807 "%s (%d) out of range.\n", sf_str[1], offset[1]);
810 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
813 for (; i < run_end; i++, idx++) {
814 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
815 if (offset[0] > 255U) {
816 av_log(ac->avctx, AV_LOG_ERROR,
817 "%s (%d) out of range.\n", sf_str[0], offset[0]);
820 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
829 * Decode pulse data; reference: table 4.7.
// Read pulse data into pulse: a pulse count of 1..4, the start band, and for
// each pulse a 5-bit position increment and a 4-bit amplitude. Positions
// accumulate from swb_offset[pulse_swb] and are tested against 1023.
831 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
832 const uint16_t *swb_offset, int num_swb)
835 pulse->num_pulse = get_bits(gb, 2) + 1;
836 pulse_swb = get_bits(gb, 6);
// The start band must be a valid index into swb_offset.
837 if (pulse_swb >= num_swb)
839 pulse->pos[0] = swb_offset[pulse_swb];
840 pulse->pos[0] += get_bits(gb, 5);
841 if (pulse->pos[0] > 1023)
843 pulse->amp[0] = get_bits(gb, 4);
844 for (i = 1; i < pulse->num_pulse; i++) {
845 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
846 if (pulse->pos[i] > 1023)
848 pulse->amp[i] = get_bits(gb, 4);
854 * Decode Temporal Noise Shaping data; reference: table 4.48.
856 * @return Returns error status. 0 - OK, !0 - error
// Read the TNS filter description of every window of ics into tns.
858 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
859 GetBitContext *gb, const IndividualChannelStream *ics)
861 int w, filt, i, coef_len, coef_res, coef_compress;
862 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
// Maximum filter order: 7 for eight-short sequences, otherwise 20 for AAC Main
// and 12 for every other object type.
863 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
864 for (w = 0; w < ics->num_windows; w++) {
// Field widths shrink for short windows: n_filt takes 2 - is8 bits, length
// 6 - 2 * is8 bits and order 5 - 2 * is8 bits.
865 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
866 coef_res = get_bits1(gb);
868 for (filt = 0; filt < tns->n_filt[w]; filt++) {
870 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
// An order above the maximum is logged and the stored order is forced to 0.
872 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
873 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
874 tns->order[w][filt], tns_max_order);
875 tns->order[w][filt] = 0;
878 if (tns->order[w][filt]) {
879 tns->direction[w][filt] = get_bits1(gb);
880 coef_compress = get_bits1(gb);
// Coefficients are coef_res + 3 - coef_compress bits wide; tmp2_idx selects the
// row of tns_tmp2_map that maps the raw value to a coefficient.
881 coef_len = coef_res + 3 - coef_compress;
882 tmp2_idx = 2 * coef_compress + coef_res;
884 for (i = 0; i < tns->order[w][filt]; i++)
885 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
894 * Decode Mid/Side data; reference: table 4.54.
896 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
897 * [1] mask is decoded from bitstream; [2] mask is all 1s;
898 * [3] reserved for scalable AAC
// Fill cpe->ms_mask for the num_window_groups * max_sfb bands of channel 0:
// ms_present == 1 reads one bit per band from gb, ms_present == 2 sets every
// byte of that range to 1. Other values leave the mask untouched here.
900 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
904 if (ms_present == 1) {
905 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
906 cpe->ms_mask[idx] = get_bits1(gb);
907 } else if (ms_present == 2) {
908 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
// Store two scaled table values at dst: v is indexed by the low and the next
// 4-bit field of idx, and each value is multiplied by s. dst advances by two.
913 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
917 *dst++ = v[idx & 15] * s;
918 *dst++ = v[idx>>4 & 15] * s;
// Store four scaled table values at dst: v is indexed by the four 2-bit fields
// in the low byte of idx, and each value is multiplied by s. dst advances by four.
924 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
928 *dst++ = v[idx & 3] * s;
929 *dst++ = v[idx>>2 & 3] * s;
930 *dst++ = v[idx>>4 & 3] * s;
931 *dst++ = v[idx>>6 & 3] * s;
// Signed variant of the two-value store: each value is multiplied by its own
// copy of *scale, and the copy has its float sign bit flipped by XOR on the
// integer view of the union.
937 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
938 unsigned sign, const float *scale)
940 union float754 s0, s1;
942 s0.f = s1.f = *scale;
// sign >> 1 << 31 moves bit 1 of sign into bit 31, the float sign position.
943 s0.i ^= sign >> 1 << 31;
946 *dst++ = v[idx & 15] * s0.f;
947 *dst++ = v[idx>>4 & 15] * s1.f;
// Signed variant of the four-value store. Bit 31 of sign is applied to the
// current output; after each output, sign is shifted left by one only when the
// low bit of nz is set, and nz moves on to its next bit. nz comes from idx >> 12
// and presumably flags which outputs are nonzero - confirm against the table.
// NOTE(review): 1<<31 shifts into the sign bit of int; 1U<<31 would avoid that.
954 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
955 unsigned sign, const float *scale)
957 unsigned nz = idx >> 12;
958 union float754 s = { .f = *scale };
961 t.i = s.i ^ (sign & 1<<31);
962 *dst++ = v[idx & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1<<31);
966 *dst++ = v[idx>>2 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1<<31);
970 *dst++ = v[idx>>4 & 3] * t.f;
972 sign <<= nz & 1; nz >>= 1;
973 t.i = s.i ^ (sign & 1<<31);
974 *dst++ = v[idx>>6 & 3] * t.f;
981 * Decode spectral data; reference: table 4.50.
982 * Dequantize and scale spectral data; reference: 4.6.3.3.
984 * @param coef array of dequantized, scaled spectral data
985 * @param sf array of scalefactors or intensity stereo positions
986 * @param pulse_present set if pulses are present
987 * @param pulse pointer to pulse data struct
988 * @param band_type array of the used band type
990 * @return Returns error status. 0 - OK, !0 - error
// Decode the spectral coefficients of one channel into coef[] and scale them
// with sf[]. Bands above max_sfb are cleared, some band types are cleared or
// filled with scaled noise without reading codebook data, the remaining bands
// are read with the VLC of their codebook, and pulses are added last when
// pulse_present is set.
992 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
993 GetBitContext *gb, const float sf[120],
994 int pulse_present, const Pulse *pulse,
995 const IndividualChannelStream *ics,
996 enum BandType band_type[120])
998 int i, k, g, idx = 0;
// c is the number of coefficients per window.
999 const int c = 1024 / ics->num_windows;
1000 const uint16_t *offsets = ics->swb_offset;
1001 float *coef_base = coef;
// Clear everything above the last coded band in each window.
1004 for (g = 0; g < ics->num_windows; g++)
1005 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1007 for (g = 0; g < ics->num_window_groups; g++) {
1008 unsigned g_len = ics->group_len[g];
1010 for (i = 0; i < ics->max_sfb; i++, idx++) {
// cbt_m1 is unsigned: a band type of 0 wraps to UINT_MAX and therefore takes
// the first branch together with the types from INTENSITY_BT2 upward.
1011 const unsigned cbt_m1 = band_type[idx] - 1;
1012 float *cfo = coef + offsets[i];
1013 int off_len = offsets[i + 1] - offsets[i];
1016 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1017 for (group = 0; group < g_len; group++, cfo+=128) {
1018 memset(cfo, 0, off_len * sizeof(float));
1020 } else if (cbt_m1 == NOISE_BT - 1) {
1021 for (group = 0; group < g_len; group++, cfo+=128) {
// Noise band: fill with generator output, then rescale the band by
// sf[idx] / sqrt(energy of the band).
1025 for (k = 0; k < off_len; k++) {
1026 ac->random_state = lcg_random(ac->random_state);
1027 cfo[k] = ac->random_state;
1030 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1031 scale = sf[idx] / sqrtf(band_energy);
1032 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
// Codebook coded band: all tables are selected with cbt_m1, and cb_size is the
// limit every decoded code is checked against.
1035 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1036 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1037 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1038 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1039 OPEN_READER(re, gb);
1041 switch (cbt_m1 >> 1) {
1043 for (group = 0; group < g_len; group++, cfo+=128) {
1051 UPDATE_CACHE(re, gb);
1052 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1054 if (code >= cb_size) {
1056 goto err_cb_overflow;
1059 cb_idx = cb_vector_idx[code];
1060 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1066 for (group = 0; group < g_len; group++, cfo+=128) {
1076 UPDATE_CACHE(re, gb);
1077 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1079 if (code >= cb_size) {
1081 goto err_cb_overflow;
1084 #if MIN_CACHE_BITS < 20
1085 UPDATE_CACHE(re, gb);
1087 cb_idx = cb_vector_idx[code];
// Bits 8..11 of cb_idx give the number of sign bits that follow the code.
1088 nnz = cb_idx >> 8 & 15;
1089 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1090 LAST_SKIP_BITS(re, gb, nnz);
1091 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1097 for (group = 0; group < g_len; group++, cfo+=128) {
1105 UPDATE_CACHE(re, gb);
1106 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1108 if (code >= cb_size) {
1110 goto err_cb_overflow;
1113 cb_idx = cb_vector_idx[code];
1114 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1121 for (group = 0; group < g_len; group++, cfo+=128) {
1131 UPDATE_CACHE(re, gb);
1132 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1134 if (code >= cb_size) {
1136 goto err_cb_overflow;
1139 cb_idx = cb_vector_idx[code];
1140 nnz = cb_idx >> 8 & 15;
1141 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1142 LAST_SKIP_BITS(re, gb, nnz);
1143 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1149 for (group = 0; group < g_len; group++, cfo+=128) {
1151 uint32_t *icf = (uint32_t *) cf;
1161 UPDATE_CACHE(re, gb);
1162 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1170 if (code >= cb_size) {
1172 goto err_cb_overflow;
1175 cb_idx = cb_vector_idx[code];
1178 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1179 LAST_SKIP_BITS(re, gb, nnz);
1181 for (j = 0; j < 2; j++) {
1185 /* The total length of escape_sequence must be < 22 bits according
1186 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
// b is the number of leading one bits in the cache, the escape prefix length.
1187 UPDATE_CACHE(re, gb);
1188 b = GET_CACHE(re, gb);
1189 b = 31 - av_log2(~b);
1192 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1196 #if MIN_CACHE_BITS < 21
1197 LAST_SKIP_BITS(re, gb, b + 1);
1198 UPDATE_CACHE(re, gb);
1200 SKIP_BITS(re, gb, b + 1);
1203 n = (1 << b) + SHOW_UBITS(re, gb, b);
1204 LAST_SKIP_BITS(re, gb, b);
// Values are written as raw 32-bit patterns with the sign bit taken from bits.
1205 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1208 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1209 *icf++ = (bits & 1<<31) | v;
1216 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1220 CLOSE_READER(re, gb);
// Add each pulse to the coefficient at pulse->pos[i], leaving noise bands and
// bands whose scalefactor is zero untouched.
1226 if (pulse_present) {
1228 for (i = 0; i < pulse->num_pulse; i++) {
1229 float co = coef_base[ pulse->pos[i] ];
1230 while (offsets[idx + 1] <= pulse->pos[i])
1232 if (band_type[idx] != NOISE_BT && sf[idx]) {
1233 float ico = -pulse->amp[i];
1236 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1238 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
// Report the limit of the codebook that was actually used: cb_size was read
// from ff_aac_spectral_sizes[band_type[idx] - 1], and indexing the table with
// band_type[idx] itself would read past its end for the last codebook.
1245 av_log(ac->avctx, AV_LOG_ERROR,
1246 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1247 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx] - 1]);
// Round a float to the upper 16 bits of its bit pattern: add 0x8000 to the
// integer view and clear the low 16 bits.
1251 static av_always_inline float flt16_round(float pf)
1255 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
// Round a float to the upper 16 bits of its bit pattern: add 0x7FFF plus a
// one-bit correction term to the integer view, then clear the low 16 bits.
// NOTE(review): the shift binds tighter than the bitwise AND, so the correction
// term is tmp.i & (0x00010000U >> 16), that is bit 0 of tmp.i, not bit 16
// moved down. Confirm which is intended before changing it, since any change
// alters decoder output.
1259 static av_always_inline float flt16_even(float pf)
1263 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
// Truncate a float to the upper 16 bits of its bit pattern by clearing the low
// 16 bits of the integer view.
1267 static av_always_inline float flt16_trunc(float pf)
1271 pun.i &= 0xFFFF0000U;
// One predictor step for a single coefficient: form a two-tap prediction from
// the stored state, add it to *coef in sf_scale units, then update the
// correlation, variance and delay state of ps from the resulting value.
1275 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1278 const float a = 0.953125; // 61.0 / 64
1279 const float alpha = 0.90625; // 29.0 / 32
// A tap is zero unless the matching variance exceeds 1.
1284 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1285 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1287 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1289 *coef += pv * ac->sf_scale;
// e0 is the coefficient with the sf_scale factor divided out again.
1291 e0 = *coef / ac->sf_scale;
1292 e1 = e0 - k1 * ps->r0;
// Every state value is truncated to 16 significant bits through flt16_trunc.
1294 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1295 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1296 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1297 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
// r1 must be computed from the old r0 before r0 is overwritten.
1299 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1300 ps->r0 = flt16_trunc(a * e0);
1304 * Apply AAC-Main style frequency domain prediction.
/* Runs predict() over the coefficients of one channel and handles predictor
 * resets. */
1306 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
/* Reset the predictor state once, the first time this channel is processed. */
1310 if (!sce->ics.predictor_initialized) {
1311 reset_all_predictors(sce->predictor_state);
1312 sce->ics.predictor_initialized = 1;
/* The per-coefficient pass below runs only when the current window sequence
 * is not EIGHT_SHORT_SEQUENCE. */
1315 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
/* Visit every coefficient of every band below the limit looked up by
 * sampling index. */
1316 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1317 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
/* The last argument is non-zero only when prediction is present for this
 * frame and used in this band. */
1318 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1319 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
/* A non-zero predictor_reset_group requests a reset of that group. */
1322 if (sce->ics.predictor_reset_group)
1323 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
/* NOTE(review): the branch this full reset belongs to is not visible in this
 * excerpt; presumably it is the eight-short case. Confirm. */
1325 reset_all_predictors(sce->predictor_state);
1329 * Decode an individual_channel_stream payload; reference: table 4.44.
1331 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1332 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1334 * @return Returns error status. 0 - OK, !0 - error
/* Parses one channel's stream in order: global gain, optional ICS info, band
 * types, scalefactors, pulse data, TNS data, spectrum; then optionally
 * applies prediction. The error returns are not visible in this excerpt. */
1336 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1337 GetBitContext *gb, int common_window, int scale_flag)
1340 TemporalNoiseShaping *tns = &sce->tns;
1341 IndividualChannelStream *ics = &sce->ics;
1342 float *out = sce->coeffs;
1343 int global_gain, pulse_present = 0;
1345 /* This assignment is to silence a GCC warning about the variable being used
1346 * uninitialized when in fact it always is.
1348 pulse.num_pulse = 0;
/* global_gain is an 8-bit field passed on to decode_scalefactors(). */
1350 global_gain = get_bits(gb, 8);
/* ICS info is parsed here only when it was not shared through a common
 * window and the stream is not scalable. */
1352 if (!common_window && !scale_flag) {
1353 if (decode_ics_info(ac, ics, gb, 0) < 0)
1357 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1359 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
/* One flag bit announces pulse data; it is rejected for eight-short windows. */
1364 if ((pulse_present = get_bits1(gb))) {
1365 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1366 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1369 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1370 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
/* decode_tns() is called only when the TNS flag bit is set; a non-zero result
 * is treated as failure. */
1374 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
/* A set bit here is reported as the unsupported "SSR" feature. */
1376 if (get_bits1(gb)) {
1377 av_log_missing_feature(ac->avctx, "SSR", 1);
1382 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
/* With a common window, prediction is not applied here; decode_cpe() calls
 * apply_prediction() itself in that case. */
1385 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1386 apply_prediction(ac, sce);
1392 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
/* Applies butterflies_float() to the two channels of a pair for every band
 * whose ms_mask entry is set. Band layout is taken from channel 0. */
1394 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1396 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1397 float *ch0 = cpe->ch[0].coeffs;
1398 float *ch1 = cpe->ch[1].coeffs;
/* idx keeps counting across groups, so it indexes ms_mask/band_type per
 * (group, band) pair. */
1399 int g, i, group, idx = 0;
1400 const uint16_t *offsets = ics->swb_offset;
1401 for (g = 0; g < ics->num_window_groups; g++) {
1402 for (i = 0; i < ics->max_sfb; i++, idx++) {
/* Only bands flagged in ms_mask, and whose band type is below NOISE_BT in
 * both channels, are processed. */
1403 if (cpe->ms_mask[idx] &&
1404 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
/* Process the same band in every window of the group; windows are 128
 * coefficients apart. */
1405 for (group = 0; group < ics->group_len[g]; group++) {
1406 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1407 ch1 + group * 128 + offsets[i],
1408 offsets[i+1] - offsets[i]);
/* Move both pointers past all windows of the group just handled. */
1412 ch0 += ics->group_len[g] * 128;
1413 ch1 += ics->group_len[g] * 128;
1418 * intensity stereo decoding; reference: 4.6.8.2.3
1420 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1421 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1422 * [3] reserved for scalable AAC
/* For bands of channel 1 coded with an intensity band type, overwrites the
 * channel 1 coefficients with a scaled copy of the channel 0 coefficients. */
1424 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
/* Band layout, band types and scalefactors all come from channel 1. */
1426 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1427 SingleChannelElement *sce1 = &cpe->ch[1];
1428 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1429 const uint16_t *offsets = ics->swb_offset;
1430 int g, group, i, k, idx = 0;
1433 for (g = 0; g < ics->num_window_groups; g++) {
/* i is advanced inside the body, one band-type run at a time. */
1434 for (i = 0; i < ics->max_sfb;) {
1435 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1436 const int bt_run_end = sce1->band_type_run_end[idx];
1437 for (; i < bt_run_end; i++, idx++) {
/* Sign from the band type: -1 when band_type is 14, +1 when it is 15. */
1438 c = -1 + 2 * (sce1->band_type[idx] - 14);
/* Flips the sign when the ms_mask entry is 1.
 * NOTE(review): the condition guarding this line is not visible here. */
1440 c *= 1 - 2 * cpe->ms_mask[idx];
1441 scale = c * sce1->sf[idx];
/* Write every window of the group for this band. */
1442 for (group = 0; group < ics->group_len[g]; group++)
1443 for (k = offsets[i]; k < offsets[i + 1]; k++)
1444 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
/* Not an intensity run: advance idx by the remaining length of the run.
 * NOTE(review): the matching update of i is not visible in this excerpt. */
1447 int bt_run_end = sce1->band_type_run_end[idx];
1448 idx += bt_run_end - i;
/* Move both pointers past all windows of the group just handled. */
1452 coef0 += ics->group_len[g] * 128;
1453 coef1 += ics->group_len[g] * 128;
1458 * Decode a channel_pair_element; reference: table 4.4.
1460 * @param cpe channel pair element whose two channels are decoded from the bitstream
1462 * @return Returns error status. 0 - OK, !0 - error
/* Decodes both channels of a pair and then applies the stereo tools.
 * The error returns are not visible in this excerpt. */
1464 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1466 int i, ret, common_window, ms_present = 0;
1468 common_window = get_bits1(gb);
1469 if (common_window) {
1470 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
/* Channel 1 receives a copy of channel 0's ICS info, but its own
 * use_kb_window[0] from before the copy is kept as use_kb_window[1], the
 * entry imdct_and_windowing() reads for the "prev" windows. */
1472 i = cpe->ch[1].ics.use_kb_window[0];
1473 cpe->ch[1].ics = cpe->ch[0].ics;
1474 cpe->ch[1].ics.use_kb_window[1] = i;
/* ms_present is a 2-bit field; value 3 is rejected as reserved, any other
 * non-zero value triggers decode_mid_side_stereo(). */
1475 ms_present = get_bits(gb, 2);
1476 if (ms_present == 3) {
1477 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1479 } else if (ms_present)
1480 decode_mid_side_stereo(cpe, gb, ms_present);
/* A non-zero result from either channel is treated as failure. */
1482 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1484 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1487 if (common_window) {
/* NOTE(review): the condition guarding this call is not visible here. */
1489 apply_mid_side_stereo(ac, cpe);
/* decode_ics() skips prediction when common_window is set, so it is applied
 * here for both channels. */
1490 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1491 apply_prediction(ac, &cpe->ch[0]);
1492 apply_prediction(ac, &cpe->ch[1]);
1496 apply_intensity_stereo(cpe, ms_present);
1501 * Decode coupling_channel_element; reference: table 4.8.
1503 * @param che channel element that receives the coupling channel and its coupling data
1505 * @return Returns error status. 0 - OK, !0 - error
/* Parses the coupling targets, the coupling channel itself and the gain
 * values, storing them in che->coup. Several declarations and guards of this
 * function are not visible in this excerpt. */
1507 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1513 SingleChannelElement *sce = &che->ch[0];
1514 ChannelCoupling *coup = &che->coup;
/* The first flag bit contributes 0 or 2 to coupling_point; a further bit is
 * added below. */
1516 coup->coupling_point = 2 * get_bits1(gb);
/* num_coupled is a 3-bit field; the loop runs num_coupled + 1 times. */
1517 coup->num_coupled = get_bits(gb, 3);
1518 for (c = 0; c <= coup->num_coupled; c++) {
1520 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1521 coup->id_select[c] = get_bits(gb, 4);
/* Only pair targets carry a 2-bit channel selector. */
1522 if (coup->type[c] == TYPE_CPE) {
1523 coup->ch_select[c] = get_bits(gb, 2);
/* NOTE(review): the statement controlled by this test is not visible here. */
1524 if (coup->ch_select[c] == 3)
/* NOTE(review): presumably the non-pair branch; confirm. */
1527 coup->ch_select[c] = 2;
/* Adds 1 when the next bit is set or when coupling_point is already >= 2.
 * The bit is always consumed because it is the left operand of "||". */
1529 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1531 sign = get_bits(gb, 1);
/* scale = 2^(2^(v - 3)) for the 2-bit field v, i.e. 2^(1/8), 2^(1/4),
 * 2^(1/2) or 2. */
1532 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
/* The coupling channel itself is decoded like an independent channel. */
1534 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1537 for (c = 0; c < num_gain; c++) {
1541 float gain_cache = 1.;
/* For AFTER_IMDCT coupling cge is forced to 1 without reading a bit. */
1543 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
/* The decoded code is offset by 60; gain is 0 when cge is 0. */
1544 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1545 gain_cache = pow(scale, -gain);
/* AFTER_IMDCT coupling stores one gain per list entry, in slot 0. */
1547 if (coup->coupling_point == AFTER_IMDCT) {
1548 coup->gain[c][0] = gain_cache;
/* Otherwise a gain is stored per (group, band); only bands that are not
 * ZERO_BT are considered. */
1550 for (g = 0; g < sce->ics.num_window_groups; g++) {
1551 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1552 if (sce->band_type[idx] != ZERO_BT) {
1554 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
/* NOTE(review): s and the adjustments made to t before this line are not
 * visible in this excerpt. */
1562 gain_cache = pow(scale, -t) * s;
1565 coup->gain[c][idx] = gain_cache;
1575 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1577 * @return Returns number of bytes consumed.
/* Reads exclusion mask bits into che_drc->exclude_mask in runs of seven.
 * NOTE(review): the remainder of the parameter list is not visible here. */
1579 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1583 int num_excl_chan = 0;
/* Each pass stores seven mask bits. */
1586 for (i = 0; i < 7; i++)
1587 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
/* Continue only while fewer than MAX_CHANNELS - 7 entries are stored and the
 * following bit is set; that bit is not read once the limit is reached. */
1588 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
/* Seven mask bits plus the bit tested in the loop condition make one byte per
 * pass, so the count of passes is returned. */
1590 return num_excl_chan / 7;
1594 * Decode dynamic range information; reference: table 4.52.
1596 * @param cnt length of TYPE_FIL syntactic element in bytes
1598 * @return Returns number of bytes consumed.
/* Parses dynamic range data into che_drc. The byte counter that is returned
 * and its increments are mostly not visible in this excerpt. */
1600 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1601 GetBitContext *gb, int cnt)
/* One band unless the optional band section below adds more. */
1604 int drc_num_bands = 1;
1607 /* pce_tag_present? */
1608 if (get_bits1(gb)) {
1609 che_drc->pce_instance_tag = get_bits(gb, 4);
1610 skip_bits(gb, 4); // tag_reserved_bits
1614 /* excluded_chns_present? */
1615 if (get_bits1(gb)) {
/* The helper's return value is added to the running count n. */
1616 n += decode_drc_channel_exclusions(che_drc, gb);
1619 /* drc_bands_present? */
1620 if (get_bits1(gb)) {
1621 che_drc->band_incr = get_bits(gb, 4);
1622 che_drc->interpolation_scheme = get_bits(gb, 4);
/* band_incr extends the band count; one 8-bit band_top is read per band. */
1624 drc_num_bands += che_drc->band_incr;
1625 for (i = 0; i < drc_num_bands; i++) {
1626 che_drc->band_top[i] = get_bits(gb, 8);
1631 /* prog_ref_level_present? */
1632 if (get_bits1(gb)) {
1633 che_drc->prog_ref_level = get_bits(gb, 7);
1634 skip_bits1(gb); // prog_ref_level_reserved_bits
/* Every band carries a 1-bit sign and a 7-bit control value. */
1638 for (i = 0; i < drc_num_bands; i++) {
1639 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1640 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1648 * Decode extension data (incomplete); reference: table 4.51.
1650 * @param cnt length of TYPE_FIL syntactic element in bytes
1652 * @return Returns number of bytes consumed
/* Dispatches on the 4-bit extension type: SBR data goes to
 * ff_decode_sbr_extension(), dynamic range data to decode_dynamic_range(),
 * and the remaining visible case is skipped. Several case labels and
 * statements are not visible in this excerpt. */
1654 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1655 ChannelElement *che, enum RawDataBlockType elem_type)
1659 switch (get_bits(gb, 4)) { // extension type
1660 case EXT_SBR_DATA_CRC:
/* NOTE(review): the condition that leads to this message is not visible
 * here. */
1664 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
/* sbr == 0: log and skip the payload. The skip is 8 * cnt - 4 bits because
 * the 4 type bits have already been read. */
1666 } else if (!ac->m4ac.sbr) {
1667 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1668 skip_bits_long(gb, 8 * cnt - 4);
/* sbr == -1 while the output configuration is already locked: log and skip
 * the payload as well. */
1670 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1671 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1672 skip_bits_long(gb, 8 * cnt - 4);
/* ps == -1, configuration not yet locked, one channel: the output is
 * configured again with the current layout.
 * NOTE(review): the statements around this call are not visible here. */
1674 } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1677 output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1681 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1683 case EXT_DYNAMIC_RANGE:
1684 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1688 case EXT_DATA_ELEMENT:
/* Skip the rest of the payload (cnt bytes minus the 4 type bits). */
1690 skip_bits_long(gb, 8 * cnt - 4);
1697 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1699 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1700 * @param coef spectral coefficients
/* For every window and every filter, converts the stored coefficients with
 * compute_lpc_coefs() and filters the selected coefficient range in place. */
1702 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1703 IndividualChannelStream *ics, int decode)
/* Band indices used below are clamped to this limit. */
1705 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1707 int bottom, top, order, start, end, size, inc;
1708 float lpc[TNS_MAX_ORDER];
1710 for (w = 0; w < ics->num_windows; w++) {
/* Filters are laid out from the top band downwards, starting at num_swb. */
1711 bottom = ics->num_swb;
1712 for (filt = 0; filt < tns->n_filt[w]; filt++) {
/* NOTE(review): the assignment of top is not visible in this excerpt. */
1714 bottom = FFMAX(0, top - tns->length[w][filt]);
1715 order = tns->order[w][filt];
1720 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
/* Translate the band range to coefficient offsets; an empty or inverted
 * range is detected by size <= 0. */
1722 start = ics->swb_offset[FFMIN(bottom, mmm)];
1723 end = ics->swb_offset[FFMIN( top, mmm)];
1724 if ((size = end - start) <= 0)
/* NOTE(review): the code that derives inc and adjusts start from the
 * direction flag is not visible here. */
1726 if (tns->direction[w][filt]) {
/* Each output subtracts the previously processed outputs weighted by lpc.
 * FFMIN(m, order) restricts the taps to samples already processed in this
 * range. */
1735 for (m = 0; m < size; m++, start += inc)
1736 for (i = 1; i <= FFMIN(m, order); i++)
1737 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1743 * Conduct IMDCT and windowing.
/* Transforms sce->coeffs with ff_imdct_half(), overlaps the result with
 * sce->saved into sce->ret, then refills sce->saved for the next call. */
1745 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1747 IndividualChannelStream *ics = &sce->ics;
1748 float *in = sce->coeffs;
1749 float *out = sce->ret;
1750 float *saved = sce->saved;
/* Index 0 of use_kb_window selects the current short window; index 1 selects
 * the "prev" windows. */
1751 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1752 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1753 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1754 float *buf = ac->buf_mdct;
1755 float *temp = ac->temp;
/* Eight-short frames run the small transform once per 128-coefficient
 * window; the other call below uses the large transform on the whole frame. */
1759 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1760 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1761 av_log(ac->avctx, AV_LOG_WARNING,
1762 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1763 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1764 for (i = 0; i < 1024; i += 128)
1765 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1767 ff_imdct_half(&ac->mdct, buf, in);
1769 /* window overlapping
1770 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1771 * and long to short transitions are considered to be short to short
1772 * transitions. This leaves just two cases (long to long and short to short)
1773 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
/* Long-to-long case: one windowed overlap with the long "prev" window. */
1775 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1776 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1777 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
/* Otherwise the first 448 outputs are the saved samples plus bias. */
1779 for (i = 0; i < 448; i++)
1780 out[i] = saved[i] + bias;
/* Eight-short: the first overlap uses the saved data and the short "prev"
 * window, the next ones overlap consecutive short windows from buf. */
1782 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1783 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1784 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1785 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1786 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
/* The fifth overlap goes through temp: its first 64 values complete out, and
 * temp[64..127] is copied into saved further below. */
1787 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1788 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
/* Not eight-short: one short overlap, then the rest is copied from buf with
 * bias added. */
1790 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1791 for (i = 576; i < 1024; i++)
1792 out[i] = buf[i-512] + bias;
/* Refill saved. In the eight-short case the bias that the temp computation
 * included is subtracted again, and the remaining overlaps are computed with
 * a bias argument of 0. */
1797 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1798 for (i = 0; i < 64; i++)
1799 saved[i] = temp[64 + i] - bias;
1800 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1801 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1802 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1803 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1804 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1805 memcpy( saved, buf + 512, 448 * sizeof(float));
1806 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1807 } else { // LONG_STOP or ONLY_LONG
1808 memcpy( saved, buf + 512, 512 * sizeof(float));
1813 * Apply dependent channel coupling (applied before IMDCT).
1815 * @param index index into coupling gain array
/* Adds the coupling channel's coefficients, scaled by a per-band gain, to the
 * target channel's coefficients. Band layout comes from the coupling channel. */
1817 static void apply_dependent_coupling(AACContext *ac,
1818 SingleChannelElement *target,
1819 ChannelElement *cce, int index)
1821 IndividualChannelStream *ics = &cce->ch[0].ics;
1822 const uint16_t *offsets = ics->swb_offset;
1823 float *dest = target->coeffs;
1824 const float *src = cce->ch[0].coeffs;
1825 int g, i, group, k, idx = 0;
/* The LTP object type is reported as unsupported here.
 * NOTE(review): the statement following the log call is not visible here. */
1826 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1827 av_log(ac->avctx, AV_LOG_ERROR,
1828 "Dependent coupling is not supported together with LTP\n");
1831 for (g = 0; g < ics->num_window_groups; g++) {
1832 for (i = 0; i < ics->max_sfb; i++, idx++) {
/* Bands of type ZERO_BT in the coupling channel are skipped. */
1833 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1834 const float gain = cce->coup.gain[index][idx];
/* Accumulate into every window of the group; windows are 128 apart. */
1835 for (group = 0; group < ics->group_len[g]; group++) {
1836 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1838 dest[group * 128 + k] += gain * src[group * 128 + k];
/* Move both pointers past all windows of the group just handled. */
1843 dest += ics->group_len[g] * 128;
1844 src += ics->group_len[g] * 128;
1849 * Apply independent channel coupling (applied after IMDCT).
1851 * @param index index into coupling gain array
/* Adds the coupling channel's output samples, scaled by a single gain, to the
 * target channel's output samples. */
1853 static void apply_independent_coupling(AACContext *ac,
1854 SingleChannelElement *target,
1855 ChannelElement *cce, int index)
/* Only slot 0 of the gain row is used here. */
1858 const float gain = cce->coup.gain[index][0];
1859 const float bias = ac->add_bias;
1860 const float *src = cce->ch[0].ret;
1861 float *dest = target->ret;
/* 1024 samples, doubled when m4ac.sbr equals 1. */
1862 const int len = 1024 << (ac->m4ac.sbr == 1);
/* The bias is removed from the source before scaling, so that it is not
 * added to the destination a second time. */
1864 for (i = 0; i < len; i++)
1865 dest[i] += gain * (src[i] - bias);
1869 * channel coupling transformation interface
1871 * @param index index into coupling gain array
1872 * @param apply_coupling_method pointer to (in)dependent coupling function
/* Scans all coupling channel elements with the given coupling point and
 * applies apply_coupling_method to the channels of cc that they target. */
1874 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1875 enum RawDataBlockType type, int elem_id,
1876 enum CouplingPoint coupling_point,
1877 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1881 for (i = 0; i < MAX_ELEM_ID; i++) {
1882 ChannelElement *cce = ac->che[TYPE_CCE][i];
/* Only allocated coupling elements with a matching coupling point count. */
1885 if (cce && cce->coup.coupling_point == coupling_point) {
1886 ChannelCoupling *coup = &cce->coup;
1888 for (c = 0; c <= coup->num_coupled; c++) {
1889 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
/* ch_select other than 1 couples into channel 0. */
1890 if (coup->ch_select[c] != 1) {
1891 apply_coupling_method(ac, &cc->ch[0], cce, index);
/* NOTE(review): the statement controlled by this test is not visible here;
 * presumably it advances index. */
1892 if (coup->ch_select[c] != 0)
/* ch_select other than 2 couples into channel 1 and advances index. */
1895 if (coup->ch_select[c] != 2)
1896 apply_coupling_method(ac, &cc->ch[1], cce, index++);
/* Entry for a different element: skip its gain slot(s), two when ch_select
 * is 3.
 * NOTE(review): the branch keyword for this line is not visible here. */
1898 index += 1 + (coup->ch_select[c] == 3);
1905 * Convert spectral data to float samples, applying all supported tools as appropriate.
/* Runs coupling, TNS, IMDCT/windowing and SBR over all channel elements.
 * NOTE(review): the check that che is allocated is not visible here. */
1907 static void spectral_to_sample(AACContext *ac)
/* When m4ac.sbr is greater than 0 the IMDCT stage uses a bias of 0. */
1910 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
/* Element types are visited from 3 down to 0. */
1911 for (type = 3; type >= 0; type--) {
1912 for (i = 0; i < MAX_ELEM_ID; i++) {
1913 ChannelElement *che = ac->che[type][i];
/* Dependent coupling is applied at two points: before TNS and between TNS
 * and the IMDCT. */
1915 if (type <= TYPE_CPE)
1916 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1917 if (che->ch[0].tns.present)
1918 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1919 if (che->ch[1].tns.present)
1920 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1921 if (type <= TYPE_CPE)
1922 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
/* Coupling elements are transformed only when they couple after the IMDCT;
 * the second channel is transformed only for pair elements. */
1923 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1924 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1925 if (type == TYPE_CPE) {
1926 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1928 if (ac->m4ac.sbr > 0) {
1929 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
/* Independent coupling works on the output samples produced above. */
1932 if (type <= TYPE_CCE)
1933 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
/* Parses an ADTS header with ff_aac_parse_header() and copies its fields into
 * the decoder context. The check of size and the return statements are not
 * visible in this excerpt. */
1939 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1942 AACADTSHeaderInfo hdr_info;
1944 size = ff_aac_parse_header(gb, &hdr_info);
/* While the output configuration is not locked, a non-zero channel
 * configuration from the header is tried as OC_TRIAL_FRAME. */
1946 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1947 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1948 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1949 ac->m4ac.chan_config = hdr_info.chan_config;
1950 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1952 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
/* Channel configuration 0 and not locked: mark the output as unconfigured. */
1954 } else if (ac->output_configured != OC_LOCKED) {
1955 ac->output_configured = OC_NONE;
/* NOTE(review): the statements controlled by this test are not visible
 * here. */
1957 if (ac->output_configured != OC_LOCKED) {
1961 ac->m4ac.sample_rate = hdr_info.sample_rate;
1962 ac->m4ac.sampling_index = hdr_info.sampling_index;
1963 ac->m4ac.object_type = hdr_info.object_type;
/* The codec context's sample rate is set only when it is still 0. */
1964 if (!ac->avctx->sample_rate)
1965 ac->avctx->sample_rate = hdr_info.sample_rate;
/* Only a single AAC frame per ADTS frame is handled; otherwise the missing
 * feature is logged.
 * NOTE(review): the statement run when a CRC is present is not visible
 * here. */
1966 if (hdr_info.num_aac_frames == 1) {
1967 if (!hdr_info.crc_absent)
1970 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
/* Decodes one packet: optional ADTS header, then syntax elements until
 * TYPE_END, then conversion to interleaved 16-bit output in data. Case
 * labels, error returns and several guards are not visible in this excerpt. */
1977 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1978 int *data_size, AVPacket *avpkt)
1980 const uint8_t *buf = avpkt->data;
1981 int buf_size = avpkt->size;
1982 AACContext *ac = avctx->priv_data;
1983 ChannelElement *che = NULL, *che_prev = NULL;
1985 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1986 int err, elem_id, data_size_tmp;
1988 int samples = 0, multiplier;
1991 init_get_bits(&gb, buf, buf_size * 8);
/* A packet starting with twelve 1 bits is parsed as carrying an ADTS header. */
1993 if (show_bits(&gb, 12) == 0xfff) {
1994 if (parse_adts_frame_header(ac, &gb) < 0) {
1995 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1998 if (ac->m4ac.sampling_index > 12) {
1999 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2004 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
/* Each element starts with a 3-bit type and a 4-bit id. */
2006 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
2007 elem_id = get_bits(&gb, 4);
/* Types below TYPE_DSE need a channel element from get_che(). */
2009 if (elem_type < TYPE_DSE) {
2010 if (!(che=get_che(ac, elem_type, elem_id))) {
2011 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
2017 switch (elem_type) {
2020 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2024 err = decode_cpe(ac, &gb, che);
2028 err = decode_cce(ac, &gb, che);
2032 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2036 err = skip_data_stream_element(ac, &gb);
/* A program config element is decoded into a scratch layout first. */
2040 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2041 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2042 if ((err = decode_pce(ac, new_che_pos, &gb)))
/* It is applied only while the configuration state is not beyond
 * OC_TRIAL_PCE; otherwise it is logged and ignored. */
2044 if (ac->output_configured > OC_TRIAL_PCE)
2045 av_log(avctx, AV_LOG_ERROR,
2046 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2048 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
/* From here elem_id is reused as a byte count: it is extended by an 8-bit
 * field, checked against the remaining bits, and reduced by what each
 * extension payload consumed.
 * NOTE(review): the conditions around these lines are not visible here. */
2054 elem_id += get_bits(&gb, 8) - 1;
2055 if (get_bits_left(&gb) < 8 * elem_id) {
2056 av_log(avctx, AV_LOG_ERROR, overread_err);
2060 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2061 err = 0; /* FIXME */
2065 err = -1; /* should not happen, but keeps compiler happy */
2070 elem_type_prev = elem_type;
/* Fewer than 3 bits left means the next element type cannot be read. */
2075 if (get_bits_left(&gb) < 3) {
2076 av_log(avctx, AV_LOG_ERROR, overread_err);
2081 spectral_to_sample(ac);
/* multiplier is 1 only when sbr is 1 and the extension sample rate exceeds
 * the core rate; the sample count, and sample rate if unlocked, are then
 * doubled. */
2083 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2084 samples <<= multiplier;
2085 if (ac->output_configured < OC_LOCKED) {
2086 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2087 avctx->frame_size = samples;
/* The caller's buffer size is checked before any sample is written. */
2090 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2091 if (*data_size < data_size_tmp) {
2092 av_log(avctx, AV_LOG_ERROR,
2093 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2094 *data_size, data_size_tmp);
2097 *data_size = data_size_tmp;
2100 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
/* Any configuration other than "none" becomes locked after this frame. */
2102 if (ac->output_configured)
2103 ac->output_configured = OC_LOCKED;
/* Bits consumed are rounded up to whole bytes. The bytes after that point
 * are scanned for a non-zero value; the return value is buf_consumed when
 * buf_offset ends below buf_size, and buf_size otherwise.
 * NOTE(review): presumably the scan stops at the first non-zero byte; that
 * statement is not visible here. */
2105 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2106 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2107 if (buf[buf_offset])
2110 return buf_size > buf_offset ? buf_consumed : buf_size;
/* Releases the per-element state and both MDCT contexts of the decoder.
 * The return statement is not visible in this excerpt. */
2113 static av_cold int aac_decode_close(AVCodecContext *avctx)
2115 AACContext *ac = avctx->priv_data;
/* Every (type, id) slot is visited; the SBR context is closed only for
 * allocated elements, and the slot is then passed to av_freep(). */
2118 for (i = 0; i < MAX_ELEM_ID; i++) {
2119 for (type = 0; type < 4; type++) {
2120 if (ac->che[type][i])
2121 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2122 av_freep(&ac->che[type][i]);
2126 ff_mdct_end(&ac->mdct);
2127 ff_mdct_end(&ac->mdct_small);
2131 AVCodec aac_decoder = {
2140 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2141 .sample_fmts = (const enum SampleFormat[]) {
2142 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2144 .channel_layouts = aac_channel_layout,