3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and Parametric Stereo.
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aacadtsdec.h"
101 # include "arm/aac.h"
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
/**
 * Map a syntactic element (type, elem_id) to the channel element it decodes
 * into.
 *
 * A tag that was already seen in the current frame is treated as a duplicate
 * and a different elem_id is searched for. An existing entry in
 * ac->tag_che_map is returned as is; otherwise the element is chosen from
 * ac->che according to ac->m4ac.chan_config and the number of tags mapped so
 * far, and the choice is stored in ac->tag_che_map.
 *
 * The range check on elem_id is evaluated before tags_seen_this_frame is
 * indexed, so the array is never read at index MAX_ELEM_ID.
 */
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
121 while (elem_id < MAX_ELEM_ID && ac->tags_seen_this_frame[type][elem_id]) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE ||
204 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
209 if (ac->che[type][id])
210 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211 av_freep(&ac->che[type][id]);
217 * Configure output channel order based on the current program configuration element.
219 * @param che_pos current channel position configuration
220 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
222 * @return Returns error status. 0 - OK, !0 - error
224 static av_cold int output_configure(AACContext *ac,
225 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
226 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227 int channel_config, enum OCStatus oc_type)
229 AVCodecContext *avctx = ac->avctx;
230 int i, type, channels = 0, ret;
232 if (new_che_pos != che_pos)
233 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
235 if (channel_config) {
236 for (i = 0; i < tags_per_config[channel_config]; i++) {
237 if ((ret = che_configure(ac, che_pos,
238 aac_channel_layout_map[channel_config - 1][i][0],
239 aac_channel_layout_map[channel_config - 1][i][1],
244 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
247 avctx->channel_layout = aac_channel_layout[channel_config - 1];
249 /* Allocate or free elements depending on if they are in the
250 * current program configuration.
252 * Set up default 1:1 output mapping.
254 * For a 5.1 stream the output order will be:
255 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
258 for (i = 0; i < MAX_ELEM_ID; i++) {
259 for (type = 0; type < 4; type++) {
260 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
265 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266 ac->tags_mapped = 4 * MAX_ELEM_ID;
268 avctx->channel_layout = 0;
271 avctx->channels = channels;
273 ac->output_configured = oc_type;
279 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
281 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
282 * @param sce_map mono (Single Channel Element) map
283 * @param type speaker type/position for these channels
285 static void decode_channel_map(enum ChannelPosition *cpe_map,
286 enum ChannelPosition *sce_map,
287 enum ChannelPosition type,
288 GetBitContext *gb, int n)
291 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
292 map[get_bits(gb, 4)] = type;
297 * Decode program configuration element; reference: table 4.2.
299 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
301 * @return Returns error status. 0 - OK, !0 - error
303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
306 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
309 skip_bits(gb, 2); // object_type
311 sampling_index = get_bits(gb, 4);
312 if (ac->m4ac.sampling_index != sampling_index)
313 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
315 num_front = get_bits(gb, 4);
316 num_side = get_bits(gb, 4);
317 num_back = get_bits(gb, 4);
318 num_lfe = get_bits(gb, 2);
319 num_assoc_data = get_bits(gb, 3);
320 num_cc = get_bits(gb, 4);
323 skip_bits(gb, 4); // mono_mixdown_tag
325 skip_bits(gb, 4); // stereo_mixdown_tag
328 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
331 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
332 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
333 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
335 skip_bits_long(gb, 4 * num_assoc_data);
337 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
341 /* comment field, first byte is length */
342 comment_len = get_bits(gb, 8) * 8;
343 if (get_bits_left(gb) < comment_len) {
344 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
347 skip_bits_long(gb, comment_len);
352 * Set up channel positions based on a default channel configuration
353 * as specified in table 1.17.
355 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
357 * @return Returns error status. 0 - OK, !0 - error
359 static av_cold int set_default_channel_config(AACContext *ac,
360 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
363 if (channel_config < 1 || channel_config > 7) {
364 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
369 /* default channel configurations:
371 * 1ch : front center (mono)
372 * 2ch : L + R (stereo)
373 * 3ch : front center + L + R
374 * 4ch : front center + L + R + back center
375 * 5ch : front center + L + R + back stereo
376 * 6ch : front center + L + R + back stereo + LFE
377 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
380 if (channel_config != 2)
381 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382 if (channel_config > 1)
383 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384 if (channel_config == 4)
385 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
386 if (channel_config > 4)
387 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 = AAC_CHANNEL_BACK; // back stereo
389 if (channel_config > 5)
390 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
391 if (channel_config == 7)
392 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
398 * Decode GA "General Audio" specific configuration; reference: table 4.1.
400 * @return Returns error status. 0 - OK, !0 - error
402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
405 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
406 int extension_flag, ret;
408 if (get_bits1(gb)) { // frameLengthFlag
409 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
413 if (get_bits1(gb)) // dependsOnCoreCoder
414 skip_bits(gb, 14); // coreCoderDelay
415 extension_flag = get_bits1(gb);
417 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
418 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 skip_bits(gb, 3); // layerNr
421 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
422 if (channel_config == 0) {
423 skip_bits(gb, 4); // element_instance_tag
424 if ((ret = decode_pce(ac, new_che_pos, gb)))
427 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
430 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
433 if (extension_flag) {
434 switch (ac->m4ac.object_type) {
436 skip_bits(gb, 5); // numOfSubFrame
437 skip_bits(gb, 11); // layer_length
441 case AOT_ER_AAC_SCALABLE:
443 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
444 * aacScalefactorDataResilienceFlag
445 * aacSpectralDataResilienceFlag
449 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
455 * Decode audio specific configuration; reference: table 1.13.
457 * @param data pointer to AVCodecContext extradata
458 * @param data_size size of AVCodecContext extradata
460 * @return Returns error status. 0 - OK, !0 - error
462 static int decode_audio_specific_config(AACContext *ac, void *data,
468 init_get_bits(&gb, data, data_size * 8);
470 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
472 if (ac->m4ac.sampling_index > 12) {
473 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
476 if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
479 skip_bits_long(&gb, i);
481 switch (ac->m4ac.object_type) {
484 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
488 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
/**
 * Linear congruential pseudorandom number generator.
 *
 * The multiply-add is carried out in unsigned arithmetic so that it wraps
 * around instead of overflowing a signed int, which would be undefined
 * behaviour in C. The result is reinterpreted as int through a union.
 *
 * @param   previous_val    current state of the generator (passed by value)
 *
 * @return  Returns a 32-bit pseudorandom integer
 */
static av_always_inline int lcg_random(int previous_val)
{
    union { unsigned u; int s; } v = { (unsigned)previous_val * 1664525u + 1013904223u };
    return v.s;
}
507 static av_always_inline void reset_predict_state(PredictorState *ps)
517 static void reset_all_predictors(PredictorState *ps)
520 for (i = 0; i < MAX_PREDICTORS; i++)
521 reset_predict_state(&ps[i]);
524 static void reset_predictor_group(PredictorState *ps, int group_num)
527 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
528 reset_predict_state(&ps[i]);
531 #define AAC_INIT_VLC_STATIC(num, size) \
532 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
533 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
534 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
537 static av_cold int aac_decode_init(AVCodecContext *avctx)
539 AACContext *ac = avctx->priv_data;
542 ac->m4ac.sample_rate = avctx->sample_rate;
544 if (avctx->extradata_size > 0) {
545 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
549 avctx->sample_fmt = SAMPLE_FMT_S16;
551 AAC_INIT_VLC_STATIC( 0, 304);
552 AAC_INIT_VLC_STATIC( 1, 270);
553 AAC_INIT_VLC_STATIC( 2, 550);
554 AAC_INIT_VLC_STATIC( 3, 300);
555 AAC_INIT_VLC_STATIC( 4, 328);
556 AAC_INIT_VLC_STATIC( 5, 294);
557 AAC_INIT_VLC_STATIC( 6, 306);
558 AAC_INIT_VLC_STATIC( 7, 268);
559 AAC_INIT_VLC_STATIC( 8, 510);
560 AAC_INIT_VLC_STATIC( 9, 366);
561 AAC_INIT_VLC_STATIC(10, 462);
565 dsputil_init(&ac->dsp, avctx);
567 ac->random_state = 0x1f2e3d4c;
569 // -1024 - Compensate wrong IMDCT method.
570 // 32768 - Required to scale values to the correct range for the bias method
571 // for float to int16 conversion.
573 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
574 ac->add_bias = 385.0f;
575 ac->sf_scale = 1. / (-1024. * 32768.);
579 ac->sf_scale = 1. / -1024.;
585 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
586 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
587 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
590 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
591 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
592 // window initialization
593 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
594 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
595 ff_init_ff_sine_windows(10);
596 ff_init_ff_sine_windows( 7);
604 * Skip data_stream_element; reference: table 4.10.
606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
608 int byte_align = get_bits1(gb);
609 int count = get_bits(gb, 8);
611 count += get_bits(gb, 8);
615 if (get_bits_left(gb) < 8 * count) {
616 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
619 skip_bits_long(gb, 8 * count);
623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
628 ics->predictor_reset_group = get_bits(gb, 5);
629 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
630 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
634 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
635 ics->prediction_used[sfb] = get_bits1(gb);
641 * Decode Individual Channel Stream info; reference: table 4.6.
643 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
646 GetBitContext *gb, int common_window)
649 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 memset(ics, 0, sizeof(IndividualChannelStream));
653 ics->window_sequence[1] = ics->window_sequence[0];
654 ics->window_sequence[0] = get_bits(gb, 2);
655 ics->use_kb_window[1] = ics->use_kb_window[0];
656 ics->use_kb_window[0] = get_bits1(gb);
657 ics->num_window_groups = 1;
658 ics->group_len[0] = 1;
659 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
661 ics->max_sfb = get_bits(gb, 4);
662 for (i = 0; i < 7; i++) {
664 ics->group_len[ics->num_window_groups - 1]++;
666 ics->num_window_groups++;
667 ics->group_len[ics->num_window_groups - 1] = 1;
670 ics->num_windows = 8;
671 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
672 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
673 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674 ics->predictor_present = 0;
676 ics->max_sfb = get_bits(gb, 6);
677 ics->num_windows = 1;
678 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
679 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
680 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
681 ics->predictor_present = get_bits1(gb);
682 ics->predictor_reset_group = 0;
683 if (ics->predictor_present) {
684 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
685 if (decode_prediction(ac, ics, gb)) {
686 memset(ics, 0, sizeof(IndividualChannelStream));
689 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 memset(ics, 0, sizeof(IndividualChannelStream));
694 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 memset(ics, 0, sizeof(IndividualChannelStream));
701 if (ics->max_sfb > ics->num_swb) {
702 av_log(ac->avctx, AV_LOG_ERROR,
703 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
704 ics->max_sfb, ics->num_swb);
705 memset(ics, 0, sizeof(IndividualChannelStream));
713 * Decode band types (section_data payload); reference: table 4.46.
715 * @param band_type array of the used band type
716 * @param band_type_run_end array of the last scalefactor band of a band type run
718 * @return Returns error status. 0 - OK, !0 - error
720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
721 int band_type_run_end[120], GetBitContext *gb,
722 IndividualChannelStream *ics)
725 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
726 for (g = 0; g < ics->num_window_groups; g++) {
728 while (k < ics->max_sfb) {
729 uint8_t sect_end = k;
731 int sect_band_type = get_bits(gb, 4);
732 if (sect_band_type == 12) {
733 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
736 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 sect_end += sect_len_incr;
738 sect_end += sect_len_incr;
739 if (get_bits_left(gb) < 0) {
740 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
743 if (sect_end > ics->max_sfb) {
744 av_log(ac->avctx, AV_LOG_ERROR,
745 "Number of bands (%d) exceeds limit (%d).\n",
746 sect_end, ics->max_sfb);
749 for (; k < sect_end; k++) {
750 band_type [idx] = sect_band_type;
751 band_type_run_end[idx++] = sect_end;
759 * Decode scalefactors; reference: table 4.47.
761 * @param global_gain first scalefactor value as scalefactors are differentially coded
762 * @param band_type array of the used band type
763 * @param band_type_run_end array of the last scalefactor band of a band type run
764 * @param sf array of scalefactors or intensity stereo positions
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
769 unsigned int global_gain,
770 IndividualChannelStream *ics,
771 enum BandType band_type[120],
772 int band_type_run_end[120])
774 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
776 int offset[3] = { global_gain, global_gain - 90, 100 };
778 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
779 for (g = 0; g < ics->num_window_groups; g++) {
780 for (i = 0; i < ics->max_sfb;) {
781 int run_end = band_type_run_end[idx];
782 if (band_type[idx] == ZERO_BT) {
783 for (; i < run_end; i++, idx++)
785 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
786 for (; i < run_end; i++, idx++) {
787 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788 if (offset[2] > 255U) {
789 av_log(ac->avctx, AV_LOG_ERROR,
790 "%s (%d) out of range.\n", sf_str[2], offset[2]);
793 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
795 } else if (band_type[idx] == NOISE_BT) {
796 for (; i < run_end; i++, idx++) {
797 if (noise_flag-- > 0)
798 offset[1] += get_bits(gb, 9) - 256;
800 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801 if (offset[1] > 255U) {
802 av_log(ac->avctx, AV_LOG_ERROR,
803 "%s (%d) out of range.\n", sf_str[1], offset[1]);
806 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
809 for (; i < run_end; i++, idx++) {
810 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811 if (offset[0] > 255U) {
812 av_log(ac->avctx, AV_LOG_ERROR,
813 "%s (%d) out of range.\n", sf_str[0], offset[0]);
816 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
825 * Decode pulse data; reference: table 4.7.
827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
828 const uint16_t *swb_offset, int num_swb)
831 pulse->num_pulse = get_bits(gb, 2) + 1;
832 pulse_swb = get_bits(gb, 6);
833 if (pulse_swb >= num_swb)
835 pulse->pos[0] = swb_offset[pulse_swb];
836 pulse->pos[0] += get_bits(gb, 5);
837 if (pulse->pos[0] > 1023)
839 pulse->amp[0] = get_bits(gb, 4);
840 for (i = 1; i < pulse->num_pulse; i++) {
841 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 if (pulse->pos[i] > 1023)
844 pulse->amp[i] = get_bits(gb, 4);
850 * Decode Temporal Noise Shaping data; reference: table 4.48.
852 * @return Returns error status. 0 - OK, !0 - error
854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
855 GetBitContext *gb, const IndividualChannelStream *ics)
857 int w, filt, i, coef_len, coef_res, coef_compress;
858 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
859 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
860 for (w = 0; w < ics->num_windows; w++) {
861 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 coef_res = get_bits1(gb);
864 for (filt = 0; filt < tns->n_filt[w]; filt++) {
866 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
868 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 tns->order[w][filt], tns_max_order);
871 tns->order[w][filt] = 0;
874 if (tns->order[w][filt]) {
875 tns->direction[w][filt] = get_bits1(gb);
876 coef_compress = get_bits1(gb);
877 coef_len = coef_res + 3 - coef_compress;
878 tmp2_idx = 2 * coef_compress + coef_res;
880 for (i = 0; i < tns->order[w][filt]; i++)
881 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
890 * Decode Mid/Side data; reference: table 4.54.
892 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
893 * [1] mask is decoded from bitstream; [2] mask is all 1s;
894 * [3] reserved for scalable AAC
896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
900 if (ms_present == 1) {
901 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
902 cpe->ms_mask[idx] = get_bits1(gb);
903 } else if (ms_present == 2) {
904 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
913 *dst++ = v[idx & 15] * s;
914 *dst++ = v[idx>>4 & 15] * s;
920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
924 *dst++ = v[idx & 3] * s;
925 *dst++ = v[idx>>2 & 3] * s;
926 *dst++ = v[idx>>4 & 3] * s;
927 *dst++ = v[idx>>6 & 3] * s;
933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
934 unsigned sign, const float *scale)
936 union float754 s0, s1;
938 s0.f = s1.f = *scale;
939 s0.i ^= sign >> 1 << 31;
942 *dst++ = v[idx & 15] * s0.f;
943 *dst++ = v[idx>>4 & 15] * s1.f;
/**
 * Write four values to dst: v[] entries selected by successive 2-bit fields
 * of idx, each multiplied by *scale with its sign optionally flipped.
 *
 * The most significant bit of sign is XORed into the sign bit of the scale
 * for each output; sign is shifted left by one between outputs only when the
 * corresponding bit of idx >> 12 is set.
 *
 * The sign-bit mask is written as 1U<<31: shifting a signed 1 into bit 31 is
 * undefined behaviour.
 */
950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
951 unsigned sign, const float *scale)
953 unsigned nz = idx >> 12;
954 union float754 s = { .f = *scale };
957 t.i = s.i ^ (sign & 1U<<31);
958 *dst++ = v[idx & 3] * t.f;
960 sign <<= nz & 1; nz >>= 1;
961 t.i = s.i ^ (sign & 1U<<31);
962 *dst++ = v[idx>>2 & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1U<<31);
966 *dst++ = v[idx>>4 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1U<<31);
970 *dst++ = v[idx>>6 & 3] * t.f;
977 * Decode spectral data; reference: table 4.50.
978 * Dequantize and scale spectral data; reference: 4.6.3.3.
980 * @param coef array of dequantized, scaled spectral data
981 * @param sf array of scalefactors or intensity stereo positions
982 * @param pulse_present set if pulses are present
983 * @param pulse pointer to pulse data struct
984 * @param band_type array of the used band type
986 * @return Returns error status. 0 - OK, !0 - error
988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989 GetBitContext *gb, const float sf[120],
990 int pulse_present, const Pulse *pulse,
991 const IndividualChannelStream *ics,
992 enum BandType band_type[120])
994 int i, k, g, idx = 0;
995 const int c = 1024 / ics->num_windows;
996 const uint16_t *offsets = ics->swb_offset;
997 float *coef_base = coef;
999 for (g = 0; g < ics->num_windows; g++)
1000 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1002 for (g = 0; g < ics->num_window_groups; g++) {
1003 unsigned g_len = ics->group_len[g];
1005 for (i = 0; i < ics->max_sfb; i++, idx++) {
1006 const unsigned cbt_m1 = band_type[idx] - 1;
1007 float *cfo = coef + offsets[i];
1008 int off_len = offsets[i + 1] - offsets[i];
1011 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1012 for (group = 0; group < g_len; group++, cfo+=128) {
1013 memset(cfo, 0, off_len * sizeof(float));
1015 } else if (cbt_m1 == NOISE_BT - 1) {
1016 for (group = 0; group < g_len; group++, cfo+=128) {
1020 for (k = 0; k < off_len; k++) {
1021 ac->random_state = lcg_random(ac->random_state);
1022 cfo[k] = ac->random_state;
1025 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1026 scale = sf[idx] / sqrtf(band_energy);
1027 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1030 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1031 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1032 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1033 OPEN_READER(re, gb);
1035 switch (cbt_m1 >> 1) {
1037 for (group = 0; group < g_len; group++, cfo+=128) {
1045 UPDATE_CACHE(re, gb);
1046 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1047 cb_idx = cb_vector_idx[code];
1048 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1054 for (group = 0; group < g_len; group++, cfo+=128) {
1064 UPDATE_CACHE(re, gb);
1065 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1066 #if MIN_CACHE_BITS < 20
1067 UPDATE_CACHE(re, gb);
1069 cb_idx = cb_vector_idx[code];
1070 nnz = cb_idx >> 8 & 15;
1071 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1072 LAST_SKIP_BITS(re, gb, nnz);
1073 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1079 for (group = 0; group < g_len; group++, cfo+=128) {
1087 UPDATE_CACHE(re, gb);
1088 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1089 cb_idx = cb_vector_idx[code];
1090 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1097 for (group = 0; group < g_len; group++, cfo+=128) {
1107 UPDATE_CACHE(re, gb);
1108 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1109 cb_idx = cb_vector_idx[code];
1110 nnz = cb_idx >> 8 & 15;
1111 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1112 LAST_SKIP_BITS(re, gb, nnz);
1113 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1119 for (group = 0; group < g_len; group++, cfo+=128) {
1121 uint32_t *icf = (uint32_t *) cf;
1131 UPDATE_CACHE(re, gb);
1132 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1140 cb_idx = cb_vector_idx[code];
1143 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1144 LAST_SKIP_BITS(re, gb, nnz);
1146 for (j = 0; j < 2; j++) {
1150 /* The total length of escape_sequence must be < 22 bits according
1151 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1152 UPDATE_CACHE(re, gb);
1153 b = GET_CACHE(re, gb);
1154 b = 31 - av_log2(~b);
1157 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1161 #if MIN_CACHE_BITS < 21
1162 LAST_SKIP_BITS(re, gb, b + 1);
1163 UPDATE_CACHE(re, gb);
1165 SKIP_BITS(re, gb, b + 1);
1168 n = (1 << b) + SHOW_UBITS(re, gb, b);
1169 LAST_SKIP_BITS(re, gb, b);
1170 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1173 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1174 *icf++ = (bits & 1<<31) | v;
1181 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1185 CLOSE_READER(re, gb);
1191 if (pulse_present) {
1193 for (i = 0; i < pulse->num_pulse; i++) {
1194 float co = coef_base[ pulse->pos[i] ];
1195 while (offsets[idx + 1] <= pulse->pos[i])
1197 if (band_type[idx] != NOISE_BT && sf[idx]) {
1198 float ico = -pulse->amp[i];
1201 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1203 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1210 static av_always_inline float flt16_round(float pf)
1214 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1218 static av_always_inline float flt16_even(float pf)
1222 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1226 static av_always_inline float flt16_trunc(float pf)
1230 pun.i &= 0xFFFF0000U;
1234 static av_always_inline void predict(PredictorState *ps, float *coef,
1235 float sf_scale, float inv_sf_scale,
1238 const float a = 0.953125; // 61.0 / 64
1239 const float alpha = 0.90625; // 29.0 / 32
1243 float r0 = ps->r0, r1 = ps->r1;
1244 float cor0 = ps->cor0, cor1 = ps->cor1;
1245 float var0 = ps->var0, var1 = ps->var1;
1247 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1248 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1250 pv = flt16_round(k1 * r0 + k2 * r1);
1252 *coef += pv * sf_scale;
1254 e0 = *coef * inv_sf_scale;
1257 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1258 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1259 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1260 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1262 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1263 ps->r0 = flt16_trunc(a * e0);
// Runs predict() over the coefficients of one channel and handles the
// predictor resets.
1267 * Apply AAC-Main style frequency domain prediction.
1269 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
// predict() takes both the scale and its reciprocal.
1272 float sf_scale = ac->sf_scale, inv_sf_scale = 1 / ac->sf_scale;
// First call for this channel: start from freshly reset predictor state.
1274 if (!sce->ics.predictor_initialized) {
1275 reset_all_predictors(sce->predictor_state);
1276 sce->ics.predictor_initialized = 1;
// Anything but an eight-short frame is predicted.
1279 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
// The band limit comes from a table indexed by the sampling index.
1280 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1281 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
// predict() is called for every coefficient; its last argument is nonzero
// only when predictor_present is set and this band is flagged in
// prediction_used.
1282 predict(&sce->predictor_state[k], &sce->coeffs[k],
1283 sf_scale, inv_sf_scale,
1284 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
// A nonzero reset group is handed to reset_predictor_group().
1287 if (sce->ics.predictor_reset_group)
1288 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
// NOTE(review): presumably the eight-short branch, resetting everything - confirm.
1290 reset_all_predictors(sce->predictor_state);
// Reads, in bitstream order: global gain, ics_info (when not shared), band
// types, scalefactors, the pulse / TNS / SSR flags, and the spectral data.
1294 * Decode an individual_channel_stream payload; reference: table 4.44.
1296 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1297 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1299 * @return Returns error status. 0 - OK, !0 - error
1301 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1302 GetBitContext *gb, int common_window, int scale_flag)
1305 TemporalNoiseShaping *tns = &sce->tns;
1306 IndividualChannelStream *ics = &sce->ics;
1307 float *out = sce->coeffs;
1308 int global_gain, pulse_present = 0;
1310 /* This assignment is to silence a GCC warning about the variable being used
1311 * uninitialized when in fact it always is.
1313 pulse.num_pulse = 0;
// 8-bit global gain, handed to decode_scalefactors() below.
1315 global_gain = get_bits(gb, 8);
// ics_info is parsed here only when neither flag is set.
1317 if (!common_window && !scale_flag) {
1318 if (decode_ics_info(ac, ics, gb, 0) < 0)
1322 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1324 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
// One flag bit announces pulse data; it is rejected for eight-short frames.
1329 if ((pulse_present = get_bits1(gb))) {
1330 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1331 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1334 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1335 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
// The TNS flag is stored in tns->present; decode_tns() runs only when it is set.
1339 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
// A set bit here is reported as the missing "SSR" feature.
1341 if (get_bits1(gb)) {
1342 av_log_missing_feature(ac->avctx, "SSR", 1);
1347 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
// Prediction is applied here only for AAC Main without a common window.
1350 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1351 apply_prediction(ac, sce);
// Runs ac->dsp.butterflies_float on the two channels of a pair for every
// band whose ms_mask entry is set.
1357 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1359 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
// Layout (groups, bands, offsets) is taken from channel 0.
1361 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1362 float *ch0 = cpe->ch[0].coeffs;
1363 float *ch1 = cpe->ch[1].coeffs;
// idx is never reset, so it counts (group, band) pairs across all groups.
1364 int g, i, group, idx = 0;
1365 const uint16_t *offsets = ics->swb_offset;
1366 for (g = 0; g < ics->num_window_groups; g++) {
1367 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Bands with a band type at or above NOISE_BT in either channel are skipped.
1368 if (cpe->ms_mask[idx] &&
1369 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
// Windows of one group lie 128 coefficients apart.
1370 for (group = 0; group < ics->group_len[g]; group++) {
1371 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1372 ch1 + group * 128 + offsets[i],
1373 offsets[i+1] - offsets[i]);
// Advance both pointers past this group's windows.
1377 ch0 += ics->group_len[g] * 128;
1378 ch1 += ics->group_len[g] * 128;
// In intensity-coded bands, writes channel 1 as a scaled copy of channel 0.
1383 * intensity stereo decoding; reference: 4.6.8.2.3
1385 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1386 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1387 * [3] reserved for scalable AAC
1389 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
// Band layout, band types and scalefactors all come from channel 1.
1391 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1392 SingleChannelElement *sce1 = &cpe->ch[1];
1393 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1394 const uint16_t *offsets = ics->swb_offset;
1395 int g, group, i, k, idx = 0;
1398 for (g = 0; g < ics->num_window_groups; g++) {
// i is advanced inside the body, one whole band-type run at a time.
1399 for (i = 0; i < ics->max_sfb;) {
1400 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1401 const int bt_run_end = sce1->band_type_run_end[idx];
1402 for (; i < bt_run_end; i++, idx++) {
// Sign from the band type: the value 14 gives -1, the value 15 gives +1.
1403 c = -1 + 2 * (sce1->band_type[idx] - 14);
// A set mask bit flips the sign.
// NOTE(review): presumably only when ms_present is set - confirm.
1405 c *= 1 - 2 * cpe->ms_mask[idx];
1406 scale = c * sce1->sf[idx];
1407 for (group = 0; group < ics->group_len[g]; group++)
1408 for (k = offsets[i]; k < offsets[i + 1]; k++)
1409 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
// Other band types: move idx by the length of the run without touching data.
1412 int bt_run_end = sce1->band_type_run_end[idx];
1413 idx += bt_run_end - i;
// Step both pointers over the windows of this group.
1417 coef0 += ics->group_len[g] * 128;
1418 coef1 += ics->group_len[g] * 128;
// Parses both channels of a pair, then applies the stereo tools.
1423 * Decode a channel_pair_element; reference: table 4.4.
1425 * @return Returns error status. 0 - OK, !0 - error
1427 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1429 int i, ret, common_window, ms_present = 0;
1431 common_window = get_bits1(gb);
1432 if (common_window) {
// Shared ics_info is parsed once, into channel 0.
1433 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
// Channel 1 receives a copy of channel 0's ics, except that its own
// use_kb_window[0] from before the copy ends up in use_kb_window[1].
1435 i = cpe->ch[1].ics.use_kb_window[0];
1436 cpe->ch[1].ics = cpe->ch[0].ics;
1437 cpe->ch[1].ics.use_kb_window[1] = i;
// Two-bit mode; the value 3 is rejected, other nonzero values decode a mask.
1438 ms_present = get_bits(gb, 2);
1439 if (ms_present == 3) {
1440 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1442 } else if (ms_present)
1443 decode_mid_side_stereo(cpe, gb, ms_present);
1445 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1447 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
// With a common window, prediction for AAC Main runs here, after the
// mid/side step.
1450 if (common_window) {
1452 apply_mid_side_stereo(ac, cpe);
1453 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1454 apply_prediction(ac, &cpe->ch[0]);
1455 apply_prediction(ac, &cpe->ch[1]);
1459 apply_intensity_stereo(cpe, ms_present);
// Bases for coupling gains; decode_cce() indexes this table with a 2-bit
// field and raises the selected entry to a negative integer power.
1463 static const float cce_scale[] = {
1464 1.09050773266525765921, //2^(1/8)
1465 1.18920711500272106672, //2^(1/4)
// Parses the coupling targets, the coupling channel's own ICS, and the
// gain lists, storing the gains as linear factors in coup->gain.
1471 * Decode coupling_channel_element; reference: table 4.8.
1473 * @return Returns error status. 0 - OK, !0 - error
1475 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1481 SingleChannelElement *sce = &che->ch[0];
1482 ChannelCoupling *coup = &che->coup;
// The first bit becomes the upper bit of the two-bit coupling point.
1484 coup->coupling_point = 2 * get_bits1(gb);
// The loop bound is inclusive: num_coupled + 1 targets are read.
1485 coup->num_coupled = get_bits(gb, 3);
1486 for (c = 0; c <= coup->num_coupled; c++) {
1488 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1489 coup->id_select[c] = get_bits(gb, 4);
// Only pair targets carry a 2-bit channel selector.
1490 if (coup->type[c] == TYPE_CPE) {
1491 coup->ch_select[c] = get_bits(gb, 2);
1492 if (coup->ch_select[c] == 3)
// NOTE(review): presumably the selector stored for non-pair targets - confirm.
1495 coup->ch_select[c] = 2;
// The bit is always consumed; the low bit is forced to 1 whenever the
// upper bit is already set.
1497 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1499 sign = get_bits(gb, 1);
1500 scale = cce_scale[get_bits(gb, 2)];
1502 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1505 for (c = 0; c < num_gain; c++) {
1509 float gain_cache = 1.;
// For AFTER_IMDCT no flag bit is read; cge is taken as 1.
1511 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
// The VLC value is offset by 60 to give a signed gain exponent.
1512 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1513 gain_cache = powf(scale, -gain);
// AFTER_IMDCT keeps a single gain per target, in slot 0.
1515 if (coup->coupling_point == AFTER_IMDCT) {
1516 coup->gain[c][0] = gain_cache;
// Otherwise one gain per (group, band) slot; idx counts across groups.
1518 for (g = 0; g < sce->ics.num_window_groups; g++) {
1519 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1520 if (sce->band_type[idx] != ZERO_BT) {
1522 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1530 gain_cache = powf(scale, -t) * s;
1533 coup->gain[c][idx] = gain_cache;
// Reads exclusion flags in runs of seven, each run followed by a
// continuation bit.
1543 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1545 * @return Returns number of bytes consumed.
1547 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1551 int num_excl_chan = 0;
1554 for (i = 0; i < 7; i++)
1555 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
// The size check comes first, so no continuation bit is read once the
// limit of MAX_CHANNELS - 7 flags is reached.
1556 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
// One unit per run of seven flags.
1558 return num_excl_chan / 7;
// Parses the optional sections of a dynamic range payload into che_drc;
// each section is introduced by a one-bit presence flag.
1562 * Decode dynamic range information; reference: table 4.52.
1564 * @param cnt length of TYPE_FIL syntactic element in bytes
1566 * @return Returns number of bytes consumed.
1568 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1569 GetBitContext *gb, int cnt)
// One band unless the band section below adds more.
1572 int drc_num_bands = 1;
1575 /* pce_tag_present? */
1576 if (get_bits1(gb)) {
1577 che_drc->pce_instance_tag = get_bits(gb, 4);
1578 skip_bits(gb, 4); // tag_reserved_bits
1582 /* excluded_chns_present? */
1583 if (get_bits1(gb)) {
// The helper's return value is added to the running count n.
1584 n += decode_drc_channel_exclusions(che_drc, gb);
1587 /* drc_bands_present? */
1588 if (get_bits1(gb)) {
1589 che_drc->band_incr = get_bits(gb, 4);
1590 che_drc->interpolation_scheme = get_bits(gb, 4);
// band_incr is a 4-bit value, so at most 16 bands follow.
1592 drc_num_bands += che_drc->band_incr;
1593 for (i = 0; i < drc_num_bands; i++) {
1594 che_drc->band_top[i] = get_bits(gb, 8);
1599 /* prog_ref_level_present? */
1600 if (get_bits1(gb)) {
1601 che_drc->prog_ref_level = get_bits(gb, 7);
1602 skip_bits1(gb); // prog_ref_level_reserved_bits
// Per band: one sign bit followed by a 7-bit control value.
1606 for (i = 0; i < drc_num_bands; i++) {
1607 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1608 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
// Dispatches on a 4-bit extension type and either decodes or skips the
// payload.
1616 * Decode extension data (incomplete); reference: table 4.51.
1618 * @param cnt length of TYPE_FIL syntactic element in bytes
1620 * @return Returns number of bytes consumed
1622 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1623 ChannelElement *che, enum RawDataBlockType elem_type)
1627 switch (get_bits(gb, 4)) { // extension type
1628 case EXT_SBR_DATA_CRC:
1632 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
// A zero sbr field is treated as "signalled absent": log and skip.
1634 } else if (!ac->m4ac.sbr) {
1635 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
// The skip length is cnt bytes minus the 4 type bits already read.
1636 skip_bits_long(gb, 8 * cnt - 4);
// sbr == -1 with a locked output configuration is also refused.
1638 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1639 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1640 skip_bits_long(gb, 8 * cnt - 4);
// Mono stream, ps == -1, output not yet locked: the output is reconfigured.
1642 } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1645 output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1649 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1651 case EXT_DYNAMIC_RANGE:
// Dynamic range data is stored in the context-wide che_drc.
1652 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1656 case EXT_DATA_ELEMENT:
1658 skip_bits_long(gb, 8 * cnt - 4);
// For each window and each filter, converts the stored coefficients to
// LPC form and filters a band range of coef in place.
1665 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1667 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1668 * @param coef spectral coefficients
1670 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1671 IndividualChannelStream *ics, int decode)
// Highest band index a filter may reach.
1673 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1675 int bottom, top, order, start, end, size, inc;
1676 float lpc[TNS_MAX_ORDER];
1678 for (w = 0; w < ics->num_windows; w++) {
// Each window starts at the top band; filters then extend downwards.
1679 bottom = ics->num_swb;
1680 for (filt = 0; filt < tns->n_filt[w]; filt++) {
// The lower edge is clamped at band 0.
1682 bottom = FFMAX(0, top - tns->length[w][filt]);
1683 order = tns->order[w][filt];
1688 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
// Band indices are clamped to mmm, then mapped to coefficient offsets.
1690 start = ics->swb_offset[FFMIN(bottom, mmm)];
1691 end = ics->swb_offset[FFMIN( top, mmm)];
// An empty range is detected here.
1692 if ((size = end - start) <= 0)
1694 if (tns->direction[w][filt]) {
// Each coefficient has up to `order` earlier positions (in steps of inc)
// subtracted, weighted by lpc. FFMIN(m, order) keeps the taps inside the
// range near its first coefficient.
1703 for (m = 0; m < size; m++, start += inc)
1704 for (i = 1; i <= FFMIN(m, order); i++)
1705 coef[start] -= coef[start - i * inc] * lpc[i - 1];
// Transforms sce->coeffs, overlaps the result with sce->saved to produce
// sce->ret, then refills sce->saved for the next call.
1711 * Conduct IMDCT and windowing.
1713 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1715 IndividualChannelStream *ics = &sce->ics;
1716 float *in = sce->coeffs;
1717 float *out = sce->ret;
1718 float *saved = sce->saved;
// use_kb_window[0] picks the shape of swindow; use_kb_window[1] picks the
// shape of the two *_prev windows.
1719 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1720 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1721 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1722 float *buf = ac->buf_mdct;
1723 float *temp = ac->temp;
// Eight-short frames use eight transforms spaced 128 coefficients apart;
// the long transform below handles the other case.
1727 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1728 for (i = 0; i < 1024; i += 128)
1729 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1731 ff_imdct_half(&ac->mdct, buf, in);
1733 /* window overlapping
1734 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1735 * and long to short transitions are considered to be short to short
1736 * transitions. This leaves just two cases (long to long and short to short)
1737 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
// window_sequence[1] is tested together with [0] to select the long overlap.
1739 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1740 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1741 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
// Other transitions: the first 448 outputs are the saved samples plus bias.
1743 for (i = 0; i < 448; i++)
1744 out[i] = saved[i] + bias;
1746 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1747 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1748 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1749 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1750 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
// This overlap goes through temp: its first 64 samples are copied to out
// here, and samples from temp + 64 are stored into saved further down.
1751 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1752 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
1754 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1755 for (i = 576; i < 1024; i++)
1756 out[i] = buf[i-512] + bias;
// Refill saved. The bias added into temp is removed again, and the
// remaining overlaps are computed with a bias argument of 0.
1761 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1762 for (i = 0; i < 64; i++)
1763 saved[i] = temp[64 + i] - bias;
1764 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1765 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1766 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1767 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1768 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1769 memcpy( saved, buf + 512, 448 * sizeof(float));
1770 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1771 } else { // LONG_STOP or ONLY_LONG
1772 memcpy( saved, buf + 512, 512 * sizeof(float));
// Adds the coupling channel's coefficients, scaled by a per-band gain,
// to the target's coefficients.
1777 * Apply dependent channel coupling (applied before IMDCT).
1779 * @param index index into coupling gain array
1781 static void apply_dependent_coupling(AACContext *ac,
1782 SingleChannelElement *target,
1783 ChannelElement *cce, int index)
// Band layout comes from the coupling channel, not from the target.
1785 IndividualChannelStream *ics = &cce->ch[0].ics;
1786 const uint16_t *offsets = ics->swb_offset;
1787 float *dest = target->coeffs;
1788 const float *src = cce->ch[0].coeffs;
1789 int g, i, group, k, idx = 0;
// The LTP object type is reported as unsupported here.
1790 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1791 av_log(ac->avctx, AV_LOG_ERROR,
1792 "Dependent coupling is not supported together with LTP\n");
1795 for (g = 0; g < ics->num_window_groups; g++) {
1796 for (i = 0; i < ics->max_sfb; i++, idx++) {
// Bands of type ZERO_BT contribute nothing and are skipped.
1797 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1798 const float gain = cce->coup.gain[index][idx];
1799 for (group = 0; group < ics->group_len[g]; group++) {
1800 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1802 dest[group * 128 + k] += gain * src[group * 128 + k];
// Step both pointers over the windows of this group.
1807 dest += ics->group_len[g] * 128;
1808 src += ics->group_len[g] * 128;
// Adds the coupling channel's output samples, scaled by one gain, to the
// target's output samples.
1813 * Apply independent channel coupling (applied after IMDCT).
1815 * @param index index into coupling gain array
1817 static void apply_independent_coupling(AACContext *ac,
1818 SingleChannelElement *target,
1819 ChannelElement *cce, int index)
// Only slot 0 of the gain row is used.
1822 const float gain = cce->coup.gain[index][0];
1823 const float bias = ac->add_bias;
1824 const float *src = cce->ch[0].ret;
1825 float *dest = target->ret;
// 1024 samples, or 2048 when the sbr field equals 1.
1826 const int len = 1024 << (ac->m4ac.sbr == 1);
// The bias is taken off the source before scaling.
1828 for (i = 0; i < len; i++)
1829 dest[i] += gain * (src[i] - bias);
// Scans every coupling element with the requested coupling point and
// applies the given method to the channels of cc that it targets.
1833 * channel coupling transformation interface
1835 * @param apply_coupling_method pointer to (in)dependent coupling function
1837 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1838 enum RawDataBlockType type, int elem_id,
1839 enum CouplingPoint coupling_point,
1840 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1844 for (i = 0; i < MAX_ELEM_ID; i++) {
1845 ChannelElement *cce = ac->che[TYPE_CCE][i];
// Unallocated slots and elements with another coupling point are ignored.
1848 if (cce && cce->coup.coupling_point == coupling_point) {
1849 ChannelCoupling *coup = &cce->coup;
1851 for (c = 0; c <= coup->num_coupled; c++) {
1852 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
// Any selector other than 1 couples into channel 0.
1853 if (coup->ch_select[c] != 1) {
1854 apply_coupling_method(ac, &cc->ch[0], cce, index);
1855 if (coup->ch_select[c] != 0)
// Any selector other than 2 couples into channel 1; this call always
// advances index.
1858 if (coup->ch_select[c] != 2)
1859 apply_coupling_method(ac, &cc->ch[1], cce, index++);
// Non-matching targets still advance index: by two for selector 3,
// otherwise by one.
1861 index += 1 + (coup->ch_select[c] == 3);
// Walks the channel elements and runs coupling, TNS, the inverse
// transform and SBR on each, in that order.
1868 * Convert spectral data to float samples, applying all supported tools as appropriate.
1870 static void spectral_to_sample(AACContext *ac)
// The transform adds add_bias itself unless the sbr field is positive.
1873 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
// Element types are visited from 3 down to 0.
1874 for (type = 3; type >= 0; type--) {
1875 for (i = 0; i < MAX_ELEM_ID; i++) {
1876 ChannelElement *che = ac->che[type][i];
// Only types up to TYPE_CPE receive dependent coupling.
1878 if (type <= TYPE_CPE)
1879 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1880 if (che->ch[0].tns.present)
1881 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1882 if (che->ch[1].tns.present)
1883 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1884 if (type <= TYPE_CPE)
1885 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
// A coupling element is transformed only when its coupling point is
// AFTER_IMDCT.
1886 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1887 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1888 if (type == TYPE_CPE) {
1889 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1891 if (ac->m4ac.sbr > 0) {
1892 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
// Independent coupling is applied for types up to TYPE_CCE.
1895 if (type <= TYPE_CCE)
1896 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
// Parses an ADTS header and copies its parameters into the decoder context.
1902 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1905 AACADTSHeaderInfo hdr_info;
1907 size = ff_aac_parse_header(gb, &hdr_info);
// A nonzero channel configuration from the header is applied only while
// the output configuration is not locked.
1909 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1910 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1911 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1912 ac->m4ac.chan_config = hdr_info.chan_config;
1913 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1915 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
// Channel configuration 0 while unlocked: the state drops back to OC_NONE.
1917 } else if (ac->output_configured != OC_LOCKED) {
1918 ac->output_configured = OC_NONE;
1920 if (ac->output_configured != OC_LOCKED) {
1924 ac->m4ac.sample_rate = hdr_info.sample_rate;
1925 ac->m4ac.sampling_index = hdr_info.sampling_index;
1926 ac->m4ac.object_type = hdr_info.object_type;
// The codec context's sample rate is filled in only if it is still zero.
1927 if (!ac->avctx->sample_rate)
1928 ac->avctx->sample_rate = hdr_info.sample_rate;
// Only a single raw data block per ADTS frame is handled.
1929 if (hdr_info.num_aac_frames == 1) {
1930 if (!hdr_info.crc_absent)
1933 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
// Decodes one packet: optional ADTS header, then syntactic elements until
// TYPE_END, then conversion to interleaved 16-bit output.
1940 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1941 int *data_size, AVPacket *avpkt)
1943 const uint8_t *buf = avpkt->data;
1944 int buf_size = avpkt->size;
1945 AACContext *ac = avctx->priv_data;
1946 ChannelElement *che = NULL, *che_prev = NULL;
1948 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1949 int err, elem_id, data_size_tmp;
1951 int samples = 0, multiplier;
1954 init_get_bits(&gb, buf, buf_size * 8);
// Twelve set bits at the start are taken as an ADTS sync word.
1956 if (show_bits(&gb, 12) == 0xfff) {
1957 if (parse_adts_frame_header(ac, &gb) < 0) {
1958 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
// Sampling indices above 12 are rejected.
1961 if (ac->m4ac.sampling_index > 12) {
1962 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1967 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
// Each element starts with a 3-bit type and a 4-bit id.
1969 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1970 elem_id = get_bits(&gb, 4);
// Element types below TYPE_DSE need an allocated channel element.
1972 if (elem_type < TYPE_DSE) {
1973 if (!(che=get_che(ac, elem_type, elem_id))) {
1974 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
1975 elem_type, elem_id);
1981 switch (elem_type) {
1984 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1988 err = decode_cpe(ac, &gb, che);
1992 err = decode_cce(ac, &gb, che);
1996 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2000 err = skip_data_stream_element(ac, &gb);
2004 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2005 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2006 if ((err = decode_pce(ac, new_che_pos, &gb)))
// A program config element is not applied once the configuration state
// is past OC_TRIAL_PCE.
2008 if (ac->output_configured > OC_TRIAL_PCE)
2009 av_log(avctx, AV_LOG_ERROR,
2010 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2012 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
// Here elem_id serves as a byte count and is extended by an 8-bit field.
// NOTE(review): presumably only when the 4-bit value was 15 - confirm.
2018 elem_id += get_bits(&gb, 8) - 1;
2019 if (get_bits_left(&gb) < 8 * elem_id) {
2020 av_log(avctx, AV_LOG_ERROR, overread_err);
// The payload parser receives the previously decoded element and its type.
2024 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2025 err = 0; /* FIXME */
2029 err = -1; /* should not happen, but keeps compiler happy */
2034 elem_type_prev = elem_type;
// At least 3 bits must remain for the next element type.
2039 if (get_bits_left(&gb) < 3) {
2040 av_log(avctx, AV_LOG_ERROR, overread_err);
2045 spectral_to_sample(ac);
// multiplier is 1 only when sbr equals 1 and the extension sample rate
// exceeds the core rate; it doubles the sample count and the output rate.
2047 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2048 samples <<= multiplier;
2049 if (ac->output_configured < OC_LOCKED) {
2050 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2051 avctx->frame_size = samples;
// The caller's buffer must hold samples * channels 16-bit values.
2054 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2055 if (*data_size < data_size_tmp) {
2056 av_log(avctx, AV_LOG_ERROR,
2057 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2058 *data_size, data_size_tmp);
2061 *data_size = data_size_tmp;
2064 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
// Any non-OC_NONE configuration becomes locked after a decoded frame.
2066 if (ac->output_configured)
2067 ac->output_configured = OC_LOCKED;
// Bits consumed are rounded up to whole bytes; the bytes after them are
// then scanned for a nonzero value.
2069 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2070 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2071 if (buf[buf_offset])
// If the scan reached the end of the buffer, the whole packet is
// reported as consumed.
2074 return buf_size > buf_offset ? buf_consumed : buf_size;
// Releases every allocated channel element and both MDCT contexts.
2077 static av_cold int aac_decode_close(AVCodecContext *avctx)
2079 AACContext *ac = avctx->priv_data;
2082 for (i = 0; i < MAX_ELEM_ID; i++) {
2083 for (type = 0; type < 4; type++) {
// The SBR context is closed only for slots that are allocated; av_freep
// is called on every slot.
2084 if (ac->che[type][i])
2085 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2086 av_freep(&ac->che[type][i]);
2090 ff_mdct_end(&ac->mdct);
2091 ff_mdct_end(&ac->mdct_small);
2095 AVCodec aac_decoder = {
2104 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2105 .sample_fmts = (const enum SampleFormat[]) {
2106 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2108 .channel_layouts = aac_channel_layout,