3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aac_parser.h"
101 # include "arm/aac.h"
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
121 while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE ||
204 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
209 if (ac->che[type][id])
210 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211 av_freep(&ac->che[type][id]);
217 * Configure output channel order based on the current program configuration element.
219 * @param che_pos current channel position configuration
220 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
222 * @return Returns error status. 0 - OK, !0 - error
224 static av_cold int output_configure(AACContext *ac,
225 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
226 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227 int channel_config, enum OCStatus oc_type)
229 AVCodecContext *avctx = ac->avctx;
230 int i, type, channels = 0, ret;
232 if (new_che_pos != che_pos)
233 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
235 if (channel_config) {
236 for (i = 0; i < tags_per_config[channel_config]; i++) {
237 if ((ret = che_configure(ac, che_pos,
238 aac_channel_layout_map[channel_config - 1][i][0],
239 aac_channel_layout_map[channel_config - 1][i][1],
244 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
247 avctx->channel_layout = aac_channel_layout[channel_config - 1];
249 /* Allocate or free elements depending on if they are in the
250 * current program configuration.
252 * Set up default 1:1 output mapping.
254 * For a 5.1 stream the output order will be:
255 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
258 for (i = 0; i < MAX_ELEM_ID; i++) {
259 for (type = 0; type < 4; type++) {
260 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
265 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266 ac->tags_mapped = 4 * MAX_ELEM_ID;
268 avctx->channel_layout = 0;
271 avctx->channels = channels;
273 ac->output_configured = oc_type;
279 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
281 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
282 * @param sce_map mono (Single Channel Element) map
283 * @param type speaker type/position for these channels
285 static void decode_channel_map(enum ChannelPosition *cpe_map,
286 enum ChannelPosition *sce_map,
287 enum ChannelPosition type,
288 GetBitContext *gb, int n)
291 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
292 map[get_bits(gb, 4)] = type;
297 * Decode program configuration element; reference: table 4.2.
299 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
301 * @return Returns error status. 0 - OK, !0 - error
303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
306 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
309 skip_bits(gb, 2); // object_type
311 sampling_index = get_bits(gb, 4);
312 if (ac->m4ac.sampling_index != sampling_index)
313 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
315 num_front = get_bits(gb, 4);
316 num_side = get_bits(gb, 4);
317 num_back = get_bits(gb, 4);
318 num_lfe = get_bits(gb, 2);
319 num_assoc_data = get_bits(gb, 3);
320 num_cc = get_bits(gb, 4);
323 skip_bits(gb, 4); // mono_mixdown_tag
325 skip_bits(gb, 4); // stereo_mixdown_tag
328 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
331 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
332 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
333 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
335 skip_bits_long(gb, 4 * num_assoc_data);
337 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
341 /* comment field, first byte is length */
342 comment_len = get_bits(gb, 8) * 8;
343 if (get_bits_left(gb) < comment_len) {
344 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
347 skip_bits_long(gb, comment_len);
352 * Set up channel positions based on a default channel configuration
353 * as specified in table 1.17.
355 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
357 * @return Returns error status. 0 - OK, !0 - error
359 static av_cold int set_default_channel_config(AACContext *ac,
360 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
363 if (channel_config < 1 || channel_config > 7) {
364 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
369 /* default channel configurations:
371 * 1ch : front center (mono)
372 * 2ch : L + R (stereo)
373 * 3ch : front center + L + R
374 * 4ch : front center + L + R + back center
375 * 5ch : front center + L + R + back stereo
376 * 6ch : front center + L + R + back stereo + LFE
377 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
380 if (channel_config != 2)
381 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382 if (channel_config > 1)
383 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384 if (channel_config == 4)
385 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
386 if (channel_config > 4)
387 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 = AAC_CHANNEL_BACK; // back stereo
389 if (channel_config > 5)
390 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
391 if (channel_config == 7)
392 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
398 * Decode GA "General Audio" specific configuration; reference: table 4.1.
400 * @return Returns error status. 0 - OK, !0 - error
402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
405 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
406 int extension_flag, ret;
408 if (get_bits1(gb)) { // frameLengthFlag
409 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
413 if (get_bits1(gb)) // dependsOnCoreCoder
414 skip_bits(gb, 14); // coreCoderDelay
415 extension_flag = get_bits1(gb);
417 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
418 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 skip_bits(gb, 3); // layerNr
421 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
422 if (channel_config == 0) {
423 skip_bits(gb, 4); // element_instance_tag
424 if ((ret = decode_pce(ac, new_che_pos, gb)))
427 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
430 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
433 if (extension_flag) {
434 switch (ac->m4ac.object_type) {
436 skip_bits(gb, 5); // numOfSubFrame
437 skip_bits(gb, 11); // layer_length
441 case AOT_ER_AAC_SCALABLE:
443 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
444 * aacScalefactorDataResilienceFlag
445 * aacSpectralDataResilienceFlag
449 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
455 * Decode audio specific configuration; reference: table 1.13.
457 * @param data pointer to AVCodecContext extradata
 * @param   data_size   size of AVCodecContext extradata
460 * @return Returns error status. 0 - OK, !0 - error
462 static int decode_audio_specific_config(AACContext *ac, void *data,
468 init_get_bits(&gb, data, data_size * 8);
470 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
472 if (ac->m4ac.sampling_index > 12) {
473 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
476 if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
479 skip_bits_long(&gb, i);
481 switch (ac->m4ac.object_type) {
484 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
488 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
496 * linear congruential pseudorandom number generator
 * @param   previous_val    the current state of the generator (passed by value)
500 * @return Returns a 32-bit pseudorandom integer
502 static av_always_inline int lcg_random(int previous_val)
504 return previous_val * 1664525 + 1013904223;
507 static av_always_inline void reset_predict_state(PredictorState *ps)
517 static void reset_all_predictors(PredictorState *ps)
520 for (i = 0; i < MAX_PREDICTORS; i++)
521 reset_predict_state(&ps[i]);
524 static void reset_predictor_group(PredictorState *ps, int group_num)
527 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
528 reset_predict_state(&ps[i]);
531 #define AAC_INIT_VLC_STATIC(num, size) \
532 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
533 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
534 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
537 static av_cold int aac_decode_init(AVCodecContext *avctx)
539 AACContext *ac = avctx->priv_data;
542 ac->m4ac.sample_rate = avctx->sample_rate;
544 if (avctx->extradata_size > 0) {
545 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
549 avctx->sample_fmt = SAMPLE_FMT_S16;
551 AAC_INIT_VLC_STATIC( 0, 304);
552 AAC_INIT_VLC_STATIC( 1, 270);
553 AAC_INIT_VLC_STATIC( 2, 550);
554 AAC_INIT_VLC_STATIC( 3, 300);
555 AAC_INIT_VLC_STATIC( 4, 328);
556 AAC_INIT_VLC_STATIC( 5, 294);
557 AAC_INIT_VLC_STATIC( 6, 306);
558 AAC_INIT_VLC_STATIC( 7, 268);
559 AAC_INIT_VLC_STATIC( 8, 510);
560 AAC_INIT_VLC_STATIC( 9, 366);
561 AAC_INIT_VLC_STATIC(10, 462);
565 dsputil_init(&ac->dsp, avctx);
567 ac->random_state = 0x1f2e3d4c;
569 // -1024 - Compensate wrong IMDCT method.
570 // 32768 - Required to scale values to the correct range for the bias method
571 // for float to int16 conversion.
573 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
574 ac->add_bias = 385.0f;
575 ac->sf_scale = 1. / (-1024. * 32768.);
579 ac->sf_scale = 1. / -1024.;
585 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
586 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
587 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
590 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
591 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
592 // window initialization
593 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
594 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
595 ff_init_ff_sine_windows(10);
596 ff_init_ff_sine_windows( 7);
604 * Skip data_stream_element; reference: table 4.10.
606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
608 int byte_align = get_bits1(gb);
609 int count = get_bits(gb, 8);
611 count += get_bits(gb, 8);
615 if (get_bits_left(gb) < 8 * count) {
616 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
619 skip_bits_long(gb, 8 * count);
623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
628 ics->predictor_reset_group = get_bits(gb, 5);
629 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
630 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
634 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
635 ics->prediction_used[sfb] = get_bits1(gb);
641 * Decode Individual Channel Stream info; reference: table 4.6.
643 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
646 GetBitContext *gb, int common_window)
649 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 memset(ics, 0, sizeof(IndividualChannelStream));
653 ics->window_sequence[1] = ics->window_sequence[0];
654 ics->window_sequence[0] = get_bits(gb, 2);
655 ics->use_kb_window[1] = ics->use_kb_window[0];
656 ics->use_kb_window[0] = get_bits1(gb);
657 ics->num_window_groups = 1;
658 ics->group_len[0] = 1;
659 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
661 ics->max_sfb = get_bits(gb, 4);
662 for (i = 0; i < 7; i++) {
664 ics->group_len[ics->num_window_groups - 1]++;
666 ics->num_window_groups++;
667 ics->group_len[ics->num_window_groups - 1] = 1;
670 ics->num_windows = 8;
671 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
672 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
673 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674 ics->predictor_present = 0;
676 ics->max_sfb = get_bits(gb, 6);
677 ics->num_windows = 1;
678 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
679 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
680 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
681 ics->predictor_present = get_bits1(gb);
682 ics->predictor_reset_group = 0;
683 if (ics->predictor_present) {
684 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
685 if (decode_prediction(ac, ics, gb)) {
686 memset(ics, 0, sizeof(IndividualChannelStream));
689 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 memset(ics, 0, sizeof(IndividualChannelStream));
694 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 memset(ics, 0, sizeof(IndividualChannelStream));
701 if (ics->max_sfb > ics->num_swb) {
702 av_log(ac->avctx, AV_LOG_ERROR,
703 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
704 ics->max_sfb, ics->num_swb);
705 memset(ics, 0, sizeof(IndividualChannelStream));
713 * Decode band types (section_data payload); reference: table 4.46.
715 * @param band_type array of the used band type
716 * @param band_type_run_end array of the last scalefactor band of a band type run
718 * @return Returns error status. 0 - OK, !0 - error
720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
721 int band_type_run_end[120], GetBitContext *gb,
722 IndividualChannelStream *ics)
725 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
726 for (g = 0; g < ics->num_window_groups; g++) {
728 while (k < ics->max_sfb) {
729 uint8_t sect_end = k;
731 int sect_band_type = get_bits(gb, 4);
732 if (sect_band_type == 12) {
733 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
736 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 sect_end += sect_len_incr;
738 sect_end += sect_len_incr;
739 if (get_bits_left(gb) < 0) {
740 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
743 if (sect_end > ics->max_sfb) {
744 av_log(ac->avctx, AV_LOG_ERROR,
745 "Number of bands (%d) exceeds limit (%d).\n",
746 sect_end, ics->max_sfb);
749 for (; k < sect_end; k++) {
750 band_type [idx] = sect_band_type;
751 band_type_run_end[idx++] = sect_end;
759 * Decode scalefactors; reference: table 4.47.
761 * @param global_gain first scalefactor value as scalefactors are differentially coded
762 * @param band_type array of the used band type
763 * @param band_type_run_end array of the last scalefactor band of a band type run
764 * @param sf array of scalefactors or intensity stereo positions
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
769 unsigned int global_gain,
770 IndividualChannelStream *ics,
771 enum BandType band_type[120],
772 int band_type_run_end[120])
774 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
776 int offset[3] = { global_gain, global_gain - 90, 100 };
778 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
779 for (g = 0; g < ics->num_window_groups; g++) {
780 for (i = 0; i < ics->max_sfb;) {
781 int run_end = band_type_run_end[idx];
782 if (band_type[idx] == ZERO_BT) {
783 for (; i < run_end; i++, idx++)
785 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
786 for (; i < run_end; i++, idx++) {
787 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788 if (offset[2] > 255U) {
789 av_log(ac->avctx, AV_LOG_ERROR,
790 "%s (%d) out of range.\n", sf_str[2], offset[2]);
793 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
795 } else if (band_type[idx] == NOISE_BT) {
796 for (; i < run_end; i++, idx++) {
797 if (noise_flag-- > 0)
798 offset[1] += get_bits(gb, 9) - 256;
800 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801 if (offset[1] > 255U) {
802 av_log(ac->avctx, AV_LOG_ERROR,
803 "%s (%d) out of range.\n", sf_str[1], offset[1]);
806 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
809 for (; i < run_end; i++, idx++) {
810 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811 if (offset[0] > 255U) {
812 av_log(ac->avctx, AV_LOG_ERROR,
813 "%s (%d) out of range.\n", sf_str[0], offset[0]);
816 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
825 * Decode pulse data; reference: table 4.7.
827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
828 const uint16_t *swb_offset, int num_swb)
831 pulse->num_pulse = get_bits(gb, 2) + 1;
832 pulse_swb = get_bits(gb, 6);
833 if (pulse_swb >= num_swb)
835 pulse->pos[0] = swb_offset[pulse_swb];
836 pulse->pos[0] += get_bits(gb, 5);
837 if (pulse->pos[0] > 1023)
839 pulse->amp[0] = get_bits(gb, 4);
840 for (i = 1; i < pulse->num_pulse; i++) {
841 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 if (pulse->pos[i] > 1023)
844 pulse->amp[i] = get_bits(gb, 4);
850 * Decode Temporal Noise Shaping data; reference: table 4.48.
852 * @return Returns error status. 0 - OK, !0 - error
854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
855 GetBitContext *gb, const IndividualChannelStream *ics)
857 int w, filt, i, coef_len, coef_res, coef_compress;
858 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
859 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
860 for (w = 0; w < ics->num_windows; w++) {
861 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 coef_res = get_bits1(gb);
864 for (filt = 0; filt < tns->n_filt[w]; filt++) {
866 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
868 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 tns->order[w][filt], tns_max_order);
871 tns->order[w][filt] = 0;
874 if (tns->order[w][filt]) {
875 tns->direction[w][filt] = get_bits1(gb);
876 coef_compress = get_bits1(gb);
877 coef_len = coef_res + 3 - coef_compress;
878 tmp2_idx = 2 * coef_compress + coef_res;
880 for (i = 0; i < tns->order[w][filt]; i++)
881 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
890 * Decode Mid/Side data; reference: table 4.54.
892 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
893 * [1] mask is decoded from bitstream; [2] mask is all 1s;
894 * [3] reserved for scalable AAC
896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
900 if (ms_present == 1) {
901 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
902 cpe->ms_mask[idx] = get_bits1(gb);
903 } else if (ms_present == 2) {
904 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
913 *dst++ = v[idx & 15] * s;
914 *dst++ = v[idx>>4 & 15] * s;
920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
924 *dst++ = v[idx & 3] * s;
925 *dst++ = v[idx>>2 & 3] * s;
926 *dst++ = v[idx>>4 & 3] * s;
927 *dst++ = v[idx>>6 & 3] * s;
933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
934 unsigned sign, const float *scale)
936 union float754 s0, s1;
938 s0.f = s1.f = *scale;
939 s0.i ^= sign >> 1 << 31;
942 *dst++ = v[idx & 15] * s0.f;
943 *dst++ = v[idx>>4 & 15] * s1.f;
950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
951 unsigned sign, const float *scale)
953 unsigned nz = idx >> 12;
954 union float754 s = { .f = *scale };
957 t.i = s.i ^ (sign & 1<<31);
958 *dst++ = v[idx & 3] * t.f;
960 sign <<= nz & 1; nz >>= 1;
961 t.i = s.i ^ (sign & 1<<31);
962 *dst++ = v[idx>>2 & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1<<31);
966 *dst++ = v[idx>>4 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1<<31);
970 *dst++ = v[idx>>6 & 3] * t.f;
977 * Decode spectral data; reference: table 4.50.
978 * Dequantize and scale spectral data; reference: 4.6.3.3.
980 * @param coef array of dequantized, scaled spectral data
981 * @param sf array of scalefactors or intensity stereo positions
982 * @param pulse_present set if pulses are present
983 * @param pulse pointer to pulse data struct
984 * @param band_type array of the used band type
986 * @return Returns error status. 0 - OK, !0 - error
988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989 GetBitContext *gb, const float sf[120],
990 int pulse_present, const Pulse *pulse,
991 const IndividualChannelStream *ics,
992 enum BandType band_type[120])
994 int i, k, g, idx = 0;
995 const int c = 1024 / ics->num_windows;
996 const uint16_t *offsets = ics->swb_offset;
997 float *coef_base = coef;
1000 for (g = 0; g < ics->num_windows; g++)
1001 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1003 for (g = 0; g < ics->num_window_groups; g++) {
1004 unsigned g_len = ics->group_len[g];
1006 for (i = 0; i < ics->max_sfb; i++, idx++) {
1007 const unsigned cbt_m1 = band_type[idx] - 1;
1008 float *cfo = coef + offsets[i];
1009 int off_len = offsets[i + 1] - offsets[i];
1012 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1013 for (group = 0; group < g_len; group++, cfo+=128) {
1014 memset(cfo, 0, off_len * sizeof(float));
1016 } else if (cbt_m1 == NOISE_BT - 1) {
1017 for (group = 0; group < g_len; group++, cfo+=128) {
1021 for (k = 0; k < off_len; k++) {
1022 ac->random_state = lcg_random(ac->random_state);
1023 cfo[k] = ac->random_state;
1026 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1027 scale = sf[idx] / sqrtf(band_energy);
1028 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1031 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1032 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1033 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1034 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1035 OPEN_READER(re, gb);
1037 switch (cbt_m1 >> 1) {
1039 for (group = 0; group < g_len; group++, cfo+=128) {
1047 UPDATE_CACHE(re, gb);
1048 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1050 if (code >= cb_size) {
1052 goto err_cb_overflow;
1055 cb_idx = cb_vector_idx[code];
1056 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1062 for (group = 0; group < g_len; group++, cfo+=128) {
1072 UPDATE_CACHE(re, gb);
1073 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1075 if (code >= cb_size) {
1077 goto err_cb_overflow;
1080 #if MIN_CACHE_BITS < 20
1081 UPDATE_CACHE(re, gb);
1083 cb_idx = cb_vector_idx[code];
1084 nnz = cb_idx >> 8 & 15;
1085 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086 LAST_SKIP_BITS(re, gb, nnz);
1087 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1093 for (group = 0; group < g_len; group++, cfo+=128) {
1101 UPDATE_CACHE(re, gb);
1102 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1104 if (code >= cb_size) {
1106 goto err_cb_overflow;
1109 cb_idx = cb_vector_idx[code];
1110 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1117 for (group = 0; group < g_len; group++, cfo+=128) {
1127 UPDATE_CACHE(re, gb);
1128 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1130 if (code >= cb_size) {
1132 goto err_cb_overflow;
1135 cb_idx = cb_vector_idx[code];
1136 nnz = cb_idx >> 8 & 15;
1137 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1138 LAST_SKIP_BITS(re, gb, nnz);
1139 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1145 for (group = 0; group < g_len; group++, cfo+=128) {
1147 uint32_t *icf = (uint32_t *) cf;
1157 UPDATE_CACHE(re, gb);
1158 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1166 if (code >= cb_size) {
1168 goto err_cb_overflow;
1171 cb_idx = cb_vector_idx[code];
1174 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1175 LAST_SKIP_BITS(re, gb, nnz);
1177 for (j = 0; j < 2; j++) {
1181 /* The total length of escape_sequence must be < 22 bits according
1182 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1183 UPDATE_CACHE(re, gb);
1184 b = GET_CACHE(re, gb);
1185 b = 31 - av_log2(~b);
1188 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1192 #if MIN_CACHE_BITS < 21
1193 LAST_SKIP_BITS(re, gb, b + 1);
1194 UPDATE_CACHE(re, gb);
1196 SKIP_BITS(re, gb, b + 1);
1199 n = (1 << b) + SHOW_UBITS(re, gb, b);
1200 LAST_SKIP_BITS(re, gb, b);
1201 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1204 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1205 *icf++ = (bits & 1<<31) | v;
1212 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1216 CLOSE_READER(re, gb);
1222 if (pulse_present) {
1224 for (i = 0; i < pulse->num_pulse; i++) {
1225 float co = coef_base[ pulse->pos[i] ];
1226 while (offsets[idx + 1] <= pulse->pos[i])
1228 if (band_type[idx] != NOISE_BT && sf[idx]) {
1229 float ico = -pulse->amp[i];
1232 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1234 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1241 av_log(ac->avctx, AV_LOG_ERROR,
1242 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1243 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1247 static av_always_inline float flt16_round(float pf)
1251 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1255 static av_always_inline float flt16_even(float pf)
1259 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1263 static av_always_inline float flt16_trunc(float pf)
1267 pun.i &= 0xFFFF0000U;
/*
 * Advance the predictor state *ps by one spectral coefficient.
 * Gains k1 and k2 are derived from the stored correlation and variance terms,
 * the predicted value pv is formed from the two stored r values, and the
 * state is then refreshed from the error terms e0 and e1.
 * Everything written back into *ps goes through flt16_trunc.
 */
1271 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1274 const float a = 0.953125; // 61.0 / 64
1275 const float alpha = 0.90625; // 29.0 / 32
/* A gain stays zero unless its variance term exceeds 1. */
1280 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1281 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1283 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
/*
 * NOTE(review): callers pass a fourth argument (see apply_prediction); this
 * addition is presumably guarded by it, but the guard is not visible here.
 */
1285 *coef += pv * ac->sf_scale;
/* e0 is the coefficient with sf_scale divided back out. */
1287 e0 = *coef / ac->sf_scale;
1288 e1 = e0 - k1 * ps->r0;
/* Running sums: the previous value weighted by alpha plus the new term. */
1290 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1291 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1292 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1293 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
/* r1 must be derived from the old r0 before r0 itself is overwritten. */
1295 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1296 ps->r0 = flt16_trunc(a * e0);
1300 * Apply AAC-Main style frequency domain prediction.
/*
 * Run predict() on every coefficient of the first
 * ff_aac_pred_sfb_max[sampling_index] scalefactor bands when the current
 * window sequence is not EIGHT_SHORT_SEQUENCE, then apply a predictor group
 * reset if one is requested. The whole predictor state is reset the first
 * time this is called for a channel.
 */
1302 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1306 if (!sce->ics.predictor_initialized) {
1307 reset_all_predictors(sce->predictor_state);
1308 sce->ics.predictor_initialized = 1;
1311 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1312 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1313 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
/* The last argument is true only when prediction is signalled for this band. */
1314 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1315 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1318 if (sce->ics.predictor_reset_group)
1319 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
/*
 * NOTE(review): presumably the EIGHT_SHORT_SEQUENCE branch; the else line is
 * not among the visible lines.
 */
1321 reset_all_predictors(sce->predictor_state);
1325 * Decode an individual_channel_stream payload; reference: table 4.44.
1327 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1328 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1330 * @return Returns error status. 0 - OK, !0 - error
/*
 * Parsing order, as read from gb: the 8-bit global_gain; ics_info (only when
 * neither common_window nor scale_flag is set); band types; scalefactors;
 * the pulse flag and pulse data; the TNS flag and TNS data; one further flag
 * bit that, when set, is reported as the missing feature SSR; and finally the
 * spectral data, which is dequantized into sce->coeffs.
 * A set pulse flag is logged as an error for EIGHT_SHORT_SEQUENCE.
 * For AOT_AAC_MAIN without a common window, apply_prediction() runs at the
 * end.
 */
1332 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1333 GetBitContext *gb, int common_window, int scale_flag)
1336 TemporalNoiseShaping *tns = &sce->tns;
1337 IndividualChannelStream *ics = &sce->ics;
1338 float *out = sce->coeffs;
1339 int global_gain, pulse_present = 0;
1341 /* This assignment is to silence a GCC warning about the variable being used
1342 * uninitialized when in fact it always is.
1344 pulse.num_pulse = 0;
1346 global_gain = get_bits(gb, 8);
1348 if (!common_window && !scale_flag) {
1349 if (decode_ics_info(ac, ics, gb, 0) < 0)
1353 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1355 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1360 if ((pulse_present = get_bits1(gb))) {
1361 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1362 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1365 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1366 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1370 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1372 if (get_bits1(gb)) {
1373 av_log_missing_feature(ac->avctx, "SSR", 1);
1378 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1381 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1382 apply_prediction(ac, sce);
1388 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
/*
 * For every scalefactor band flagged in cpe->ms_mask whose band type is below
 * NOISE_BT in both channels, run dsp.butterflies_float over that band in each
 * window of the group. Window layout is taken from channel 0; consecutive
 * windows are addressed 128 coefficients apart.
 */
1390 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1392 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1393 float *ch0 = cpe->ch[0].coeffs;
1394 float *ch1 = cpe->ch[1].coeffs;
1395 int g, i, group, idx = 0;
1396 const uint16_t *offsets = ics->swb_offset;
/* idx runs over (group, band) pairs and is never reset between groups. */
1397 for (g = 0; g < ics->num_window_groups; g++) {
1398 for (i = 0; i < ics->max_sfb; i++, idx++) {
1399 if (cpe->ms_mask[idx] &&
1400 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1401 for (group = 0; group < ics->group_len[g]; group++) {
1402 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1403 ch1 + group * 128 + offsets[i],
1404 offsets[i+1] - offsets[i]);
/* Step both channel pointers past the windows of this group. */
1408 ch0 += ics->group_len[g] * 128;
1409 ch1 += ics->group_len[g] * 128;
1414 * intensity stereo decoding; reference: 4.6.8.2.3
1416 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1417 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1418 * [3] reserved for scalable AAC
/*
 * For runs of bands whose channel 1 band type is INTENSITY_BT or
 * INTENSITY_BT2, overwrite the channel 1 coefficients with the channel 0
 * coefficients multiplied by a signed copy of the channel 1 scalefactor.
 * Window layout is taken from channel 1.
 */
1420 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1422 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1423 SingleChannelElement *sce1 = &cpe->ch[1];
1424 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1425 const uint16_t *offsets = ics->swb_offset;
1426 int g, group, i, k, idx = 0;
1429 for (g = 0; g < ics->num_window_groups; g++) {
/* No increment here: i is advanced inside the body, one whole run at a time. */
1430 for (i = 0; i < ics->max_sfb;) {
1431 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1432 const int bt_run_end = sce1->band_type_run_end[idx];
1433 for (; i < bt_run_end; i++, idx++) {
/* Maps the band type to a sign: -1 when it equals 14, +1 when it equals 15. */
1434 c = -1 + 2 * (sce1->band_type[idx] - 14);
/*
 * A set ms_mask entry flips the sign.
 * NOTE(review): presumably only done when ms_present is set; the guard is
 * not among the visible lines.
 */
1436 c *= 1 - 2 * cpe->ms_mask[idx];
1437 scale = c * sce1->sf[idx];
1438 for (group = 0; group < ics->group_len[g]; group++)
1439 for (k = offsets[i]; k < offsets[i + 1]; k++)
1440 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
/* Not an intensity run: move idx to the end of the run. */
1443 int bt_run_end = sce1->band_type_run_end[idx];
1444 idx += bt_run_end - i;
1448 coef0 += ics->group_len[g] * 128;
1449 coef1 += ics->group_len[g] * 128;
1454 * Decode a channel_pair_element; reference: table 4.4.
1456 * @param cpe Channel pair element whose two channels are decoded.
1458 * @return Returns error status. 0 - OK, !0 - error
/*
 * Reads the common_window bit. With a common window, ics_info is decoded once
 * into ch[0] and copied to ch[1], and the 2-bit ms_present field is read
 * (value 3 is logged as reserved). Both channels are then decoded with
 * decode_ics(), after which mid/side stereo, AAC-Main prediction and
 * intensity stereo are applied.
 */
1460 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1462 int i, ret, common_window, ms_present = 0;
1464 common_window = get_bits1(gb);
1465 if (common_window) {
1466 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
/*
 * The struct copy below overwrites the use_kb_window values of ch[1], so its
 * entry [0] is saved first and written back as entry [1] afterwards.
 */
1468 i = cpe->ch[1].ics.use_kb_window[0];
1469 cpe->ch[1].ics = cpe->ch[0].ics;
1470 cpe->ch[1].ics.use_kb_window[1] = i;
1471 ms_present = get_bits(gb, 2);
1472 if (ms_present == 3) {
1473 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1475 } else if (ms_present)
1476 decode_mid_side_stereo(cpe, gb, ms_present);
1478 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1480 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1483 if (common_window) {
1485 apply_mid_side_stereo(ac, cpe);
/* With a common window, decode_ics() skips prediction, so it is run here. */
1486 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1487 apply_prediction(ac, &cpe->ch[0]);
1488 apply_prediction(ac, &cpe->ch[1]);
1492 apply_intensity_stereo(cpe, ms_present);
1497 * Decode coupling_channel_element; reference: table 4.8.
1499 * @param che Channel element that receives the coupling channel and its coupling data.
1501 * @return Returns error status. 0 - OK, !0 - error
/*
 * Parses a coupling channel element into che: the coupling point, the list
 * of coupled target elements, the gain sign bit and gain scale, the coupling
 * channel's own stream (decoded into che->ch[0] with decode_ics()), and
 * finally the gain lists, which are stored in coup->gain.
 */
1503 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1509 SingleChannelElement *sce = &che->ch[0];
1510 ChannelCoupling *coup = &che->coup;
1512 coup->coupling_point = 2 * get_bits1(gb);
1513 coup->num_coupled = get_bits(gb, 3);
/* The loop covers num_coupled + 1 targets. */
1514 for (c = 0; c <= coup->num_coupled; c++) {
1516 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1517 coup->id_select[c] = get_bits(gb, 4);
1518 if (coup->type[c] == TYPE_CPE) {
1519 coup->ch_select[c] = get_bits(gb, 2);
1520 if (coup->ch_select[c] == 3)
1523 coup->ch_select[c] = 2;
/* Adds 1 when the bit read here is set or when the first bit was set. */
1525 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1527 sign = get_bits(gb, 1);
/* scale = 2^(2^(n - 3)) for the 2-bit field n. */
1528 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1530 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1533 for (c = 0; c < num_gain; c++) {
1537 float gain_cache = 1.;
/* With AFTER_IMDCT coupling no flag bit is read and cge is fixed to 1. */
1539 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1540 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1541 gain_cache = pow(scale, -gain);
/* AFTER_IMDCT coupling keeps a single gain per list, in slot 0. */
1543 if (coup->coupling_point == AFTER_IMDCT) {
1544 coup->gain[c][0] = gain_cache;
1546 for (g = 0; g < sce->ics.num_window_groups; g++) {
1547 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1548 if (sce->band_type[idx] != ZERO_BT) {
1550 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1558 gain_cache = pow(scale, -t) * s;
1561 coup->gain[c][idx] = gain_cache;
1571 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1573 * @return Returns number of bytes consumed.
/*
 * Reads exclude_mask bits in groups of seven. After each group, and only
 * while fewer than MAX_CHANNELS - 7 entries have been stored, one more bit
 * is read to decide whether another group follows.
 * The return value is the number of groups read (num_excl_chan / 7).
 */
1575 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1579 int num_excl_chan = 0;
1582 for (i = 0; i < 7; i++)
1583 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1584 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1586 return num_excl_chan / 7;
1590 * Decode dynamic range information; reference: table 4.52.
1592 * @param cnt length of TYPE_FIL syntactic element in bytes
1594 * @return Returns number of bytes consumed.
/*
 * Each optional section is preceded by a one-bit presence flag.
 * drc_num_bands starts at 1 and grows by band_incr when band data is
 * present; it sets how many band_top entries and how many
 * dyn_rng_sgn / dyn_rng_ctl pairs are read.
 */
1596 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1597 GetBitContext *gb, int cnt)
1600 int drc_num_bands = 1;
1603 /* pce_tag_present? */
1604 if (get_bits1(gb)) {
1605 che_drc->pce_instance_tag = get_bits(gb, 4);
1606 skip_bits(gb, 4); // tag_reserved_bits
1610 /* excluded_chns_present? */
1611 if (get_bits1(gb)) {
1612 n += decode_drc_channel_exclusions(che_drc, gb);
1615 /* drc_bands_present? */
1616 if (get_bits1(gb)) {
1617 che_drc->band_incr = get_bits(gb, 4);
1618 che_drc->interpolation_scheme = get_bits(gb, 4);
1620 drc_num_bands += che_drc->band_incr;
1621 for (i = 0; i < drc_num_bands; i++) {
1622 che_drc->band_top[i] = get_bits(gb, 8);
1627 /* prog_ref_level_present? */
1628 if (get_bits1(gb)) {
1629 che_drc->prog_ref_level = get_bits(gb, 7);
1630 skip_bits1(gb); // prog_ref_level_reserved_bits
/* One sign bit and one 7-bit control value per band. */
1634 for (i = 0; i < drc_num_bands; i++) {
1635 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1636 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1644 * Decode extension data (incomplete); reference: table 4.51.
1646 * @param cnt length of TYPE_FIL syntactic element in bytes
1648 * @return Returns number of bytes consumed
/*
 * Dispatches on the 4-bit extension type read from gb.
 * SBR payloads go to ff_decode_sbr_extension() unless one of the conditions
 * logged below applies. EXT_DYNAMIC_RANGE goes to decode_dynamic_range().
 * Skipped payloads use skip_bits_long(gb, 8 * cnt - 4): cnt bytes expressed
 * in bits, minus the 4 type bits already consumed.
 */
1650 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1651 ChannelElement *che, enum RawDataBlockType elem_type)
1655 switch (get_bits(gb, 4)) { // extension type
1656 case EXT_SBR_DATA_CRC:
1660 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1662 } else if (!ac->m4ac.sbr) {
1663 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1664 skip_bits_long(gb, 8 * cnt - 4);
1666 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1667 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1668 skip_bits_long(gb, 8 * cnt - 4);
/* Mono stream, parametric stereo state still -1, output not yet locked. */
1670 } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1673 output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1677 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1679 case EXT_DYNAMIC_RANGE:
1680 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1684 case EXT_DATA_ELEMENT:
1686 skip_bits_long(gb, 8 * cnt - 4);
1693 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1695 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1696 * @param coef spectral coefficients
/*
 * For every window and every TNS filter in it, converts the stored filter
 * coefficients to LPC form with compute_lpc_coefs() and runs the filter in
 * place over the covered coefficient range of coef.
 * Filters are laid out downwards from ics->num_swb: each filter's bottom is
 * its top minus its length, floored at 0.
 */
1698 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1699 IndividualChannelStream *ics, int decode)
/* Upper limit for the band indices used in the offset lookups below. */
1701 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1703 int bottom, top, order, start, end, size, inc;
1704 float lpc[TNS_MAX_ORDER];
1706 for (w = 0; w < ics->num_windows; w++) {
1707 bottom = ics->num_swb;
1708 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1710 bottom = FFMAX(0, top - tns->length[w][filt]);
1711 order = tns->order[w][filt];
1716 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1718 start = ics->swb_offset[FFMIN(bottom, mmm)];
1719 end = ics->swb_offset[FFMIN( top, mmm)];
/* size is the number of coefficients covered by this filter. */
1720 if ((size = end - start) <= 0)
1722 if (tns->direction[w][filt]) {
/* Each output uses at most order previously filtered neighbours. */
1731 for (m = 0; m < size; m++, start += inc)
1732 for (i = 1; i <= FFMIN(m, order); i++)
1733 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1739 * Conduct IMDCT and windowing.
/*
 * Transforms sce->coeffs into ac->buf_mdct with ff_imdct_half(), combines the
 * result with the samples kept in sce->saved to produce sce->ret (with bias
 * added), and then refills sce->saved from the second half of the transform
 * output for use by the next frame.
 * For EIGHT_SHORT_SEQUENCE the transform is run on eight 128-coefficient
 * slices with mdct_small; otherwise a single call on mdct is made.
 */
1741 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1743 IndividualChannelStream *ics = &sce->ics;
1744 float *in = sce->coeffs;
1745 float *out = sce->ret;
1746 float *saved = sce->saved;
/* use_kb_window[0] picks swindow; use_kb_window[1] picks both *_prev windows. */
1747 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1748 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1749 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1750 float *buf = ac->buf_mdct;
1751 float *temp = ac->temp;
1755 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
/* window_sequence[1] is compared against the sequences a short block should not follow. */
1756 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1757 av_log(ac->avctx, AV_LOG_WARNING,
1758 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1759 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1760 for (i = 0; i < 1024; i += 128)
1761 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1763 ff_imdct_half(&ac->mdct, buf, in);
1765 /* window overlapping
1766 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1767 * and long to short transitions are considered to be short to short
1768 * transitions. This leaves just two cases (long to long and short to short)
1769 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1771 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1772 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1773 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
1775 for (i = 0; i < 448; i++)
1776 out[i] = saved[i] + bias;
1778 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1779 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1780 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1781 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1782 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
1783 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1784 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
1786 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1787 for (i = 576; i < 1024; i++)
1788 out[i] = buf[i-512] + bias;
1793 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1794 for (i = 0; i < 64; i++)
1795 saved[i] = temp[64 + i] - bias;
1796 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1797 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1798 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1799 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1800 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1801 memcpy( saved, buf + 512, 448 * sizeof(float));
1802 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1803 } else { // LONG_STOP or ONLY_LONG
1804 memcpy( saved, buf + 512, 512 * sizeof(float));
1809 * Apply dependent channel coupling (applied before IMDCT).
1811 * @param index index into coupling gain array
/*
 * Adds the coupling channel spectrum (cce->ch[0].coeffs), weighted per band
 * by cce->coup.gain[index][idx], onto target->coeffs. Bands whose coupling
 * channel band type is ZERO_BT are skipped. Window grouping is taken from
 * the coupling channel. An error is logged for AOT_AAC_LTP.
 */
1813 static void apply_dependent_coupling(AACContext *ac,
1814 SingleChannelElement *target,
1815 ChannelElement *cce, int index)
1817 IndividualChannelStream *ics = &cce->ch[0].ics;
1818 const uint16_t *offsets = ics->swb_offset;
1819 float *dest = target->coeffs;
1820 const float *src = cce->ch[0].coeffs;
1821 int g, i, group, k, idx = 0;
1822 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1823 av_log(ac->avctx, AV_LOG_ERROR,
1824 "Dependent coupling is not supported together with LTP\n");
/* idx runs over (group, band) pairs and is never reset between groups. */
1827 for (g = 0; g < ics->num_window_groups; g++) {
1828 for (i = 0; i < ics->max_sfb; i++, idx++) {
1829 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1830 const float gain = cce->coup.gain[index][idx];
1831 for (group = 0; group < ics->group_len[g]; group++) {
1832 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1834 dest[group * 128 + k] += gain * src[group * 128 + k];
/* Step both pointers past the windows of this group. */
1839 dest += ics->group_len[g] * 128;
1840 src += ics->group_len[g] * 128;
1845 * Apply independent channel coupling (applied after IMDCT).
1847 * @param index index into coupling gain array
/*
 * Adds the coupling channel output (cce->ch[0].ret), with ac->add_bias
 * removed and scaled by the single gain in slot 0 of the selected gain list,
 * onto target->ret. 1024 samples are processed, or 2048 when m4ac.sbr == 1.
 */
1849 static void apply_independent_coupling(AACContext *ac,
1850 SingleChannelElement *target,
1851 ChannelElement *cce, int index)
1854 const float gain = cce->coup.gain[index][0];
1855 const float bias = ac->add_bias;
1856 const float *src = cce->ch[0].ret;
1857 float *dest = target->ret;
1858 const int len = 1024 << (ac->m4ac.sbr == 1);
1860 for (i = 0; i < len; i++)
1861 dest[i] += gain * (src[i] - bias);
1865 * channel coupling transformation interface
1867 * @param index index into coupling gain array
1868 * @param apply_coupling_method pointer to (in)dependent coupling function
/*
 * Walks all coupling channel elements whose coupling_point equals the one
 * requested and, for each coupled target that matches (type, elem_id), calls
 * apply_coupling_method on the channel or channels of cc selected by
 * ch_select. index is the gain list position handed to the callback.
 */
1870 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1871 enum RawDataBlockType type, int elem_id,
1872 enum CouplingPoint coupling_point,
1873 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1877 for (i = 0; i < MAX_ELEM_ID; i++) {
1878 ChannelElement *cce = ac->che[TYPE_CCE][i];
1881 if (cce && cce->coup.coupling_point == coupling_point) {
1882 ChannelCoupling *coup = &cce->coup;
1884 for (c = 0; c <= coup->num_coupled; c++) {
1885 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
/* ch_select 1 skips the first channel; ch_select 2 skips the second. */
1886 if (coup->ch_select[c] != 1) {
1887 apply_coupling_method(ac, &cc->ch[0], cce, index);
1888 if (coup->ch_select[c] != 0)
1891 if (coup->ch_select[c] != 2)
1892 apply_coupling_method(ac, &cc->ch[1], cce, index++);
/*
 * NOTE(review): presumably the non-matching branch, which skips the gain
 * lists of this target (two lists when ch_select is 3); the else line is
 * not among the visible lines.
 */
1894 index += 1 + (coup->ch_select[c] == 3);
1901 * Convert spectral data to float samples, applying all supported tools as appropriate.
/*
 * Runs the per-element processing chain over every channel element, visiting
 * element types from 3 down to 0: dependent coupling before TNS, TNS on both
 * channels, dependent coupling between TNS and IMDCT, IMDCT and windowing,
 * SBR when m4ac.sbr > 0, and independent coupling after the IMDCT.
 */
1903 static void spectral_to_sample(AACContext *ac)
/* The bias is applied by the IMDCT stage only when m4ac.sbr <= 0. */
1906 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1907 for (type = 3; type >= 0; type--) {
1908 for (i = 0; i < MAX_ELEM_ID; i++) {
1909 ChannelElement *che = ac->che[type][i];
1911 if (type <= TYPE_CPE)
1912 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1913 if (che->ch[0].tns.present)
1914 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1915 if (che->ch[1].tns.present)
1916 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1917 if (type <= TYPE_CPE)
1918 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
/* Coupling elements are only transformed when they couple after the IMDCT. */
1919 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1920 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1921 if (type == TYPE_CPE) {
1922 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1924 if (ac->m4ac.sbr > 0) {
1925 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1928 if (type <= TYPE_CCE)
1929 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
/*
 * Parses an ADTS header with ff_aac_parse_header() and copies sample rate,
 * sampling index and object type into ac->m4ac. While the output
 * configuration is not OC_LOCKED, a non-zero chan_config from the header is
 * used to build and apply a default channel configuration as a trial frame;
 * with a zero chan_config the configuration state is set to OC_NONE.
 * Headers announcing more than one AAC frame are reported as a missing
 * feature.
 */
1935 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1938 AACADTSHeaderInfo hdr_info;
1940 size = ff_aac_parse_header(gb, &hdr_info);
1942 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1943 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1944 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1945 ac->m4ac.chan_config = hdr_info.chan_config;
1946 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1948 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1950 } else if (ac->output_configured != OC_LOCKED) {
1951 ac->output_configured = OC_NONE;
1953 if (ac->output_configured != OC_LOCKED) {
1957 ac->m4ac.sample_rate = hdr_info.sample_rate;
1958 ac->m4ac.sampling_index = hdr_info.sampling_index;
1959 ac->m4ac.object_type = hdr_info.object_type;
/* Only fills in the codec context sample rate when it is still unset. */
1960 if (!ac->avctx->sample_rate)
1961 ac->avctx->sample_rate = hdr_info.sample_rate;
1962 if (hdr_info.num_aac_frames == 1) {
1963 if (!hdr_info.crc_absent)
1966 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
/*
 * Decodes one packet. An ADTS header is parsed first when the packet starts
 * with the 12-bit pattern 0xfff. Syntax elements are then read and
 * dispatched by type until TYPE_END, spectral_to_sample() produces the float
 * output, and the result is converted to interleaved 16-bit samples in data.
 * *data_size is checked against, and then set to, the number of bytes
 * written. The return value is a byte count derived from the bit position
 * reached in the packet.
 */
1973 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1974 int *data_size, AVPacket *avpkt)
1976 const uint8_t *buf = avpkt->data;
1977 int buf_size = avpkt->size;
1978 AACContext *ac = avctx->priv_data;
1979 ChannelElement *che = NULL, *che_prev = NULL;
1981 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1982 int err, elem_id, data_size_tmp;
1984 int samples = 0, multiplier;
1987 init_get_bits(&gb, buf, buf_size * 8);
1989 if (show_bits(&gb, 12) == 0xfff) {
1990 if (parse_adts_frame_header(ac, &gb) < 0) {
1991 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1994 if (ac->m4ac.sampling_index > 12) {
1995 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2000 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
/* Each element starts with a 3-bit type followed by a 4-bit id. */
2002 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
2003 elem_id = get_bits(&gb, 4);
/* Element types below TYPE_DSE need an allocated channel element. */
2005 if (elem_type < TYPE_DSE) {
2006 if (!(che=get_che(ac, elem_type, elem_id))) {
2007 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2008 elem_type, elem_id);
2014 switch (elem_type) {
2017 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2021 err = decode_cpe(ac, &gb, che);
2025 err = decode_cce(ac, &gb, che);
2029 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2033 err = skip_data_stream_element(ac, &gb);
2037 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2038 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2039 if ((err = decode_pce(ac, new_che_pos, &gb)))
2041 if (ac->output_configured > OC_TRIAL_PCE)
2042 av_log(avctx, AV_LOG_ERROR,
2043 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2045 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
/*
 * In this branch elem_id is reused as a byte count for the extension payload.
 * NOTE(review): the 8-bit extension below is presumably read only for an
 * escape value of the 4-bit field; the guard is not among the visible lines.
 */
2051 elem_id += get_bits(&gb, 8) - 1;
2052 if (get_bits_left(&gb) < 8 * elem_id) {
2053 av_log(avctx, AV_LOG_ERROR, overread_err);
2057 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2058 err = 0; /* FIXME */
2062 err = -1; /* should not happen, but keeps compiler happy */
2067 elem_type_prev = elem_type;
/* Fewer than 3 bits left means the next element type cannot be read. */
2072 if (get_bits_left(&gb) < 3) {
2073 av_log(avctx, AV_LOG_ERROR, overread_err);
2078 spectral_to_sample(ac);
/* The sample count is doubled when SBR is on and raises the sample rate. */
2080 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2081 samples <<= multiplier;
2082 if (ac->output_configured < OC_LOCKED) {
2083 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2084 avctx->frame_size = samples;
2087 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2088 if (*data_size < data_size_tmp) {
2089 av_log(avctx, AV_LOG_ERROR,
2090 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2091 *data_size, data_size_tmp);
2094 *data_size = data_size_tmp;
2097 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
/* Any non-zero configuration state becomes OC_LOCKED after a decoded frame. */
2099 if (ac->output_configured)
2100 ac->output_configured = OC_LOCKED;
/*
 * Bits consumed are rounded up to whole bytes. The loop then scans the bytes
 * after that point for a non-zero value.
 * NOTE(review): presumably it stops at the first one, so that a packet with
 * only zero bytes remaining is reported as fully consumed; the loop exit is
 * not among the visible lines.
 */
2102 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2103 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2104 if (buf[buf_offset])
2107 return buf_size > buf_offset ? buf_consumed : buf_size;
/*
 * Releases decoder resources: for every allocated channel element the SBR
 * context is closed and the element is freed with av_freep(), then both MDCT
 * contexts are shut down with ff_mdct_end().
 */
2110 static av_cold int aac_decode_close(AVCodecContext *avctx)
2112 AACContext *ac = avctx->priv_data;
2115 for (i = 0; i < MAX_ELEM_ID; i++) {
2116 for (type = 0; type < 4; type++) {
/* The SBR context is only closed for elements that exist. */
2117 if (ac->che[type][i])
2118 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2119 av_freep(&ac->che[type][i]);
2123 ff_mdct_end(&ac->mdct);
2124 ff_mdct_end(&ac->mdct_small);
2128 AVCodec aac_decoder = {
2137 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2138 .sample_fmts = (const enum SampleFormat[]) {
2139 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2141 .channel_layouts = aac_channel_layout,