3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aacadtsdec.h"
101 # include "arm/aac.h"
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
121 while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE ||
204 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
209 if (ac->che[type][id])
210 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211 av_freep(&ac->che[type][id]);
217 * Configure output channel order based on the current program configuration element.
219 * @param che_pos current channel position configuration
220 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
222 * @return Returns error status. 0 - OK, !0 - error
224 static av_cold int output_configure(AACContext *ac,
225 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
226 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227 int channel_config, enum OCStatus oc_type)
229 AVCodecContext *avctx = ac->avctx;
230 int i, type, channels = 0, ret;
232 if (new_che_pos != che_pos)
233 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
235 if (channel_config) {
236 for (i = 0; i < tags_per_config[channel_config]; i++) {
237 if ((ret = che_configure(ac, che_pos,
238 aac_channel_layout_map[channel_config - 1][i][0],
239 aac_channel_layout_map[channel_config - 1][i][1],
244 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
247 avctx->channel_layout = aac_channel_layout[channel_config - 1];
249 /* Allocate or free elements depending on if they are in the
250 * current program configuration.
252 * Set up default 1:1 output mapping.
254 * For a 5.1 stream the output order will be:
255 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
258 for (i = 0; i < MAX_ELEM_ID; i++) {
259 for (type = 0; type < 4; type++) {
260 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
265 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266 ac->tags_mapped = 4 * MAX_ELEM_ID;
268 avctx->channel_layout = 0;
271 avctx->channels = channels;
273 ac->output_configured = oc_type;
279 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
281 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
282 * @param sce_map mono (Single Channel Element) map
283 * @param type speaker type/position for these channels
285 static void decode_channel_map(enum ChannelPosition *cpe_map,
286 enum ChannelPosition *sce_map,
287 enum ChannelPosition type,
288 GetBitContext *gb, int n)
291 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
292 map[get_bits(gb, 4)] = type;
297 * Decode program configuration element; reference: table 4.2.
299 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
301 * @return Returns error status. 0 - OK, !0 - error
303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
306 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
309 skip_bits(gb, 2); // object_type
311 sampling_index = get_bits(gb, 4);
312 if (ac->m4ac.sampling_index != sampling_index)
313 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
315 num_front = get_bits(gb, 4);
316 num_side = get_bits(gb, 4);
317 num_back = get_bits(gb, 4);
318 num_lfe = get_bits(gb, 2);
319 num_assoc_data = get_bits(gb, 3);
320 num_cc = get_bits(gb, 4);
323 skip_bits(gb, 4); // mono_mixdown_tag
325 skip_bits(gb, 4); // stereo_mixdown_tag
328 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
331 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
332 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
333 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
335 skip_bits_long(gb, 4 * num_assoc_data);
337 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
341 /* comment field, first byte is length */
342 comment_len = get_bits(gb, 8) * 8;
343 if (get_bits_left(gb) < comment_len) {
344 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
347 skip_bits_long(gb, comment_len);
352 * Set up channel positions based on a default channel configuration
353 * as specified in table 1.17.
355 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
357 * @return Returns error status. 0 - OK, !0 - error
359 static av_cold int set_default_channel_config(AACContext *ac,
360 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
363 if (channel_config < 1 || channel_config > 7) {
364 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
369 /* default channel configurations:
371 * 1ch : front center (mono)
372 * 2ch : L + R (stereo)
373 * 3ch : front center + L + R
374 * 4ch : front center + L + R + back center
375 * 5ch : front center + L + R + back stereo
376 * 6ch : front center + L + R + back stereo + LFE
377 * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
380 if (channel_config != 2)
381 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382 if (channel_config > 1)
383 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384 if (channel_config == 4)
385 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
386 if (channel_config > 4)
387 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 = AAC_CHANNEL_BACK; // back stereo
389 if (channel_config > 5)
390 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
391 if (channel_config == 7)
392 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
398 * Decode GA "General Audio" specific configuration; reference: table 4.1.
400 * @return Returns error status. 0 - OK, !0 - error
402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
405 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
406 int extension_flag, ret;
408 if (get_bits1(gb)) { // frameLengthFlag
409 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
413 if (get_bits1(gb)) // dependsOnCoreCoder
414 skip_bits(gb, 14); // coreCoderDelay
415 extension_flag = get_bits1(gb);
417 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
418 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 skip_bits(gb, 3); // layerNr
421 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
422 if (channel_config == 0) {
423 skip_bits(gb, 4); // element_instance_tag
424 if ((ret = decode_pce(ac, new_che_pos, gb)))
427 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
430 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
433 if (extension_flag) {
434 switch (ac->m4ac.object_type) {
436 skip_bits(gb, 5); // numOfSubFrame
437 skip_bits(gb, 11); // layer_length
441 case AOT_ER_AAC_SCALABLE:
443 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
444 * aacScalefactorDataResilienceFlag
445 * aacSpectralDataResilienceFlag
449 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
455 * Decode audio specific configuration; reference: table 1.13.
457 * @param data pointer to AVCodecContext extradata
458 * @param data_size size of AVCodecContext extradata
460 * @return Returns error status. 0 - OK, !0 - error
462 static int decode_audio_specific_config(AACContext *ac, void *data,
468 init_get_bits(&gb, data, data_size * 8);
470 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
472 if (ac->m4ac.sampling_index > 12) {
473 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
476 if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
479 skip_bits_long(&gb, i);
481 switch (ac->m4ac.object_type) {
484 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
488 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
496 * linear congruential pseudorandom number generator
498 * @param previous_val the current state of the generator (passed by value)
500 * @return Returns a 32-bit pseudorandom integer
502 static av_always_inline int lcg_random(int previous_val)
504 return previous_val * 1664525 + 1013904223;
507 static av_always_inline void reset_predict_state(PredictorState *ps)
517 static void reset_all_predictors(PredictorState *ps)
520 for (i = 0; i < MAX_PREDICTORS; i++)
521 reset_predict_state(&ps[i]);
524 static void reset_predictor_group(PredictorState *ps, int group_num)
527 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
528 reset_predict_state(&ps[i]);
531 #define AAC_INIT_VLC_STATIC(num, size) \
532 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
533 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
534 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
537 static av_cold int aac_decode_init(AVCodecContext *avctx)
539 AACContext *ac = avctx->priv_data;
542 ac->m4ac.sample_rate = avctx->sample_rate;
544 if (avctx->extradata_size > 0) {
545 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
549 avctx->sample_fmt = SAMPLE_FMT_S16;
551 AAC_INIT_VLC_STATIC( 0, 304);
552 AAC_INIT_VLC_STATIC( 1, 270);
553 AAC_INIT_VLC_STATIC( 2, 550);
554 AAC_INIT_VLC_STATIC( 3, 300);
555 AAC_INIT_VLC_STATIC( 4, 328);
556 AAC_INIT_VLC_STATIC( 5, 294);
557 AAC_INIT_VLC_STATIC( 6, 306);
558 AAC_INIT_VLC_STATIC( 7, 268);
559 AAC_INIT_VLC_STATIC( 8, 510);
560 AAC_INIT_VLC_STATIC( 9, 366);
561 AAC_INIT_VLC_STATIC(10, 462);
565 dsputil_init(&ac->dsp, avctx);
567 ac->random_state = 0x1f2e3d4c;
569 // -1024 - Compensate wrong IMDCT method.
570 // 32768 - Required to scale values to the correct range for the bias method
571 // for float to int16 conversion.
573 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
574 ac->add_bias = 385.0f;
575 ac->sf_scale = 1. / (-1024. * 32768.);
579 ac->sf_scale = 1. / -1024.;
585 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
586 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
587 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
590 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
591 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
592 // window initialization
593 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
594 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
595 ff_init_ff_sine_windows(10);
596 ff_init_ff_sine_windows( 7);
604 * Skip data_stream_element; reference: table 4.10.
606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
608 int byte_align = get_bits1(gb);
609 int count = get_bits(gb, 8);
611 count += get_bits(gb, 8);
615 if (get_bits_left(gb) < 8 * count) {
616 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
619 skip_bits_long(gb, 8 * count);
623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
628 ics->predictor_reset_group = get_bits(gb, 5);
629 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
630 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
634 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
635 ics->prediction_used[sfb] = get_bits1(gb);
641 * Decode Individual Channel Stream info; reference: table 4.6.
643 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
646 GetBitContext *gb, int common_window)
649 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 memset(ics, 0, sizeof(IndividualChannelStream));
653 ics->window_sequence[1] = ics->window_sequence[0];
654 ics->window_sequence[0] = get_bits(gb, 2);
655 ics->use_kb_window[1] = ics->use_kb_window[0];
656 ics->use_kb_window[0] = get_bits1(gb);
657 ics->num_window_groups = 1;
658 ics->group_len[0] = 1;
659 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
661 ics->max_sfb = get_bits(gb, 4);
662 for (i = 0; i < 7; i++) {
664 ics->group_len[ics->num_window_groups - 1]++;
666 ics->num_window_groups++;
667 ics->group_len[ics->num_window_groups - 1] = 1;
670 ics->num_windows = 8;
671 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
672 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
673 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674 ics->predictor_present = 0;
676 ics->max_sfb = get_bits(gb, 6);
677 ics->num_windows = 1;
678 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
679 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
680 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
681 ics->predictor_present = get_bits1(gb);
682 ics->predictor_reset_group = 0;
683 if (ics->predictor_present) {
684 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
685 if (decode_prediction(ac, ics, gb)) {
686 memset(ics, 0, sizeof(IndividualChannelStream));
689 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 memset(ics, 0, sizeof(IndividualChannelStream));
694 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 memset(ics, 0, sizeof(IndividualChannelStream));
701 if (ics->max_sfb > ics->num_swb) {
702 av_log(ac->avctx, AV_LOG_ERROR,
703 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
704 ics->max_sfb, ics->num_swb);
705 memset(ics, 0, sizeof(IndividualChannelStream));
713 * Decode band types (section_data payload); reference: table 4.46.
715 * @param band_type array of the used band type
716 * @param band_type_run_end array of the last scalefactor band of a band type run
718 * @return Returns error status. 0 - OK, !0 - error
720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
721 int band_type_run_end[120], GetBitContext *gb,
722 IndividualChannelStream *ics)
725 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
726 for (g = 0; g < ics->num_window_groups; g++) {
728 while (k < ics->max_sfb) {
729 uint8_t sect_end = k;
731 int sect_band_type = get_bits(gb, 4);
732 if (sect_band_type == 12) {
733 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
736 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 sect_end += sect_len_incr;
738 sect_end += sect_len_incr;
739 if (get_bits_left(gb) < 0) {
740 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
743 if (sect_end > ics->max_sfb) {
744 av_log(ac->avctx, AV_LOG_ERROR,
745 "Number of bands (%d) exceeds limit (%d).\n",
746 sect_end, ics->max_sfb);
749 for (; k < sect_end; k++) {
750 band_type [idx] = sect_band_type;
751 band_type_run_end[idx++] = sect_end;
759 * Decode scalefactors; reference: table 4.47.
761 * @param global_gain first scalefactor value as scalefactors are differentially coded
762 * @param band_type array of the used band type
763 * @param band_type_run_end array of the last scalefactor band of a band type run
764 * @param sf array of scalefactors or intensity stereo positions
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
769 unsigned int global_gain,
770 IndividualChannelStream *ics,
771 enum BandType band_type[120],
772 int band_type_run_end[120])
774 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
776 int offset[3] = { global_gain, global_gain - 90, 100 };
778 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
779 for (g = 0; g < ics->num_window_groups; g++) {
780 for (i = 0; i < ics->max_sfb;) {
781 int run_end = band_type_run_end[idx];
782 if (band_type[idx] == ZERO_BT) {
783 for (; i < run_end; i++, idx++)
785 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
786 for (; i < run_end; i++, idx++) {
787 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788 if (offset[2] > 255U) {
789 av_log(ac->avctx, AV_LOG_ERROR,
790 "%s (%d) out of range.\n", sf_str[2], offset[2]);
793 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
795 } else if (band_type[idx] == NOISE_BT) {
796 for (; i < run_end; i++, idx++) {
797 if (noise_flag-- > 0)
798 offset[1] += get_bits(gb, 9) - 256;
800 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801 if (offset[1] > 255U) {
802 av_log(ac->avctx, AV_LOG_ERROR,
803 "%s (%d) out of range.\n", sf_str[1], offset[1]);
806 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
809 for (; i < run_end; i++, idx++) {
810 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811 if (offset[0] > 255U) {
812 av_log(ac->avctx, AV_LOG_ERROR,
813 "%s (%d) out of range.\n", sf_str[0], offset[0]);
816 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
825 * Decode pulse data; reference: table 4.7.
827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
828 const uint16_t *swb_offset, int num_swb)
831 pulse->num_pulse = get_bits(gb, 2) + 1;
832 pulse_swb = get_bits(gb, 6);
833 if (pulse_swb >= num_swb)
835 pulse->pos[0] = swb_offset[pulse_swb];
836 pulse->pos[0] += get_bits(gb, 5);
837 if (pulse->pos[0] > 1023)
839 pulse->amp[0] = get_bits(gb, 4);
840 for (i = 1; i < pulse->num_pulse; i++) {
841 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 if (pulse->pos[i] > 1023)
844 pulse->amp[i] = get_bits(gb, 4);
850 * Decode Temporal Noise Shaping data; reference: table 4.48.
852 * @return Returns error status. 0 - OK, !0 - error
854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
855 GetBitContext *gb, const IndividualChannelStream *ics)
857 int w, filt, i, coef_len, coef_res, coef_compress;
858 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
859 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
860 for (w = 0; w < ics->num_windows; w++) {
861 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 coef_res = get_bits1(gb);
864 for (filt = 0; filt < tns->n_filt[w]; filt++) {
866 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
868 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 tns->order[w][filt], tns_max_order);
871 tns->order[w][filt] = 0;
874 if (tns->order[w][filt]) {
875 tns->direction[w][filt] = get_bits1(gb);
876 coef_compress = get_bits1(gb);
877 coef_len = coef_res + 3 - coef_compress;
878 tmp2_idx = 2 * coef_compress + coef_res;
880 for (i = 0; i < tns->order[w][filt]; i++)
881 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
890 * Decode Mid/Side data; reference: table 4.54.
892 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
893 * [1] mask is decoded from bitstream; [2] mask is all 1s;
894 * [3] reserved for scalable AAC
896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
900 if (ms_present == 1) {
901 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
902 cpe->ms_mask[idx] = get_bits1(gb);
903 } else if (ms_present == 2) {
904 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
913 *dst++ = v[idx & 15] * s;
914 *dst++ = v[idx>>4 & 15] * s;
920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
924 *dst++ = v[idx & 3] * s;
925 *dst++ = v[idx>>2 & 3] * s;
926 *dst++ = v[idx>>4 & 3] * s;
927 *dst++ = v[idx>>6 & 3] * s;
933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
934 unsigned sign, const float *scale)
936 union float754 s0, s1;
938 s0.f = s1.f = *scale;
939 s0.i ^= sign >> 1 << 31;
942 *dst++ = v[idx & 15] * s0.f;
943 *dst++ = v[idx>>4 & 15] * s1.f;
950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
951 unsigned sign, const float *scale)
953 unsigned nz = idx >> 12;
954 union float754 s = { .f = *scale };
957 t.i = s.i ^ (sign & 1<<31);
958 *dst++ = v[idx & 3] * t.f;
960 sign <<= nz & 1; nz >>= 1;
961 t.i = s.i ^ (sign & 1<<31);
962 *dst++ = v[idx>>2 & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1<<31);
966 *dst++ = v[idx>>4 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1<<31);
970 *dst++ = v[idx>>6 & 3] * t.f;
977 * Decode spectral data; reference: table 4.50.
978 * Dequantize and scale spectral data; reference: 4.6.3.3.
980 * @param coef array of dequantized, scaled spectral data
981 * @param sf array of scalefactors or intensity stereo positions
982 * @param pulse_present set if pulses are present
983 * @param pulse pointer to pulse data struct
984 * @param band_type array of the used band type
986 * @return Returns error status. 0 - OK, !0 - error
988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989 GetBitContext *gb, const float sf[120],
990 int pulse_present, const Pulse *pulse,
991 const IndividualChannelStream *ics,
992 enum BandType band_type[120])
994 int i, k, g, idx = 0;
995 const int c = 1024 / ics->num_windows;
996 const uint16_t *offsets = ics->swb_offset;
997 float *coef_base = coef;
999 for (g = 0; g < ics->num_windows; g++)
1000 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1002 for (g = 0; g < ics->num_window_groups; g++) {
1003 unsigned g_len = ics->group_len[g];
1005 for (i = 0; i < ics->max_sfb; i++, idx++) {
1006 const unsigned cbt_m1 = band_type[idx] - 1;
1007 float *cfo = coef + offsets[i];
1008 int off_len = offsets[i + 1] - offsets[i];
1011 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1012 for (group = 0; group < g_len; group++, cfo+=128) {
1013 memset(cfo, 0, off_len * sizeof(float));
1015 } else if (cbt_m1 == NOISE_BT - 1) {
1016 for (group = 0; group < g_len; group++, cfo+=128) {
1020 for (k = 0; k < off_len; k++) {
1021 ac->random_state = lcg_random(ac->random_state);
1022 cfo[k] = ac->random_state;
1025 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1026 scale = sf[idx] / sqrtf(band_energy);
1027 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1030 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1031 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1032 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1033 OPEN_READER(re, gb);
1035 switch (cbt_m1 >> 1) {
1037 for (group = 0; group < g_len; group++, cfo+=128) {
1045 UPDATE_CACHE(re, gb);
1046 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1047 cb_idx = cb_vector_idx[code];
1048 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1054 for (group = 0; group < g_len; group++, cfo+=128) {
1064 UPDATE_CACHE(re, gb);
1065 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1066 #if MIN_CACHE_BITS < 20
1067 UPDATE_CACHE(re, gb);
1069 cb_idx = cb_vector_idx[code];
1070 nnz = cb_idx >> 8 & 15;
1071 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1072 LAST_SKIP_BITS(re, gb, nnz);
1073 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1079 for (group = 0; group < g_len; group++, cfo+=128) {
1087 UPDATE_CACHE(re, gb);
1088 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1089 cb_idx = cb_vector_idx[code];
1090 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1097 for (group = 0; group < g_len; group++, cfo+=128) {
1107 UPDATE_CACHE(re, gb);
1108 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1109 cb_idx = cb_vector_idx[code];
1110 nnz = cb_idx >> 8 & 15;
1111 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1112 LAST_SKIP_BITS(re, gb, nnz);
1113 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1119 for (group = 0; group < g_len; group++, cfo+=128) {
1121 uint32_t *icf = (uint32_t *) cf;
1131 UPDATE_CACHE(re, gb);
1132 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1140 cb_idx = cb_vector_idx[code];
1143 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1144 LAST_SKIP_BITS(re, gb, nnz);
1146 for (j = 0; j < 2; j++) {
1150 /* The total length of escape_sequence must be < 22 bits according
1151 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1152 UPDATE_CACHE(re, gb);
1153 b = GET_CACHE(re, gb);
1154 b = 31 - av_log2(~b);
1157 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1161 #if MIN_CACHE_BITS < 21
1162 LAST_SKIP_BITS(re, gb, b + 1);
1163 UPDATE_CACHE(re, gb);
1165 SKIP_BITS(re, gb, b + 1);
1168 n = (1 << b) + SHOW_UBITS(re, gb, b);
1169 LAST_SKIP_BITS(re, gb, b);
1170 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1173 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1174 *icf++ = (bits & 1<<31) | v;
1181 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1185 CLOSE_READER(re, gb);
1191 if (pulse_present) {
1193 for (i = 0; i < pulse->num_pulse; i++) {
1194 float co = coef_base[ pulse->pos[i] ];
1195 while (offsets[idx + 1] <= pulse->pos[i])
1197 if (band_type[idx] != NOISE_BT && sf[idx]) {
1198 float ico = -pulse->amp[i];
1201 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1203 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1210 static av_always_inline float flt16_round(float pf)
1214 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1218 static av_always_inline float flt16_even(float pf)
1222 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1226 static av_always_inline float flt16_trunc(float pf)
1230 pun.i &= 0xFFFF0000U;
1234 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1237 const float a = 0.953125; // 61.0 / 64
1238 const float alpha = 0.90625; // 29.0 / 32
1243 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1244 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1246 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1248 *coef += pv * ac->sf_scale;
1250 e0 = *coef / ac->sf_scale;
1251 e1 = e0 - k1 * ps->r0;
1253 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1254 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5f * (ps->r1 * ps->r1 + e1 * e1));
1255 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1256 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5f * (ps->r0 * ps->r0 + e0 * e0));
1258 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1259 ps->r0 = flt16_trunc(a * e0);
1263 * Apply AAC-Main style frequency domain prediction.
// Run the frequency-domain predictor over one channel's coefficients.
// The predictor state is reset once, on first use. For every window sequence
// other than EIGHT_SHORT_SEQUENCE, each coefficient in the first
// ff_aac_pred_sfb_max[sampling_index] scalefactor bands is passed to
// predict().
1265 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1269 if (!sce->ics.predictor_initialized) {
1270 reset_all_predictors(sce->predictor_state);
1271 sce->ics.predictor_initialized = 1;
1274 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1275 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1276 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
// The last argument is non-zero only when prediction is present and enabled
// for this band; predict() is still called for the other bands.
1277 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1278 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
// A non-zero predictor_reset_group selects the group of predictors to reset.
1281 if (sce->ics.predictor_reset_group)
1282 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
// NOTE(review): presumably the else branch taken for EIGHT_SHORT_SEQUENCE; the
// `else` line itself is not visible in this excerpt.
1284 reset_all_predictors(sce->predictor_state);
1288 * Decode an individual_channel_stream payload; reference: table 4.44.
1290 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1291 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1293 * @return Returns error status. 0 - OK, !0 - error
// Decode one individual_channel_stream into sce->coeffs.
// Reads, in order: the 8-bit global gain; ics_info (only when neither
// common_window nor scale_flag is set); band types; scalefactors; the pulse,
// TNS and SSR presence flags with their payloads; and finally the spectrum,
// which decode_spectrum_and_dequant() dequantizes. For AOT_AAC_MAIN streams
// without a common window the predictor is applied here as well.
// NOTE(review): the return statements on the failure paths are not visible in
// this excerpt.
1295 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1296 GetBitContext *gb, int common_window, int scale_flag)
1299 TemporalNoiseShaping *tns = &sce->tns;
1300 IndividualChannelStream *ics = &sce->ics;
1301 float *out = sce->coeffs;
1302 int global_gain, pulse_present = 0;
1304 /* This assignment is to silence a GCC warning about the variable being used
1305 * uninitialized when in fact it always is.
1307 pulse.num_pulse = 0;
1309 global_gain = get_bits(gb, 8);
1311 if (!common_window && !scale_flag) {
1312 if (decode_ics_info(ac, ics, gb, 0) < 0)
1316 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1318 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
// Pulse data is rejected for EIGHT_SHORT_SEQUENCE windows.
1323 if ((pulse_present = get_bits1(gb))) {
1324 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1325 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1328 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1329 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
// decode_tns() runs only when the TNS presence bit is set.
1333 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
// A set bit here is reported as the missing feature "SSR".
1335 if (get_bits1(gb)) {
1336 av_log_missing_feature(ac->avctx, "SSR", 1);
1341 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
// With a common window, decode_cpe() applies prediction instead.
1344 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1345 apply_prediction(ac, sce);
1351 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
// Apply mid/side stereo to a channel pair, in place.
// Walks every (window group, scalefactor band) pair; idx is the running band
// index across groups. A band is processed only when its ms_mask entry is set
// and the band type in both channels is below NOISE_BT.
1353 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1355 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1356 float *ch0 = cpe->ch[0].coeffs;
1357 float *ch1 = cpe->ch[1].coeffs;
1358 int g, i, group, idx = 0;
1359 const uint16_t *offsets = ics->swb_offset;
1360 for (g = 0; g < ics->num_window_groups; g++) {
1361 for (i = 0; i < ics->max_sfb; i++, idx++) {
1362 if (cpe->ms_mask[idx] &&
1363 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
// One butterfly call per window of the group; windows are 128 coefficients
// apart.
1364 for (group = 0; group < ics->group_len[g]; group++) {
1365 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1366 ch1 + group * 128 + offsets[i],
1367 offsets[i+1] - offsets[i]);
// Advance both channel pointers past the windows of this group.
1371 ch0 += ics->group_len[g] * 128;
1372 ch1 += ics->group_len[g] * 128;
1377 * intensity stereo decoding; reference: 4.6.8.2.3
1379 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1380 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1381 * [3] reserved for scalable AAC
// Rebuild channel 1 of a pair from channel 0 in intensity-coded bands.
// For each run of bands whose type in channel 1 is INTENSITY_BT or
// INTENSITY_BT2, channel 1's coefficients are overwritten with channel 0's
// coefficients multiplied by a signed copy of channel 1's scalefactor.
1383 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1385 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1386 SingleChannelElement *sce1 = &cpe->ch[1];
1387 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1388 const uint16_t *offsets = ics->swb_offset;
1389 int g, group, i, k, idx = 0;
1392 for (g = 0; g < ics->num_window_groups; g++) {
1393 for (i = 0; i < ics->max_sfb;) {
1394 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1395 const int bt_run_end = sce1->band_type_run_end[idx];
1396 for (; i < bt_run_end; i++, idx++) {
// Evaluates to -1 for band type value 14 and +1 for value 15.
1397 c = -1 + 2 * (sce1->band_type[idx] - 14);
// Flips the sign when the band's ms_mask entry is 1.
// NOTE(review): presumably conditional on ms_present; the guarding line is
// not visible in this excerpt.
1399 c *= 1 - 2 * cpe->ms_mask[idx];
1400 scale = c * sce1->sf[idx];
1401 for (group = 0; group < ics->group_len[g]; group++)
1402 for (k = offsets[i]; k < offsets[i + 1]; k++)
1403 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
// Bands that are not intensity coded: skip idx ahead to the end of the run.
// NOTE(review): the matching update of `i` is not visible in this excerpt.
1406 int bt_run_end = sce1->band_type_run_end[idx];
1407 idx += bt_run_end - i;
// Advance both coefficient pointers past the windows of this group.
1411 coef0 += ics->group_len[g] * 128;
1412 coef1 += ics->group_len[g] * 128;
1417 * Decode a channel_pair_element; reference: table 4.4.
1419 * @return Returns error status. 0 - OK, !0 - error
// Decode a channel_pair_element: optional shared ics_info and mid/side mask,
// then one individual_channel_stream per channel, followed by the stereo
// tools (mid/side, prediction for AOT_AAC_MAIN, intensity stereo).
// NOTE(review): the return statements on the failure paths are not visible in
// this excerpt.
1421 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1423 int i, ret, common_window, ms_present = 0;
1425 common_window = get_bits1(gb);
1426 if (common_window) {
1427 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
// Channel 1 takes a copy of channel 0's ICS, but its use_kb_window[1] is set
// from the use_kb_window[0] value it held before the copy.
1429 i = cpe->ch[1].ics.use_kb_window[0];
1430 cpe->ch[1].ics = cpe->ch[0].ics;
1431 cpe->ch[1].ics.use_kb_window[1] = i;
// ms_present is a 2-bit field; the value 3 is rejected as reserved.
1432 ms_present = get_bits(gb, 2);
1433 if (ms_present == 3) {
1434 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1436 } else if (ms_present)
1437 decode_mid_side_stereo(cpe, gb, ms_present);
1439 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1441 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1444 if (common_window) {
// NOTE(review): the condition guarding the mid/side call is not visible in
// this excerpt.
1446 apply_mid_side_stereo(ac, cpe);
// With a common window decode_ics() skips prediction, so it is applied here,
// after mid/side decoding.
1447 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1448 apply_prediction(ac, &cpe->ch[0]);
1449 apply_prediction(ac, &cpe->ch[1]);
1453 apply_intensity_stereo(cpe, ms_present);
// Bases used for coupling gains; decode_cce() selects one entry with a 2-bit
// field and raises it to the negated decoded gain value.
// NOTE(review): only the first two entries are visible in this excerpt.
1457 static const float cce_scale[] = {
1458 1.09050773266525765921, //2^(1/8)
1459 1.18920711500272106672, //2^(1/4)
1465 * Decode coupling_channel_element; reference: table 4.8.
1467 * @return Returns error status. 0 - OK, !0 - error
// Decode a coupling_channel_element: the list of coupled targets, the
// coupling point, the gain scale, the coupling channel's own
// individual_channel_stream, and one gain set per coupled target.
// NOTE(review): local declarations, the computation of num_gain and several
// branch lines are not visible in this excerpt.
1469 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1475 SingleChannelElement *sce = &che->ch[0];
1476 ChannelCoupling *coup = &che->coup;
// First bit of the coupling point, stored as 0 or 2.
1478 coup->coupling_point = 2 * get_bits1(gb);
// The loop below runs num_coupled + 1 times.
1479 coup->num_coupled = get_bits(gb, 3);
1480 for (c = 0; c <= coup->num_coupled; c++) {
1482 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1483 coup->id_select[c] = get_bits(gb, 4);
1484 if (coup->type[c] == TYPE_CPE) {
1485 coup->ch_select[c] = get_bits(gb, 2);
1486 if (coup->ch_select[c] == 3)
// NOTE(review): presumably the branch for targets that are not TYPE_CPE; the
// `else` line is not visible in this excerpt.
1489 coup->ch_select[c] = 2;
// The next bit is always consumed. The result is 0, 1 or 3: a first bit of 1
// (value 2) forces the increment regardless of the second bit.
1491 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1493 sign = get_bits(gb, 1);
1494 scale = cce_scale[get_bits(gb, 2)];
1496 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1499 for (c = 0; c < num_gain; c++) {
1503 float gain_cache = 1.;
// For AFTER_IMDCT coupling the common gain is always read; otherwise a flag
// bit says whether it is present.
1505 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1506 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1507 gain_cache = powf(scale, -gain);
// AFTER_IMDCT coupling stores a single gain per target in slot 0.
1509 if (coup->coupling_point == AFTER_IMDCT) {
1510 coup->gain[c][0] = gain_cache;
// Otherwise one gain is stored per (window group, scalefactor band) whose
// band type is not ZERO_BT.
1512 for (g = 0; g < sce->ics.num_window_groups; g++) {
1513 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1514 if (sce->band_type[idx] != ZERO_BT) {
1516 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
// NOTE(review): the lines deriving `s` and adjusting `t` are not visible here.
1524 gain_cache = powf(scale, -t) * s;
1527 coup->gain[c][idx] = gain_cache;
1537 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1539 * @return Returns number of bytes consumed.
// Read the channel exclusion mask for dynamic range control.
// Mask bits are read in groups of 7. After each group, another group follows
// only while fewer than MAX_CHANNELS - 7 entries have been stored and the
// next bit read is set; that bit is not read once the size bound is hit.
// NOTE(review): the rest of the parameter list and the `do {` line are not
// visible in this excerpt.
1541 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1545 int num_excl_chan = 0;
1548 for (i = 0; i < 7; i++)
1549 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1550 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
// Number of 7-bit groups read.
1552 return num_excl_chan / 7;
1556 * Decode dynamic range information; reference: table 4.52.
1558 * @param cnt length of TYPE_FIL syntactic element in bytes
1560 * @return Returns number of bytes consumed.
// Parse a dynamic range information payload into *che_drc. Four optional
// sections, each introduced by a presence bit, are followed by one sign/value
// pair per DRC band.
// NOTE(review): the declaration and most updates of the byte counter `n`, and
// the return statement, are not visible in this excerpt.
1562 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1563 GetBitContext *gb, int cnt)
// One band unless the bands section below adds band_incr more.
1566 int drc_num_bands = 1;
1569 /* pce_tag_present? */
1570 if (get_bits1(gb)) {
1571 che_drc->pce_instance_tag = get_bits(gb, 4);
1572 skip_bits(gb, 4); // tag_reserved_bits
1576 /* excluded_chns_present? */
1577 if (get_bits1(gb)) {
1578 n += decode_drc_channel_exclusions(che_drc, gb);
1581 /* drc_bands_present? */
1582 if (get_bits1(gb)) {
1583 che_drc->band_incr = get_bits(gb, 4);
1584 che_drc->interpolation_scheme = get_bits(gb, 4);
1586 drc_num_bands += che_drc->band_incr;
1587 for (i = 0; i < drc_num_bands; i++) {
1588 che_drc->band_top[i] = get_bits(gb, 8);
1593 /* prog_ref_level_present? */
1594 if (get_bits1(gb)) {
1595 che_drc->prog_ref_level = get_bits(gb, 7);
1596 skip_bits1(gb); // prog_ref_level_reserved_bits
// Per band: a 1-bit sign and a 7-bit control value.
1600 for (i = 0; i < drc_num_bands; i++) {
1601 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1602 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1610 * Decode extension data (incomplete); reference: table 4.51.
1612 * @param cnt length of TYPE_FIL syntactic element in bytes
1614 * @return Returns number of bytes consumed
// Dispatch on the 4-bit extension type of a fill element payload. SBR data
// goes to ff_decode_sbr_extension(), dynamic range data to
// decode_dynamic_range(), and other payloads are skipped.
// NOTE(review): several case labels, the crc_flag handling, `break` statements
// and the return statement are not visible in this excerpt.
1616 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1617 ChannelElement *che, enum RawDataBlockType elem_type)
1621 switch (get_bits(gb, 4)) { // extension type
1622 case EXT_SBR_DATA_CRC:
1626 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
// In the rejected-SBR cases the rest of the payload is skipped: cnt bytes
// minus the 4 type bits already read.
1628 } else if (!ac->m4ac.sbr) {
1629 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1630 skip_bits_long(gb, 8 * cnt - 4);
1632 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1633 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1634 skip_bits_long(gb, 8 * cnt - 4);
// Mono stream, PS state still -1, output not locked: the output is
// reconfigured before the SBR data is decoded.
// NOTE(review): the assignments made in this branch are not visible here.
1636 } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1639 output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1643 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1645 case EXT_DYNAMIC_RANGE:
1646 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1650 case EXT_DATA_ELEMENT:
1652 skip_bits_long(gb, 8 * cnt - 4);
1659 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1661 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1662 * @param coef spectral coefficients
// Apply the Temporal Noise Shaping filters of *tns to coef, in place.
// For every window and every filter in it, LPC coefficients are derived from
// the stored filter coefficients and a recursive filter is run over the
// coefficient range covered by the filter's bands.
// NOTE(review): several local declarations, the assignment of `top`, the
// direction-dependent setup of `inc` and `start`, and the use of `decode` are
// not visible in this excerpt.
1664 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1665 IndividualChannelStream *ics, int decode)
// Band limit applied to both ends of each filter range.
1667 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1669 int bottom, top, order, start, end, size, inc;
1670 float lpc[TNS_MAX_ORDER];
1672 for (w = 0; w < ics->num_windows; w++) {
// Filters are laid out from the top band downwards: each starts where the
// previous one ended and extends `length` bands lower, clamped at 0.
1673 bottom = ics->num_swb;
1674 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1676 bottom = FFMAX(0, top - tns->length[w][filt]);
1677 order = tns->order[w][filt];
1682 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1684 start = ics->swb_offset[FFMIN(bottom, mmm)];
1685 end = ics->swb_offset[FFMIN( top, mmm)];
1686 if ((size = end - start) <= 0)
1688 if (tns->direction[w][filt]) {
// Recursive filter: each output subtracts the previously filtered neighbours
// weighted by lpc. The tap count is capped at m, so the filter never reaches
// outside the range being processed.
1697 for (m = 0; m < size; m++, start += inc)
1698 for (i = 1; i <= FFMIN(m, order); i++)
1699 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1705 * Conduct IMDCT and windowing.
// Inverse-transform one channel's coefficients, overlap-add them with the
// samples saved from the previous frame into sce->ret, and store the second
// half of this frame in sce->saved for the next call.
// NOTE(review): the `else` lines between the branches below are not visible in
// this excerpt.
1707 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1709 IndividualChannelStream *ics = &sce->ics;
1710 float *in = sce->coeffs;
1711 float *out = sce->ret;
1712 float *saved = sce->saved;
// Index 0 of use_kb_window selects the short window for the current frame;
// index 1 selects the long and short windows marked *_prev.
1713 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1714 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1715 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1716 float *buf = ac->buf_mdct;
1717 float *temp = ac->temp;
// Eight short transforms of 128 coefficients each, or one long transform.
1721 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1722 for (i = 0; i < 1024; i += 128)
1723 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1725 ff_imdct_half(&ac->mdct, buf, in);
1727 /* window overlapping
1728 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1729 * and long to short transitions are considered to be short to short
1730 * transitions. This leaves just two cases (long to long and short to short)
1731 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1733 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1734 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1735 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
// Short-overlap case: the first 448 outputs are the saved samples plus bias.
1737 for (i = 0; i < 448; i++)
1738 out[i] = saved[i] + bias;
1740 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1741 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1742 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1743 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1744 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
// The fifth overlap is computed into temp; only its first 64 samples belong
// to this frame's output, the rest seed `saved` further down.
1745 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1746 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
1748 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1749 for (i = 576; i < 1024; i++)
1750 out[i] = buf[i-512] + bias;
// Save the tail of this frame for the next call's overlap.
1755 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
// temp was computed with bias added; remove it before saving.
1756 for (i = 0; i < 64; i++)
1757 saved[i] = temp[64 + i] - bias;
1758 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1759 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1760 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1761 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1762 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1763 memcpy( saved, buf + 512, 448 * sizeof(float));
1764 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1765 } else { // LONG_STOP or ONLY_LONG
1766 memcpy( saved, buf + 512, 512 * sizeof(float));
1771 * Apply dependent channel coupling (applied before IMDCT).
1773 * @param index index into coupling gain array
// Add the coupling channel's spectral coefficients, scaled by a per-band gain,
// to the target channel's coefficients. Bands of the coupling channel whose
// type is ZERO_BT are skipped. The combination with AOT_AAC_LTP is logged as
// unsupported.
// NOTE(review): what follows the LTP error message (presumably an early
// return) is not visible in this excerpt.
1775 static void apply_dependent_coupling(AACContext *ac,
1776 SingleChannelElement *target,
1777 ChannelElement *cce, int index)
1779 IndividualChannelStream *ics = &cce->ch[0].ics;
1780 const uint16_t *offsets = ics->swb_offset;
1781 float *dest = target->coeffs;
1782 const float *src = cce->ch[0].coeffs;
1783 int g, i, group, k, idx = 0;
1784 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1785 av_log(ac->avctx, AV_LOG_ERROR,
1786 "Dependent coupling is not supported together with LTP\n");
// idx is the running band index across window groups; it also indexes the
// gain table filled in by decode_cce().
1789 for (g = 0; g < ics->num_window_groups; g++) {
1790 for (i = 0; i < ics->max_sfb; i++, idx++) {
1791 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1792 const float gain = cce->coup.gain[index][idx];
1793 for (group = 0; group < ics->group_len[g]; group++) {
1794 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1796 dest[group * 128 + k] += gain * src[group * 128 + k];
// Advance both pointers past the windows of this group.
1801 dest += ics->group_len[g] * 128;
1802 src += ics->group_len[g] * 128;
1807 * Apply independent channel coupling (applied after IMDCT).
1809 * @param index index into coupling gain array
// Add the coupling channel's output samples, scaled by a single gain, to the
// target channel's output samples.
1811 static void apply_independent_coupling(AACContext *ac,
1812 SingleChannelElement *target,
1813 ChannelElement *cce, int index)
// Only slot 0 of the gain row is used here.
1816 const float gain = cce->coup.gain[index][0];
1817 const float bias = ac->add_bias;
1818 const float *src = cce->ch[0].ret;
1819 float *dest = target->ret;
// 1024 samples, or 2048 when m4ac.sbr equals 1.
1820 const int len = 1024 << (ac->m4ac.sbr == 1);
// The bias is subtracted from the source before scaling, so it is not added
// to the destination a second time.
1822 for (i = 0; i < len; i++)
1823 dest[i] += gain * (src[i] - bias);
1827 * channel coupling transformation interface
1829 * @param apply_coupling_method pointer to (in)dependent coupling function
// Find every coupling channel element that uses the given coupling point and
// apply it, through apply_coupling_method, to the channels of `cc` that it
// lists as targets.
// NOTE(review): the declaration and initialisation of `index` and the `else`
// line before the final statement are not visible in this excerpt.
1831 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1832 enum RawDataBlockType type, int elem_id,
1833 enum CouplingPoint coupling_point,
1834 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1838 for (i = 0; i < MAX_ELEM_ID; i++) {
1839 ChannelElement *cce = ac->che[TYPE_CCE][i];
1842 if (cce && cce->coup.coupling_point == coupling_point) {
1843 ChannelCoupling *coup = &cce->coup;
1845 for (c = 0; c <= coup->num_coupled; c++) {
1846 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
// ch_select: any value except 1 couples into channel 0, any value except 2
// couples into channel 1.
1847 if (coup->ch_select[c] != 1) {
1848 apply_coupling_method(ac, &cc->ch[0], cce, index);
1849 if (coup->ch_select[c] != 0)
1852 if (coup->ch_select[c] != 2)
1853 apply_coupling_method(ac, &cc->ch[1], cce, index++);
// Skips the gain rows of a target that is not this element: one row, or two
// when ch_select is 3.
1855 index += 1 + (coup->ch_select[c] == 3);
1862 * Convert spectral data to float samples, applying all supported tools as appropriate.
// Turn the decoded spectra of all allocated channel elements into output
// samples. Per element: coupling before TNS, TNS, coupling between TNS and
// IMDCT, IMDCT with windowing, optional SBR, and coupling after IMDCT.
// NOTE(review): the NULL check on `che` is not visible in this excerpt.
1864 static void spectral_to_sample(AACContext *ac)
// The IMDCT adds no bias when SBR is active (m4ac.sbr > 0).
1867 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
// Element types are visited from 3 down to 0.
1868 for (type = 3; type >= 0; type--) {
1869 for (i = 0; i < MAX_ELEM_ID; i++) {
1870 ChannelElement *che = ac->che[type][i];
1872 if (type <= TYPE_CPE)
1873 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1874 if (che->ch[0].tns.present)
1875 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1876 if (che->ch[1].tns.present)
1877 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1878 if (type <= TYPE_CPE)
1879 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
// A coupling element is transformed only when it couples after the IMDCT.
1880 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1881 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1882 if (type == TYPE_CPE) {
1883 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1885 if (ac->m4ac.sbr > 0) {
1886 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1889 if (type <= TYPE_CCE)
1890 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
// Parse an ADTS frame header and copy its stream parameters into the decoder
// context. While the output configuration is not locked, a non-zero channel
// configuration in the header triggers a trial output configuration.
// NOTE(review): the check on `size`, the error returns and the final return
// are not visible in this excerpt.
1896 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1899 AACADTSHeaderInfo hdr_info;
1901 size = ff_aac_parse_header(gb, &hdr_info);
1903 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1904 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1905 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1906 ac->m4ac.chan_config = hdr_info.chan_config;
1907 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1909 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
// Channel configuration 0 with an unlocked output: drop back to OC_NONE.
1911 } else if (ac->output_configured != OC_LOCKED) {
1912 ac->output_configured = OC_NONE;
1914 if (ac->output_configured != OC_LOCKED) {
1918 ac->m4ac.sample_rate = hdr_info.sample_rate;
1919 ac->m4ac.sampling_index = hdr_info.sampling_index;
1920 ac->m4ac.object_type = hdr_info.object_type;
// The codec context's sample rate is filled in only if it is still unset.
1921 if (!ac->avctx->sample_rate)
1922 ac->avctx->sample_rate = hdr_info.sample_rate;
// Exactly one raw data block per ADTS frame is handled; anything else is
// reported as a missing feature.
1923 if (hdr_info.num_aac_frames == 1) {
1924 if (!hdr_info.crc_absent)
1927 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
// Decode one packet: optional ADTS header, then syntactic elements until
// TYPE_END, then conversion of the spectra to interleaved 16-bit output.
// NOTE(review): case labels, `break`/`return` statements and several local
// declarations are not visible in this excerpt.
1934 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1935 int *data_size, AVPacket *avpkt)
1937 const uint8_t *buf = avpkt->data;
1938 int buf_size = avpkt->size;
1939 AACContext *ac = avctx->priv_data;
1940 ChannelElement *che = NULL, *che_prev = NULL;
1942 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1943 int err, elem_id, data_size_tmp;
1945 int samples = 0, multiplier;
1948 init_get_bits(&gb, buf, buf_size * 8);
// A 12-bit 0xfff sync word marks an ADTS header.
1950 if (show_bits(&gb, 12) == 0xfff) {
1951 if (parse_adts_frame_header(ac, &gb) < 0) {
1952 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1955 if (ac->m4ac.sampling_index > 12) {
1956 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1961 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
// Each element starts with a 3-bit type and a 4-bit id.
1963 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1964 elem_id = get_bits(&gb, 4);
// Element types below TYPE_DSE need an allocated channel element.
1966 if (elem_type < TYPE_DSE) {
1967 if (!(che=get_che(ac, elem_type, elem_id))) {
1968 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
1969 elem_type, elem_id);
1975 switch (elem_type) {
1978 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1982 err = decode_cpe(ac, &gb, che);
1986 err = decode_cce(ac, &gb, che);
1990 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1994 err = skip_data_stream_element(ac, &gb);
1998 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1999 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2000 if ((err = decode_pce(ac, new_che_pos, &gb)))
2002 if (ac->output_configured > OC_TRIAL_PCE)
2003 av_log(avctx, AV_LOG_ERROR,
2004 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2006 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
// Here elem_id is reused as a byte count for the extension payload.
// NOTE(review): presumably this extension byte is read only when the 4-bit
// count is 15; the guarding line is not visible in this excerpt.
2012 elem_id += get_bits(&gb, 8) - 1;
2013 if (get_bits_left(&gb) < 8 * elem_id) {
2014 av_log(avctx, AV_LOG_ERROR, overread_err);
// The payload is attached to the previously decoded element.
2018 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2019 err = 0; /* FIXME */
2023 err = -1; /* should not happen, but keeps compiler happy */
2028 elem_type_prev = elem_type;
// The next element type needs at least 3 bits.
2033 if (get_bits_left(&gb) < 3) {
2034 av_log(avctx, AV_LOG_ERROR, overread_err);
2039 spectral_to_sample(ac);
// The sample count doubles only when SBR is active and the extension sample
// rate exceeds the core sample rate.
2041 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2042 samples <<= multiplier;
2043 if (ac->output_configured < OC_LOCKED) {
2044 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2045 avctx->frame_size = samples;
// On entry *data_size is compared against the bytes needed; on success it is
// set to the bytes produced.
2048 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2049 if (*data_size < data_size_tmp) {
2050 av_log(avctx, AV_LOG_ERROR,
2051 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2052 *data_size, data_size_tmp);
2055 *data_size = data_size_tmp;
2058 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
// Any trial configuration becomes locked once a frame has been output.
2060 if (ac->output_configured)
2061 ac->output_configured = OC_LOCKED;
// Bytes consumed, rounded up to a whole byte. The loop scans the rest of the
// packet for a non-zero byte; the return value is buf_consumed when the scan
// stopped before the end of the packet and buf_size otherwise.
2063 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2064 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2065 if (buf[buf_offset])
2068 return buf_size > buf_offset ? buf_consumed : buf_size;
// Release everything the decoder allocated: the SBR context and storage of
// every channel element, then both MDCT contexts.
// NOTE(review): the return statement is not visible in this excerpt.
2071 static av_cold int aac_decode_close(AVCodecContext *avctx)
2073 AACContext *ac = avctx->priv_data;
2076 for (i = 0; i < MAX_ELEM_ID; i++) {
2077 for (type = 0; type < 4; type++) {
// The SBR context is closed only for elements that exist; av_freep() is
// called for every slot.
2078 if (ac->che[type][i])
2079 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2080 av_freep(&ac->che[type][i]);
2084 ff_mdct_end(&ac->mdct);
2085 ff_mdct_end(&ac->mdct_small);
2089 AVCodec aac_decoder = {
2098 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2099 .sample_fmts = (const enum SampleFormat[]) {
2100 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2102 .channel_layouts = aac_channel_layout,