3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
34 * N (code in SoC repo) gain control
36 * Y window shapes - standard
37 * N window shapes - Low Delay
38 * Y filterbank - standard
39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction
44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution
47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch
50 * Y quantization & coding - AAC
51 * N quantization & coding - TwinVQ
52 * N quantization & coding - BSAC
53 * N AAC Error Resilience tools
54 * N Error Resilience payload syntax
55 * N Error Protection tool
57 * N Silence Compression
60 * N Structured Audio tools
61 * N Structured Audio Sample Bank Format
63 * N Harmonic and Individual Lines plus Noise
64 * N Text-To-Speech Interface
65 * Y Spectral Band Replication
66 * Y (not in this code) Layer-1
67 * Y (not in this code) Layer-2
68 * Y (not in this code) Layer-3
69 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
71 * N Direct Stream Transfer
73 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
74 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and Parametric Stereo.
88 #include "aacdectab.h"
89 #include "cbrt_tablegen.h"
92 #include "mpeg4audio.h"
93 #include "aac_parser.h"
101 # include "arm/aac.h"
109 static VLC vlc_scalefactors;
110 static VLC vlc_spectral[11];
112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
116 /* Some buggy encoders appear to set all elem_ids to zero and rely on
117 channels always occurring in the same order. This is expressly forbidden
118 by the spec but we will try to work around it.
121 while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
122 if (ac->output_configured < OC_LOCKED && !err_printed) {
123 av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
128 if (elem_id == MAX_ELEM_ID)
130 ac->tags_seen_this_frame[type][elem_id] = 1;
132 if (ac->tag_che_map[type][elem_id]) {
133 return ac->tag_che_map[type][elem_id];
135 if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
138 switch (ac->m4ac.chan_config) {
140 if (ac->tags_mapped == 3 && type == TYPE_CPE) {
142 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
145 /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
146 instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
147 encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
148 if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
150 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
153 if (ac->tags_mapped == 2 && type == TYPE_CPE) {
155 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
158 if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
160 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
164 if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
166 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
167 } else if (ac->m4ac.chan_config == 2) {
171 if (!ac->tags_mapped && type == TYPE_SCE) {
173 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
181 * Check for the channel element in the current channel position configuration.
182 * If it exists, make sure the appropriate element is allocated and map the
183 * channel order to match the internal FFmpeg channel layout.
185 * @param che_pos current channel position configuration
186 * @param type channel element type
187 * @param id channel element id
188 * @param channels count of the number of channels in the configuration
190 * @return Returns error status. 0 - OK, !0 - error
192 static av_cold int che_configure(AACContext *ac,
193 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
197 if (che_pos[type][id]) {
198 if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
199 return AVERROR(ENOMEM);
200 ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
201 if (type != TYPE_CCE) {
202 ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
203 if (type == TYPE_CPE ||
204 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
205 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
209 if (ac->che[type][id])
210 ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
211 av_freep(&ac->che[type][id]);
217 * Configure output channel order based on the current program configuration element.
219 * @param che_pos current channel position configuration
220 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
222 * @return Returns error status. 0 - OK, !0 - error
224 static av_cold int output_configure(AACContext *ac,
225 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
226 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
227 int channel_config, enum OCStatus oc_type)
229 AVCodecContext *avctx = ac->avctx;
230 int i, type, channels = 0, ret;
232 if (new_che_pos != che_pos)
233 memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
235 if (channel_config) {
236 for (i = 0; i < tags_per_config[channel_config]; i++) {
237 if ((ret = che_configure(ac, che_pos,
238 aac_channel_layout_map[channel_config - 1][i][0],
239 aac_channel_layout_map[channel_config - 1][i][1],
244 memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
247 avctx->channel_layout = aac_channel_layout[channel_config - 1];
249 /* Allocate or free elements depending on if they are in the
250 * current program configuration.
252 * Set up default 1:1 output mapping.
254 * For a 5.1 stream the output order will be:
255 * [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
258 for (i = 0; i < MAX_ELEM_ID; i++) {
259 for (type = 0; type < 4; type++) {
260 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
265 memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
266 ac->tags_mapped = 4 * MAX_ELEM_ID;
268 avctx->channel_layout = 0;
271 avctx->channels = channels;
273 ac->output_configured = oc_type;
279 * Decode an array of 4-bit element IDs, optionally interleaved with a stereo/mono switching bit.
281 * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
282 * @param sce_map mono (Single Channel Element) map
283 * @param type speaker type/position for these channels
285 static void decode_channel_map(enum ChannelPosition *cpe_map,
286 enum ChannelPosition *sce_map,
287 enum ChannelPosition type,
288 GetBitContext *gb, int n)
291 enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
292 map[get_bits(gb, 4)] = type;
297 * Decode program configuration element; reference: table 4.2.
299 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
301 * @return Returns error status. 0 - OK, !0 - error
303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
306 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
309 skip_bits(gb, 2); // object_type
311 sampling_index = get_bits(gb, 4);
312 if (ac->m4ac.sampling_index != sampling_index)
313 av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
315 num_front = get_bits(gb, 4);
316 num_side = get_bits(gb, 4);
317 num_back = get_bits(gb, 4);
318 num_lfe = get_bits(gb, 2);
319 num_assoc_data = get_bits(gb, 3);
320 num_cc = get_bits(gb, 4);
323 skip_bits(gb, 4); // mono_mixdown_tag
325 skip_bits(gb, 4); // stereo_mixdown_tag
328 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
330 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
331 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
332 decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
333 decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
335 skip_bits_long(gb, 4 * num_assoc_data);
337 decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
341 /* comment field, first byte is length */
342 comment_len = get_bits(gb, 8) * 8;
343 if (get_bits_left(gb) < comment_len) {
344 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
347 skip_bits_long(gb, comment_len);
352 * Set up channel positions based on a default channel configuration
353 * as specified in table 1.17.
355 * @param new_che_pos New channel position configuration - we only do something if it differs from the current one.
357 * @return Returns error status. 0 - OK, !0 - error
359 static av_cold int set_default_channel_config(AACContext *ac,
360 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
363 if (channel_config < 1 || channel_config > 7) {
364 av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
369 /* default channel configurations:
371 * 1ch : front center (mono)
372 * 2ch : L + R (stereo)
373 * 3ch : front center + L + R
374 * 4ch : front center + L + R + back center
375 * 5ch : front center + L + R + back stereo
376 * 6ch : front center + L + R + back stereo + LFE
377 * 8ch : front center + L + R + outer front left + outer front right + back stereo + LFE (channel_config 7)
380 if (channel_config != 2)
381 new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
382 if (channel_config > 1)
383 new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
384 if (channel_config == 4)
385 new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK; // back center
386 if (channel_config > 4)
387 new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
388 = AAC_CHANNEL_BACK; // back stereo
389 if (channel_config > 5)
390 new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE; // LFE
391 if (channel_config == 7)
392 new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
398 * Decode GA "General Audio" specific configuration; reference: table 4.1.
400 * @return Returns error status. 0 - OK, !0 - error
402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
405 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
406 int extension_flag, ret;
408 if (get_bits1(gb)) { // frameLengthFlag
409 av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
413 if (get_bits1(gb)) // dependsOnCoreCoder
414 skip_bits(gb, 14); // coreCoderDelay
415 extension_flag = get_bits1(gb);
417 if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
418 ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
419 skip_bits(gb, 3); // layerNr
421 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
422 if (channel_config == 0) {
423 skip_bits(gb, 4); // element_instance_tag
424 if ((ret = decode_pce(ac, new_che_pos, gb)))
427 if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
430 if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
433 if (extension_flag) {
434 switch (ac->m4ac.object_type) {
436 skip_bits(gb, 5); // numOfSubFrame
437 skip_bits(gb, 11); // layer_length
441 case AOT_ER_AAC_SCALABLE:
443 skip_bits(gb, 3); /* aacSectionDataResilienceFlag
444 * aacScalefactorDataResilienceFlag
445 * aacSpectralDataResilienceFlag
449 skip_bits1(gb); // extensionFlag3 (TBD in version 3)
455 * Decode audio specific configuration; reference: table 1.13.
457 * @param data pointer to AVCodecContext extradata
458 * @param data_size size of AVCodecContext extradata
460 * @return Returns error status. 0 - OK, !0 - error
462 static int decode_audio_specific_config(AACContext *ac, void *data,
468 init_get_bits(&gb, data, data_size * 8);
470 if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
472 if (ac->m4ac.sampling_index > 12) {
473 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
476 if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
479 skip_bits_long(&gb, i);
481 switch (ac->m4ac.object_type) {
484 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
488 av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
489 ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
496 * linear congruential pseudorandom number generator
498 * @param previous_val pointer to the current state of the generator
500 * @return Returns a 32-bit pseudorandom integer
502 static av_always_inline int lcg_random(int previous_val)
504 return previous_val * 1664525 + 1013904223;
507 static av_always_inline void reset_predict_state(PredictorState *ps)
517 static void reset_all_predictors(PredictorState *ps)
520 for (i = 0; i < MAX_PREDICTORS; i++)
521 reset_predict_state(&ps[i]);
524 static void reset_predictor_group(PredictorState *ps, int group_num)
527 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
528 reset_predict_state(&ps[i]);
531 #define AAC_INIT_VLC_STATIC(num, size) \
532 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
533 ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
534 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
537 static av_cold int aac_decode_init(AVCodecContext *avctx)
539 AACContext *ac = avctx->priv_data;
542 ac->m4ac.sample_rate = avctx->sample_rate;
544 if (avctx->extradata_size > 0) {
545 if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
549 avctx->sample_fmt = SAMPLE_FMT_S16;
551 AAC_INIT_VLC_STATIC( 0, 304);
552 AAC_INIT_VLC_STATIC( 1, 270);
553 AAC_INIT_VLC_STATIC( 2, 550);
554 AAC_INIT_VLC_STATIC( 3, 300);
555 AAC_INIT_VLC_STATIC( 4, 328);
556 AAC_INIT_VLC_STATIC( 5, 294);
557 AAC_INIT_VLC_STATIC( 6, 306);
558 AAC_INIT_VLC_STATIC( 7, 268);
559 AAC_INIT_VLC_STATIC( 8, 510);
560 AAC_INIT_VLC_STATIC( 9, 366);
561 AAC_INIT_VLC_STATIC(10, 462);
565 dsputil_init(&ac->dsp, avctx);
567 ac->random_state = 0x1f2e3d4c;
569 // -1024 - Compensate wrong IMDCT method.
570 // 32768 - Required to scale values to the correct range for the bias method
571 // for float to int16 conversion.
573 if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
574 ac->add_bias = 385.0f;
575 ac->sf_scale = 1. / (-1024. * 32768.);
579 ac->sf_scale = 1. / -1024.;
585 INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
586 ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
587 ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
590 ff_mdct_init(&ac->mdct, 11, 1, 1.0);
591 ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
592 // window initialization
593 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
594 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
595 ff_init_ff_sine_windows(10);
596 ff_init_ff_sine_windows( 7);
604 * Skip data_stream_element; reference: table 4.10.
606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
608 int byte_align = get_bits1(gb);
609 int count = get_bits(gb, 8);
611 count += get_bits(gb, 8);
615 if (get_bits_left(gb) < 8 * count) {
616 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
619 skip_bits_long(gb, 8 * count);
623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
628 ics->predictor_reset_group = get_bits(gb, 5);
629 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
630 av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
634 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
635 ics->prediction_used[sfb] = get_bits1(gb);
641 * Decode Individual Channel Stream info; reference: table 4.6.
643 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
646 GetBitContext *gb, int common_window)
649 av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
650 memset(ics, 0, sizeof(IndividualChannelStream));
653 ics->window_sequence[1] = ics->window_sequence[0];
654 ics->window_sequence[0] = get_bits(gb, 2);
655 ics->use_kb_window[1] = ics->use_kb_window[0];
656 ics->use_kb_window[0] = get_bits1(gb);
657 ics->num_window_groups = 1;
658 ics->group_len[0] = 1;
659 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
661 ics->max_sfb = get_bits(gb, 4);
662 for (i = 0; i < 7; i++) {
664 ics->group_len[ics->num_window_groups - 1]++;
666 ics->num_window_groups++;
667 ics->group_len[ics->num_window_groups - 1] = 1;
670 ics->num_windows = 8;
671 ics->swb_offset = ff_swb_offset_128[ac->m4ac.sampling_index];
672 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
673 ics->tns_max_bands = ff_tns_max_bands_128[ac->m4ac.sampling_index];
674 ics->predictor_present = 0;
676 ics->max_sfb = get_bits(gb, 6);
677 ics->num_windows = 1;
678 ics->swb_offset = ff_swb_offset_1024[ac->m4ac.sampling_index];
679 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
680 ics->tns_max_bands = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
681 ics->predictor_present = get_bits1(gb);
682 ics->predictor_reset_group = 0;
683 if (ics->predictor_present) {
684 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
685 if (decode_prediction(ac, ics, gb)) {
686 memset(ics, 0, sizeof(IndividualChannelStream));
689 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
690 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
691 memset(ics, 0, sizeof(IndividualChannelStream));
694 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
695 memset(ics, 0, sizeof(IndividualChannelStream));
701 if (ics->max_sfb > ics->num_swb) {
702 av_log(ac->avctx, AV_LOG_ERROR,
703 "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
704 ics->max_sfb, ics->num_swb);
705 memset(ics, 0, sizeof(IndividualChannelStream));
713 * Decode band types (section_data payload); reference: table 4.46.
715 * @param band_type array of the used band type
716 * @param band_type_run_end array of the last scalefactor band of a band type run
718 * @return Returns error status. 0 - OK, !0 - error
720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
721 int band_type_run_end[120], GetBitContext *gb,
722 IndividualChannelStream *ics)
725 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
726 for (g = 0; g < ics->num_window_groups; g++) {
728 while (k < ics->max_sfb) {
729 uint8_t sect_end = k;
731 int sect_band_type = get_bits(gb, 4);
732 if (sect_band_type == 12) {
733 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
736 while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
737 sect_end += sect_len_incr;
738 sect_end += sect_len_incr;
739 if (get_bits_left(gb) < 0) {
740 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
743 if (sect_end > ics->max_sfb) {
744 av_log(ac->avctx, AV_LOG_ERROR,
745 "Number of bands (%d) exceeds limit (%d).\n",
746 sect_end, ics->max_sfb);
749 for (; k < sect_end; k++) {
750 band_type [idx] = sect_band_type;
751 band_type_run_end[idx++] = sect_end;
759 * Decode scalefactors; reference: table 4.47.
761 * @param global_gain first scalefactor value as scalefactors are differentially coded
762 * @param band_type array of the used band type
763 * @param band_type_run_end array of the last scalefactor band of a band type run
764 * @param sf array of scalefactors or intensity stereo positions
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
769 unsigned int global_gain,
770 IndividualChannelStream *ics,
771 enum BandType band_type[120],
772 int band_type_run_end[120])
774 const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
776 int offset[3] = { global_gain, global_gain - 90, 100 };
778 static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
779 for (g = 0; g < ics->num_window_groups; g++) {
780 for (i = 0; i < ics->max_sfb;) {
781 int run_end = band_type_run_end[idx];
782 if (band_type[idx] == ZERO_BT) {
783 for (; i < run_end; i++, idx++)
785 } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
786 for (; i < run_end; i++, idx++) {
787 offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
788 if (offset[2] > 255U) {
789 av_log(ac->avctx, AV_LOG_ERROR,
790 "%s (%d) out of range.\n", sf_str[2], offset[2]);
793 sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
795 } else if (band_type[idx] == NOISE_BT) {
796 for (; i < run_end; i++, idx++) {
797 if (noise_flag-- > 0)
798 offset[1] += get_bits(gb, 9) - 256;
800 offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
801 if (offset[1] > 255U) {
802 av_log(ac->avctx, AV_LOG_ERROR,
803 "%s (%d) out of range.\n", sf_str[1], offset[1]);
806 sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
809 for (; i < run_end; i++, idx++) {
810 offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
811 if (offset[0] > 255U) {
812 av_log(ac->avctx, AV_LOG_ERROR,
813 "%s (%d) out of range.\n", sf_str[0], offset[0]);
816 sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
825 * Decode pulse data; reference: table 4.7.
827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
828 const uint16_t *swb_offset, int num_swb)
831 pulse->num_pulse = get_bits(gb, 2) + 1;
832 pulse_swb = get_bits(gb, 6);
833 if (pulse_swb >= num_swb)
835 pulse->pos[0] = swb_offset[pulse_swb];
836 pulse->pos[0] += get_bits(gb, 5);
837 if (pulse->pos[0] > 1023)
839 pulse->amp[0] = get_bits(gb, 4);
840 for (i = 1; i < pulse->num_pulse; i++) {
841 pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
842 if (pulse->pos[i] > 1023)
844 pulse->amp[i] = get_bits(gb, 4);
850 * Decode Temporal Noise Shaping data; reference: table 4.48.
852 * @return Returns error status. 0 - OK, !0 - error
854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
855 GetBitContext *gb, const IndividualChannelStream *ics)
857 int w, filt, i, coef_len, coef_res, coef_compress;
858 const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
859 const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
860 for (w = 0; w < ics->num_windows; w++) {
861 if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
862 coef_res = get_bits1(gb);
864 for (filt = 0; filt < tns->n_filt[w]; filt++) {
866 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
868 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
869 av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
870 tns->order[w][filt], tns_max_order);
871 tns->order[w][filt] = 0;
874 if (tns->order[w][filt]) {
875 tns->direction[w][filt] = get_bits1(gb);
876 coef_compress = get_bits1(gb);
877 coef_len = coef_res + 3 - coef_compress;
878 tmp2_idx = 2 * coef_compress + coef_res;
880 for (i = 0; i < tns->order[w][filt]; i++)
881 tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
890 * Decode Mid/Side data; reference: table 4.54.
892 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
893 * [1] mask is decoded from bitstream; [2] mask is all 1s;
894 * [3] reserved for scalable AAC
896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
900 if (ms_present == 1) {
901 for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
902 cpe->ms_mask[idx] = get_bits1(gb);
903 } else if (ms_present == 2) {
904 memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
913 *dst++ = v[idx & 15] * s;
914 *dst++ = v[idx>>4 & 15] * s;
920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
924 *dst++ = v[idx & 3] * s;
925 *dst++ = v[idx>>2 & 3] * s;
926 *dst++ = v[idx>>4 & 3] * s;
927 *dst++ = v[idx>>6 & 3] * s;
933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
934 unsigned sign, const float *scale)
936 union float754 s0, s1;
938 s0.f = s1.f = *scale;
939 s0.i ^= sign >> 1 << 31;
942 *dst++ = v[idx & 15] * s0.f;
943 *dst++ = v[idx>>4 & 15] * s1.f;
950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
951 unsigned sign, const float *scale)
953 unsigned nz = idx >> 12;
954 union float754 s = { .f = *scale };
957 t.i = s.i ^ (sign & 1<<31);
958 *dst++ = v[idx & 3] * t.f;
960 sign <<= nz & 1; nz >>= 1;
961 t.i = s.i ^ (sign & 1<<31);
962 *dst++ = v[idx>>2 & 3] * t.f;
964 sign <<= nz & 1; nz >>= 1;
965 t.i = s.i ^ (sign & 1<<31);
966 *dst++ = v[idx>>4 & 3] * t.f;
968 sign <<= nz & 1; nz >>= 1;
969 t.i = s.i ^ (sign & 1<<31);
970 *dst++ = v[idx>>6 & 3] * t.f;
977 * Decode spectral data; reference: table 4.50.
978 * Dequantize and scale spectral data; reference: 4.6.3.3.
980 * @param coef array of dequantized, scaled spectral data
981 * @param sf array of scalefactors or intensity stereo positions
982 * @param pulse_present set if pulses are present
983 * @param pulse pointer to pulse data struct
984 * @param band_type array of the used band type
986 * @return Returns error status. 0 - OK, !0 - error
988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
989 GetBitContext *gb, const float sf[120],
990 int pulse_present, const Pulse *pulse,
991 const IndividualChannelStream *ics,
992 enum BandType band_type[120])
994 int i, k, g, idx = 0;
995 const int c = 1024 / ics->num_windows;
996 const uint16_t *offsets = ics->swb_offset;
997 float *coef_base = coef;
1000 for (g = 0; g < ics->num_windows; g++)
1001 memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1003 for (g = 0; g < ics->num_window_groups; g++) {
1004 unsigned g_len = ics->group_len[g];
1006 for (i = 0; i < ics->max_sfb; i++, idx++) {
1007 const unsigned cbt_m1 = band_type[idx] - 1;
1008 float *cfo = coef + offsets[i];
1009 int off_len = offsets[i + 1] - offsets[i];
1012 if (cbt_m1 >= INTENSITY_BT2 - 1) {
1013 for (group = 0; group < g_len; group++, cfo+=128) {
1014 memset(cfo, 0, off_len * sizeof(float));
1016 } else if (cbt_m1 == NOISE_BT - 1) {
1017 for (group = 0; group < g_len; group++, cfo+=128) {
1021 for (k = 0; k < off_len; k++) {
1022 ac->random_state = lcg_random(ac->random_state);
1023 cfo[k] = ac->random_state;
1026 band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1027 scale = sf[idx] / sqrtf(band_energy);
1028 ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1031 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1032 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1033 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1034 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1035 OPEN_READER(re, gb);
1037 switch (cbt_m1 >> 1) {
1039 for (group = 0; group < g_len; group++, cfo+=128) {
1047 UPDATE_CACHE(re, gb);
1048 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1050 if (code >= cb_size) {
1052 goto err_cb_overflow;
1055 cb_idx = cb_vector_idx[code];
1056 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1062 for (group = 0; group < g_len; group++, cfo+=128) {
1072 UPDATE_CACHE(re, gb);
1073 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1075 if (code >= cb_size) {
1077 goto err_cb_overflow;
1080 #if MIN_CACHE_BITS < 20
1081 UPDATE_CACHE(re, gb);
1083 cb_idx = cb_vector_idx[code];
1084 nnz = cb_idx >> 8 & 15;
1085 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086 LAST_SKIP_BITS(re, gb, nnz);
1087 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1093 for (group = 0; group < g_len; group++, cfo+=128) {
1101 UPDATE_CACHE(re, gb);
1102 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1104 if (code >= cb_size) {
1106 goto err_cb_overflow;
1109 cb_idx = cb_vector_idx[code];
1110 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1117 for (group = 0; group < g_len; group++, cfo+=128) {
1127 UPDATE_CACHE(re, gb);
1128 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1130 if (code >= cb_size) {
1132 goto err_cb_overflow;
1135 cb_idx = cb_vector_idx[code];
1136 nnz = cb_idx >> 8 & 15;
1137 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1138 LAST_SKIP_BITS(re, gb, nnz);
1139 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1145 for (group = 0; group < g_len; group++, cfo+=128) {
1147 uint32_t *icf = (uint32_t *) cf;
1157 UPDATE_CACHE(re, gb);
1158 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1166 if (code >= cb_size) {
1168 goto err_cb_overflow;
1171 cb_idx = cb_vector_idx[code];
1174 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1175 LAST_SKIP_BITS(re, gb, nnz);
1177 for (j = 0; j < 2; j++) {
1181 /* The total length of escape_sequence must be < 22 bits according
1182 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1183 UPDATE_CACHE(re, gb);
1184 b = GET_CACHE(re, gb);
1185 b = 31 - av_log2(~b);
1188 av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1192 #if MIN_CACHE_BITS < 21
1193 LAST_SKIP_BITS(re, gb, b + 1);
1194 UPDATE_CACHE(re, gb);
1196 SKIP_BITS(re, gb, b + 1);
1199 n = (1 << b) + SHOW_UBITS(re, gb, b);
1200 LAST_SKIP_BITS(re, gb, b);
1201 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1204 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1205 *icf++ = (bits & 1<<31) | v;
1212 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1216 CLOSE_READER(re, gb);
1222 if (pulse_present) {
1224 for (i = 0; i < pulse->num_pulse; i++) {
1225 float co = coef_base[ pulse->pos[i] ];
1226 while (offsets[idx + 1] <= pulse->pos[i])
1228 if (band_type[idx] != NOISE_BT && sf[idx]) {
1229 float ico = -pulse->amp[i];
1232 ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1234 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1241 av_log(ac->avctx, AV_LOG_ERROR,
1242 "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1243 band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1247 static av_always_inline float flt16_round(float pf)
1251 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1255 static av_always_inline float flt16_even(float pf)
1259 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1263 static av_always_inline float flt16_trunc(float pf)
1267 pun.i &= 0xFFFF0000U;
1271 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1274 const float a = 0.953125; // 61.0 / 64
1275 const float alpha = 0.90625; // 29.0 / 32
1280 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1281 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1283 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1285 *coef += pv * ac->sf_scale;
1287 e0 = *coef / ac->sf_scale;
1288 e1 = e0 - k1 * ps->r0;
1290 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1291 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1292 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1293 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
1295 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1296 ps->r0 = flt16_trunc(a * e0);
1300 * Apply AAC-Main style frequency domain prediction.
1302 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1306 if (!sce->ics.predictor_initialized) {
1307 reset_all_predictors(sce->predictor_state);
1308 sce->ics.predictor_initialized = 1;
1311 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1312 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1313 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1314 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1315 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1318 if (sce->ics.predictor_reset_group)
1319 reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1321 reset_all_predictors(sce->predictor_state);
1325 * Decode an individual_channel_stream payload; reference: table 4.44.
1327 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1328 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1330 * @return Returns error status. 0 - OK, !0 - error
1332 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1333 GetBitContext *gb, int common_window, int scale_flag)
1336 TemporalNoiseShaping *tns = &sce->tns;
1337 IndividualChannelStream *ics = &sce->ics;
1338 float *out = sce->coeffs;
1339 int global_gain, pulse_present = 0;
1341 /* This assignment is to silence a GCC warning about the variable being used
1342 * uninitialized when in fact it is always initialized before use.
1344 pulse.num_pulse = 0;
1346 global_gain = get_bits(gb, 8);
1348 if (!common_window && !scale_flag) {
1349 if (decode_ics_info(ac, ics, gb, 0) < 0)
1353 if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1355 if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1360 if ((pulse_present = get_bits1(gb))) {
1361 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1362 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1365 if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1366 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1370 if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1372 if (get_bits1(gb)) {
1373 av_log_missing_feature(ac->avctx, "SSR", 1);
1378 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1381 if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1382 apply_prediction(ac, sce);
1388 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1390 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1392 const IndividualChannelStream *ics = &cpe->ch[0].ics;
1393 float *ch0 = cpe->ch[0].coeffs;
1394 float *ch1 = cpe->ch[1].coeffs;
1395 int g, i, group, idx = 0;
1396 const uint16_t *offsets = ics->swb_offset;
1397 for (g = 0; g < ics->num_window_groups; g++) {
1398 for (i = 0; i < ics->max_sfb; i++, idx++) {
1399 if (cpe->ms_mask[idx] &&
1400 cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1401 for (group = 0; group < ics->group_len[g]; group++) {
1402 ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1403 ch1 + group * 128 + offsets[i],
1404 offsets[i+1] - offsets[i]);
1408 ch0 += ics->group_len[g] * 128;
1409 ch1 += ics->group_len[g] * 128;
1414 * intensity stereo decoding; reference: 4.6.8.2.3
1416 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1417 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1418 * [3] reserved for scalable AAC
1420 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1422 const IndividualChannelStream *ics = &cpe->ch[1].ics;
1423 SingleChannelElement *sce1 = &cpe->ch[1];
1424 float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1425 const uint16_t *offsets = ics->swb_offset;
1426 int g, group, i, k, idx = 0;
1429 for (g = 0; g < ics->num_window_groups; g++) {
1430 for (i = 0; i < ics->max_sfb;) {
1431 if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1432 const int bt_run_end = sce1->band_type_run_end[idx];
1433 for (; i < bt_run_end; i++, idx++) {
1434 c = -1 + 2 * (sce1->band_type[idx] - 14);
1436 c *= 1 - 2 * cpe->ms_mask[idx];
1437 scale = c * sce1->sf[idx];
1438 for (group = 0; group < ics->group_len[g]; group++)
1439 for (k = offsets[i]; k < offsets[i + 1]; k++)
1440 coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1443 int bt_run_end = sce1->band_type_run_end[idx];
1444 idx += bt_run_end - i;
1448 coef0 += ics->group_len[g] * 128;
1449 coef1 += ics->group_len[g] * 128;
1454 * Decode a channel_pair_element; reference: table 4.4.
1456 * @return Returns error status. 0 - OK, !0 - error
1458 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1460 int i, ret, common_window, ms_present = 0;
1462 common_window = get_bits1(gb);
1463 if (common_window) {
1464 if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1466 i = cpe->ch[1].ics.use_kb_window[0];
1467 cpe->ch[1].ics = cpe->ch[0].ics;
1468 cpe->ch[1].ics.use_kb_window[1] = i;
1469 ms_present = get_bits(gb, 2);
1470 if (ms_present == 3) {
1471 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1473 } else if (ms_present)
1474 decode_mid_side_stereo(cpe, gb, ms_present);
1476 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1478 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1481 if (common_window) {
1483 apply_mid_side_stereo(ac, cpe);
1484 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1485 apply_prediction(ac, &cpe->ch[0]);
1486 apply_prediction(ac, &cpe->ch[1]);
1490 apply_intensity_stereo(cpe, ms_present);
1495 * Decode coupling_channel_element; reference: table 4.8.
1497 * @return Returns error status. 0 - OK, !0 - error
1499 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1505 SingleChannelElement *sce = &che->ch[0];
1506 ChannelCoupling *coup = &che->coup;
1508 coup->coupling_point = 2 * get_bits1(gb);
1509 coup->num_coupled = get_bits(gb, 3);
1510 for (c = 0; c <= coup->num_coupled; c++) {
1512 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1513 coup->id_select[c] = get_bits(gb, 4);
1514 if (coup->type[c] == TYPE_CPE) {
1515 coup->ch_select[c] = get_bits(gb, 2);
1516 if (coup->ch_select[c] == 3)
1519 coup->ch_select[c] = 2;
1521 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1523 sign = get_bits(gb, 1);
1524 scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1526 if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1529 for (c = 0; c < num_gain; c++) {
1533 float gain_cache = 1.;
1535 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1536 gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1537 gain_cache = pow(scale, -gain);
1539 if (coup->coupling_point == AFTER_IMDCT) {
1540 coup->gain[c][0] = gain_cache;
1542 for (g = 0; g < sce->ics.num_window_groups; g++) {
1543 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1544 if (sce->band_type[idx] != ZERO_BT) {
1546 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1554 gain_cache = pow(scale, -t) * s;
1557 coup->gain[c][idx] = gain_cache;
1567 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1569 * @return Returns number of bytes consumed.
1571 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1575 int num_excl_chan = 0;
1578 for (i = 0; i < 7; i++)
1579 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1580 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1582 return num_excl_chan / 7;
1586 * Decode dynamic range information; reference: table 4.52.
1588 * @param cnt length of TYPE_FIL syntactic element in bytes
1590 * @return Returns number of bytes consumed.
1592 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1593 GetBitContext *gb, int cnt)
1596 int drc_num_bands = 1;
1599 /* pce_tag_present? */
1600 if (get_bits1(gb)) {
1601 che_drc->pce_instance_tag = get_bits(gb, 4);
1602 skip_bits(gb, 4); // tag_reserved_bits
1606 /* excluded_chns_present? */
1607 if (get_bits1(gb)) {
1608 n += decode_drc_channel_exclusions(che_drc, gb);
1611 /* drc_bands_present? */
1612 if (get_bits1(gb)) {
1613 che_drc->band_incr = get_bits(gb, 4);
1614 che_drc->interpolation_scheme = get_bits(gb, 4);
1616 drc_num_bands += che_drc->band_incr;
1617 for (i = 0; i < drc_num_bands; i++) {
1618 che_drc->band_top[i] = get_bits(gb, 8);
1623 /* prog_ref_level_present? */
1624 if (get_bits1(gb)) {
1625 che_drc->prog_ref_level = get_bits(gb, 7);
1626 skip_bits1(gb); // prog_ref_level_reserved_bits
1630 for (i = 0; i < drc_num_bands; i++) {
1631 che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1632 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1640 * Decode extension data (incomplete); reference: table 4.51.
1642 * @param cnt length of TYPE_FIL syntactic element in bytes
1644 * @return Returns number of bytes consumed
1646 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1647 ChannelElement *che, enum RawDataBlockType elem_type)
1651 switch (get_bits(gb, 4)) { // extension type
1652 case EXT_SBR_DATA_CRC:
1656 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1658 } else if (!ac->m4ac.sbr) {
1659 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1660 skip_bits_long(gb, 8 * cnt - 4);
1662 } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1663 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1664 skip_bits_long(gb, 8 * cnt - 4);
1666 } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1669 output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1673 res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1675 case EXT_DYNAMIC_RANGE:
1676 res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1680 case EXT_DATA_ELEMENT:
1682 skip_bits_long(gb, 8 * cnt - 4);
1689 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1691 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
1692 * @param coef spectral coefficients
1694 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1695 IndividualChannelStream *ics, int decode)
1697 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1699 int bottom, top, order, start, end, size, inc;
1700 float lpc[TNS_MAX_ORDER];
1702 for (w = 0; w < ics->num_windows; w++) {
1703 bottom = ics->num_swb;
1704 for (filt = 0; filt < tns->n_filt[w]; filt++) {
1706 bottom = FFMAX(0, top - tns->length[w][filt]);
1707 order = tns->order[w][filt];
1712 compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1714 start = ics->swb_offset[FFMIN(bottom, mmm)];
1715 end = ics->swb_offset[FFMIN( top, mmm)];
1716 if ((size = end - start) <= 0)
1718 if (tns->direction[w][filt]) {
1727 for (m = 0; m < size; m++, start += inc)
1728 for (i = 1; i <= FFMIN(m, order); i++)
1729 coef[start] -= coef[start - i * inc] * lpc[i - 1];
1735 * Conduct IMDCT and windowing.
1737 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1739 IndividualChannelStream *ics = &sce->ics;
1740 float *in = sce->coeffs;
1741 float *out = sce->ret;
1742 float *saved = sce->saved;
1743 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1744 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1745 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1746 float *buf = ac->buf_mdct;
1747 float *temp = ac->temp;
1751 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1752 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1753 av_log(ac->avctx, AV_LOG_WARNING,
1754 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1755 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1756 for (i = 0; i < 1024; i += 128)
1757 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1759 ff_imdct_half(&ac->mdct, buf, in);
1761 /* window overlapping
1762 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1763 * and long to short transitions are considered to be short to short
1764 * transitions. This leaves just two cases (long to long and short to short)
1765 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1767 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1768 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1769 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, bias, 512);
1771 for (i = 0; i < 448; i++)
1772 out[i] = saved[i] + bias;
1774 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1775 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, bias, 64);
1776 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, bias, 64);
1777 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, bias, 64);
1778 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, bias, 64);
1779 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, bias, 64);
1780 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
1782 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, bias, 64);
1783 for (i = 576; i < 1024; i++)
1784 out[i] = buf[i-512] + bias;
1789 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1790 for (i = 0; i < 64; i++)
1791 saved[i] = temp[64 + i] - bias;
1792 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1793 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1794 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1795 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1796 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1797 memcpy( saved, buf + 512, 448 * sizeof(float));
1798 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1799 } else { // LONG_STOP or ONLY_LONG
1800 memcpy( saved, buf + 512, 512 * sizeof(float));
1805 * Apply dependent channel coupling (applied before IMDCT).
1807 * @param index index into coupling gain array
1809 static void apply_dependent_coupling(AACContext *ac,
1810 SingleChannelElement *target,
1811 ChannelElement *cce, int index)
1813 IndividualChannelStream *ics = &cce->ch[0].ics;
1814 const uint16_t *offsets = ics->swb_offset;
1815 float *dest = target->coeffs;
1816 const float *src = cce->ch[0].coeffs;
1817 int g, i, group, k, idx = 0;
1818 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1819 av_log(ac->avctx, AV_LOG_ERROR,
1820 "Dependent coupling is not supported together with LTP\n");
1823 for (g = 0; g < ics->num_window_groups; g++) {
1824 for (i = 0; i < ics->max_sfb; i++, idx++) {
1825 if (cce->ch[0].band_type[idx] != ZERO_BT) {
1826 const float gain = cce->coup.gain[index][idx];
1827 for (group = 0; group < ics->group_len[g]; group++) {
1828 for (k = offsets[i]; k < offsets[i + 1]; k++) {
1830 dest[group * 128 + k] += gain * src[group * 128 + k];
1835 dest += ics->group_len[g] * 128;
1836 src += ics->group_len[g] * 128;
1841 * Apply independent channel coupling (applied after IMDCT).
1843 * @param index index into coupling gain array
1845 static void apply_independent_coupling(AACContext *ac,
1846 SingleChannelElement *target,
1847 ChannelElement *cce, int index)
1850 const float gain = cce->coup.gain[index][0];
1851 const float bias = ac->add_bias;
1852 const float *src = cce->ch[0].ret;
1853 float *dest = target->ret;
1854 const int len = 1024 << (ac->m4ac.sbr == 1);
1856 for (i = 0; i < len; i++)
1857 dest[i] += gain * (src[i] - bias);
1861 * channel coupling transformation interface
1863 * @param apply_coupling_method pointer to (in)dependent coupling function
1865 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1866 enum RawDataBlockType type, int elem_id,
1867 enum CouplingPoint coupling_point,
1868 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1872 for (i = 0; i < MAX_ELEM_ID; i++) {
1873 ChannelElement *cce = ac->che[TYPE_CCE][i];
1876 if (cce && cce->coup.coupling_point == coupling_point) {
1877 ChannelCoupling *coup = &cce->coup;
1879 for (c = 0; c <= coup->num_coupled; c++) {
1880 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1881 if (coup->ch_select[c] != 1) {
1882 apply_coupling_method(ac, &cc->ch[0], cce, index);
1883 if (coup->ch_select[c] != 0)
1886 if (coup->ch_select[c] != 2)
1887 apply_coupling_method(ac, &cc->ch[1], cce, index++);
1889 index += 1 + (coup->ch_select[c] == 3);
1896 * Convert spectral data to float samples, applying all supported tools as appropriate.
1898 static void spectral_to_sample(AACContext *ac)
1901 float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1902 for (type = 3; type >= 0; type--) {
1903 for (i = 0; i < MAX_ELEM_ID; i++) {
1904 ChannelElement *che = ac->che[type][i];
1906 if (type <= TYPE_CPE)
1907 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1908 if (che->ch[0].tns.present)
1909 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1910 if (che->ch[1].tns.present)
1911 apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1912 if (type <= TYPE_CPE)
1913 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1914 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1915 imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1916 if (type == TYPE_CPE) {
1917 imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1919 if (ac->m4ac.sbr > 0) {
1920 ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1923 if (type <= TYPE_CCE)
1924 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1930 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1933 AACADTSHeaderInfo hdr_info;
1935 size = ff_aac_parse_header(gb, &hdr_info);
1937 if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1938 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1939 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1940 ac->m4ac.chan_config = hdr_info.chan_config;
1941 if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1943 if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1945 } else if (ac->output_configured != OC_LOCKED) {
1946 ac->output_configured = OC_NONE;
1948 if (ac->output_configured != OC_LOCKED) {
1952 ac->m4ac.sample_rate = hdr_info.sample_rate;
1953 ac->m4ac.sampling_index = hdr_info.sampling_index;
1954 ac->m4ac.object_type = hdr_info.object_type;
1955 if (!ac->avctx->sample_rate)
1956 ac->avctx->sample_rate = hdr_info.sample_rate;
1957 if (hdr_info.num_aac_frames == 1) {
1958 if (!hdr_info.crc_absent)
1961 av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1968 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1969 int *data_size, AVPacket *avpkt)
1971 const uint8_t *buf = avpkt->data;
1972 int buf_size = avpkt->size;
1973 AACContext *ac = avctx->priv_data;
1974 ChannelElement *che = NULL, *che_prev = NULL;
1976 enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1977 int err, elem_id, data_size_tmp;
1979 int samples = 0, multiplier;
1982 init_get_bits(&gb, buf, buf_size * 8);
1984 if (show_bits(&gb, 12) == 0xfff) {
1985 if (parse_adts_frame_header(ac, &gb) < 0) {
1986 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1989 if (ac->m4ac.sampling_index > 12) {
1990 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1995 memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
1997 while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1998 elem_id = get_bits(&gb, 4);
2000 if (elem_type < TYPE_DSE) {
2001 if (!(che=get_che(ac, elem_type, elem_id))) {
2002 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2003 elem_type, elem_id);
2009 switch (elem_type) {
2012 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2016 err = decode_cpe(ac, &gb, che);
2020 err = decode_cce(ac, &gb, che);
2024 err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2028 err = skip_data_stream_element(ac, &gb);
2032 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2033 memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2034 if ((err = decode_pce(ac, new_che_pos, &gb)))
2036 if (ac->output_configured > OC_TRIAL_PCE)
2037 av_log(avctx, AV_LOG_ERROR,
2038 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2040 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2046 elem_id += get_bits(&gb, 8) - 1;
2047 if (get_bits_left(&gb) < 8 * elem_id) {
2048 av_log(avctx, AV_LOG_ERROR, overread_err);
2052 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2053 err = 0; /* FIXME */
2057 err = -1; /* should not happen, but keeps compiler happy */
2062 elem_type_prev = elem_type;
2067 if (get_bits_left(&gb) < 3) {
2068 av_log(avctx, AV_LOG_ERROR, overread_err);
2073 spectral_to_sample(ac);
2075 multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2076 samples <<= multiplier;
2077 if (ac->output_configured < OC_LOCKED) {
2078 avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2079 avctx->frame_size = samples;
2082 data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2083 if (*data_size < data_size_tmp) {
2084 av_log(avctx, AV_LOG_ERROR,
2085 "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2086 *data_size, data_size_tmp);
2089 *data_size = data_size_tmp;
2092 ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2094 if (ac->output_configured)
2095 ac->output_configured = OC_LOCKED;
2097 buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2098 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2099 if (buf[buf_offset])
2102 return buf_size > buf_offset ? buf_consumed : buf_size;
2105 static av_cold int aac_decode_close(AVCodecContext *avctx)
2107 AACContext *ac = avctx->priv_data;
2110 for (i = 0; i < MAX_ELEM_ID; i++) {
2111 for (type = 0; type < 4; type++) {
2112 if (ac->che[type][i])
2113 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2114 av_freep(&ac->che[type][i]);
2118 ff_mdct_end(&ac->mdct);
2119 ff_mdct_end(&ac->mdct_small);
2123 AVCodec aac_decoder = {
2132 .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2133 .sample_fmts = (const enum SampleFormat[]) {
2134 SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2136 .channel_layouts = aac_channel_layout,