git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * Y                    Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  74  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  75            Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aac_parser.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
  /** Overlays a float and a 32-bit unsigned integer on the same storage. */
  union float754 {
      float f;    /* floating-point view */
      uint32_t i; /* 32-bit unsigned integer view of the same storage */
  };
 108
  /* VLC table for scalefactor codes; initialised in aac_decode_init(). */
  static VLC vlc_scalefactors;
  /* One VLC table per spectral table index (0-10); initialised in
   * aac_decode_init() through AAC_INIT_VLC_STATIC. */
  static VLC vlc_spectral[11];

  /* Log message shared by the checks that detect reads past the input data. */
  static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE ||
 204                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 205                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 206             }
 207         }
 208     } else {
 209         if (ac->che[type][id])
 210             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 211         av_freep(&ac->che[type][id]);
 212     }
 213     return 0;
 214 }
 215
 216 /**
 217  * Configure output channel order based on the current program configuration element.
 218  *
 219  * @param   che_pos current channel position configuration
 220  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 221  *
 222  * @return  Returns error status. 0 - OK, !0 - error
 223  */
 224 static av_cold int output_configure(AACContext *ac,
 225                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 226                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 227                             int channel_config, enum OCStatus oc_type)
 228 {
 229     AVCodecContext *avctx = ac->avctx;
 230     int i, type, channels = 0, ret;
 231
 232     if (new_che_pos != che_pos)
 233     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 234
 235     if (channel_config) {
 236         for (i = 0; i < tags_per_config[channel_config]; i++) {
 237             if ((ret = che_configure(ac, che_pos,
 238                                      aac_channel_layout_map[channel_config - 1][i][0],
 239                                      aac_channel_layout_map[channel_config - 1][i][1],
 240                                      &channels)))
 241                 return ret;
 242         }
 243
 244         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 245         ac->tags_mapped = 0;
 246
 247         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 248     } else {
 249         /* Allocate or free elements depending on if they are in the
 250          * current program configuration.
 251          *
 252          * Set up default 1:1 output mapping.
 253          *
 254          * For a 5.1 stream the output order will be:
 255          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 256          */
 257
 258         for (i = 0; i < MAX_ELEM_ID; i++) {
 259             for (type = 0; type < 4; type++) {
 260                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 261                     return ret;
 262             }
 263         }
 264
 265         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 266         ac->tags_mapped = 4 * MAX_ELEM_ID;
 267
 268         avctx->channel_layout = 0;
 269     }
 270
 271     avctx->channels = channels;
 272
 273     ac->output_configured = oc_type;
 274
 275     return 0;
 276 }
 277
 278 /**
 279  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 280  *
 281  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 282  * @param sce_map mono (Single Channel Element) map
 283  * @param type speaker type/position for these channels
 284  */
 285 static void decode_channel_map(enum ChannelPosition *cpe_map,
 286                                enum ChannelPosition *sce_map,
 287                                enum ChannelPosition type,
 288                                GetBitContext *gb, int n)
 289 {
 290     while (n--) {
 291         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 292         map[get_bits(gb, 4)] = type;
 293     }
 294 }
 295
 296 /**
 297  * Decode program configuration element; reference: table 4.2.
 298  *
 299  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 300  *
 301  * @return  Returns error status. 0 - OK, !0 - error
 302  */
 303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 304                       GetBitContext *gb)
 305 {
 306     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 307     int comment_len;
 308
 309     skip_bits(gb, 2);  // object_type
 310
 311     sampling_index = get_bits(gb, 4);
 312     if (ac->m4ac.sampling_index != sampling_index)
 313         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 314
 315     num_front       = get_bits(gb, 4);
 316     num_side        = get_bits(gb, 4);
 317     num_back        = get_bits(gb, 4);
 318     num_lfe         = get_bits(gb, 2);
 319     num_assoc_data  = get_bits(gb, 3);
 320     num_cc          = get_bits(gb, 4);
 321
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // mono_mixdown_tag
 324     if (get_bits1(gb))
 325         skip_bits(gb, 4); // stereo_mixdown_tag
 326
 327     if (get_bits1(gb))
 328         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 329
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 331     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 332     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 333     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 334
 335     skip_bits_long(gb, 4 * num_assoc_data);
 336
 337     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 338
 339     align_get_bits(gb);
 340
 341     /* comment field, first byte is length */
 342     comment_len = get_bits(gb, 8) * 8;
 343     if (get_bits_left(gb) < comment_len) {
 344         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 345         return -1;
 346     }
 347     skip_bits_long(gb, comment_len);
 348     return 0;
 349 }
 350
 351 /**
 352  * Set up channel positions based on a default channel configuration
 353  * as specified in table 1.17.
 354  *
 355  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 356  *
 357  * @return  Returns error status. 0 - OK, !0 - error
 358  */
 359 static av_cold int set_default_channel_config(AACContext *ac,
 360                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 361                                       int channel_config)
 362 {
 363     if (channel_config < 1 || channel_config > 7) {
 364         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 365                channel_config);
 366         return -1;
 367     }
 368
 369     /* default channel configurations:
 370      *
 371      * 1ch : front center (mono)
 372      * 2ch : L + R (stereo)
 373      * 3ch : front center + L + R
 374      * 4ch : front center + L + R + back center
 375      * 5ch : front center + L + R + back stereo
 376      * 6ch : front center + L + R + back stereo + LFE
 377      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 378      */
 379
 380     if (channel_config != 2)
 381         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 382     if (channel_config > 1)
 383         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 384     if (channel_config == 4)
 385         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 386     if (channel_config > 4)
 387         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 388         = AAC_CHANNEL_BACK;  // back stereo
 389     if (channel_config > 5)
 390         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 391     if (channel_config == 7)
 392         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 393
 394     return 0;
 395 }
 396
 397 /**
 398  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 399  *
 400  * @return  Returns error status. 0 - OK, !0 - error
 401  */
 402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 403                                      int channel_config)
 404 {
 405     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 406     int extension_flag, ret;
 407
 408     if (get_bits1(gb)) { // frameLengthFlag
 409         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 410         return -1;
 411     }
 412
 413     if (get_bits1(gb))       // dependsOnCoreCoder
 414         skip_bits(gb, 14);   // coreCoderDelay
 415     extension_flag = get_bits1(gb);
 416
 417     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 418         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 419         skip_bits(gb, 3);     // layerNr
 420
 421     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 422     if (channel_config == 0) {
 423         skip_bits(gb, 4);  // element_instance_tag
 424         if ((ret = decode_pce(ac, new_che_pos, gb)))
 425             return ret;
 426     } else {
 427         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 428             return ret;
 429     }
 430     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 431         return ret;
 432
 433     if (extension_flag) {
 434         switch (ac->m4ac.object_type) {
 435         case AOT_ER_BSAC:
 436             skip_bits(gb, 5);    // numOfSubFrame
 437             skip_bits(gb, 11);   // layer_length
 438             break;
 439         case AOT_ER_AAC_LC:
 440         case AOT_ER_AAC_LTP:
 441         case AOT_ER_AAC_SCALABLE:
 442         case AOT_ER_AAC_LD:
 443             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 444                                     * aacScalefactorDataResilienceFlag
 445                                     * aacSpectralDataResilienceFlag
 446                                     */
 447             break;
 448         }
 449         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 450     }
 451     return 0;
 452 }
 453
 454 /**
 455  * Decode audio specific configuration; reference: table 1.13.
 456  *
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status. 0 - OK, !0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac, void *data,
 463                                         int data_size)
 464 {
 465     GetBitContext gb;
 466     int i;
 467
 468     init_get_bits(&gb, data, data_size * 8);
 469
 470     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 471         return -1;
 472     if (ac->m4ac.sampling_index > 12) {
 473         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 474         return -1;
 475     }
 476     if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
 477         ac->m4ac.ps = 1;
 478
 479     skip_bits_long(&gb, i);
 480
 481     switch (ac->m4ac.object_type) {
 482     case AOT_AAC_MAIN:
 483     case AOT_AAC_LC:
 484         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 485             return -1;
 486         break;
 487     default:
 488         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 489                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 490         return -1;
 491     }
 492     return 0;
 493 }
 494
 495 /**
 496  * linear congruential pseudorandom number generator
 497  *
 498  * @param   previous_val    pointer to the current state of the generator
 499  *
 500  * @return  Returns a 32-bit pseudorandom integer
 501  */
 502 static av_always_inline int lcg_random(int previous_val)
 503 {
 504     return previous_val * 1664525 + 1013904223;
 505 }
 506
 507 static av_always_inline void reset_predict_state(PredictorState *ps)
 508 {
 509     ps->r0   = 0.0f;
 510     ps->r1   = 0.0f;
 511     ps->cor0 = 0.0f;
 512     ps->cor1 = 0.0f;
 513     ps->var0 = 1.0f;
 514     ps->var1 = 1.0f;
 515 }
 516
 517 static void reset_all_predictors(PredictorState *ps)
 518 {
 519     int i;
 520     for (i = 0; i < MAX_PREDICTORS; i++)
 521         reset_predict_state(&ps[i]);
 522 }
 523
 524 static void reset_predictor_group(PredictorState *ps, int group_num)
 525 {
 526     int i;
 527     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 528         reset_predict_state(&ps[i]);
 529 }
 530
  /**
   * Initialise vlc_spectral[num] through INIT_VLC_STATIC, using the num-th
   * entries of ff_aac_spectral_sizes, ff_aac_spectral_bits and
   * ff_aac_spectral_codes; size is passed through as the last argument.
   *
   * Note: the expansion already ends with a semicolon.
   */
  #define AAC_INIT_VLC_STATIC(num, size) \
      INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
           ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
          ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
          size);
 536
  /**
   * Initialise the decoder: parse the extradata when present, set the output
   * sample format, and set up the VLC tables, DSP helpers, scaling constants,
   * MDCT contexts and windows used by the decoder.
   *
   * @param   avctx   codec context; avctx->priv_data is the AACContext
   *
   * @return  0 on success, -1 if the extradata cannot be decoded
   */
  static av_cold int aac_decode_init(AVCodecContext *avctx)
  {
      AACContext *ac = avctx->priv_data;

      ac->avctx = avctx;
      ac->m4ac.sample_rate = avctx->sample_rate;

      /* Extradata is optional here; when present it must decode cleanly. */
      if (avctx->extradata_size > 0) {
          if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
              return -1;
      }

      avctx->sample_fmt = SAMPLE_FMT_S16;

      /* One static VLC table per spectral table index; the second argument is
       * forwarded to INIT_VLC_STATIC as its last argument. */
      AAC_INIT_VLC_STATIC( 0, 304);
      AAC_INIT_VLC_STATIC( 1, 270);
      AAC_INIT_VLC_STATIC( 2, 550);
      AAC_INIT_VLC_STATIC( 3, 300);
      AAC_INIT_VLC_STATIC( 4, 328);
      AAC_INIT_VLC_STATIC( 5, 294);
      AAC_INIT_VLC_STATIC( 6, 306);
      AAC_INIT_VLC_STATIC( 7, 268);
      AAC_INIT_VLC_STATIC( 8, 510);
      AAC_INIT_VLC_STATIC( 9, 366);
      AAC_INIT_VLC_STATIC(10, 462);

      ff_aac_sbr_init();

      dsputil_init(&ac->dsp, avctx);

      /* Fixed initial value for ac->random_state. */
      ac->random_state = 0x1f2e3d4c;

      // -1024 - Compensate wrong IMDCT method.
      // 32768 - Required to scale values to the correct range for the bias method
      //         for float to int16 conversion.

      /* The bias/scale set depends on whether the C float-to-int16 routine is
       * the one selected by dsputil_init(). */
      if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
          ac->add_bias  = 385.0f;
          ac->sf_scale  = 1. / (-1024. * 32768.);
          ac->sf_offset = 0;
      } else {
          ac->add_bias  = 0.0f;
          ac->sf_scale  = 1. / -1024.;
          ac->sf_offset = 60;
      }

      ff_aac_tableinit();

      INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
                      ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                      ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                      352);

      /* NOTE(review): the return values of ff_mdct_init() are not checked. */
      ff_mdct_init(&ac->mdct, 11, 1, 1.0);
      ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
      // window initialization
      ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
      ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
      ff_init_ff_sine_windows(10);
      ff_init_ff_sine_windows( 7);

      cbrt_tableinit();

      return 0;
  }
 602
 603 /**
 604  * Skip data_stream_element; reference: table 4.10.
 605  */
 606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 607 {
 608     int byte_align = get_bits1(gb);
 609     int count = get_bits(gb, 8);
 610     if (count == 255)
 611         count += get_bits(gb, 8);
 612     if (byte_align)
 613         align_get_bits(gb);
 614
 615     if (get_bits_left(gb) < 8 * count) {
 616         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 617         return -1;
 618     }
 619     skip_bits_long(gb, 8 * count);
 620     return 0;
 621 }
 622
 623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 624                              GetBitContext *gb)
 625 {
 626     int sfb;
 627     if (get_bits1(gb)) {
 628         ics->predictor_reset_group = get_bits(gb, 5);
 629         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 630             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 631             return -1;
 632         }
 633     }
 634     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 635         ics->prediction_used[sfb] = get_bits1(gb);
 636     }
 637     return 0;
 638 }
 639
 640 /**
 641  * Decode Individual Channel Stream info; reference: table 4.6.
 642  *
 643  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 644  */
 645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 646                            GetBitContext *gb, int common_window)
 647 {
 648     if (get_bits1(gb)) {
 649         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 650         memset(ics, 0, sizeof(IndividualChannelStream));
 651         return -1;
 652     }
 653     ics->window_sequence[1] = ics->window_sequence[0];
 654     ics->window_sequence[0] = get_bits(gb, 2);
 655     ics->use_kb_window[1]   = ics->use_kb_window[0];
 656     ics->use_kb_window[0]   = get_bits1(gb);
 657     ics->num_window_groups  = 1;
 658     ics->group_len[0]       = 1;
 659     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 660         int i;
 661         ics->max_sfb = get_bits(gb, 4);
 662         for (i = 0; i < 7; i++) {
 663             if (get_bits1(gb)) {
 664                 ics->group_len[ics->num_window_groups - 1]++;
 665             } else {
 666                 ics->num_window_groups++;
 667                 ics->group_len[ics->num_window_groups - 1] = 1;
 668             }
 669         }
 670         ics->num_windows       = 8;
 671         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 672         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 673         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 674         ics->predictor_present = 0;
 675     } else {
 676         ics->max_sfb               = get_bits(gb, 6);
 677         ics->num_windows           = 1;
 678         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 679         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 680         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 681         ics->predictor_present     = get_bits1(gb);
 682         ics->predictor_reset_group = 0;
 683         if (ics->predictor_present) {
 684             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 685                 if (decode_prediction(ac, ics, gb)) {
 686                     memset(ics, 0, sizeof(IndividualChannelStream));
 687                     return -1;
 688                 }
 689             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 690                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 691                 memset(ics, 0, sizeof(IndividualChannelStream));
 692                 return -1;
 693             } else {
 694                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 695                 memset(ics, 0, sizeof(IndividualChannelStream));
 696                 return -1;
 697             }
 698         }
 699     }
 700
 701     if (ics->max_sfb > ics->num_swb) {
 702         av_log(ac->avctx, AV_LOG_ERROR,
 703                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 704                ics->max_sfb, ics->num_swb);
 705         memset(ics, 0, sizeof(IndividualChannelStream));
 706         return -1;
 707     }
 708
 709     return 0;
 710 }
 711
 712 /**
 713  * Decode band types (section_data payload); reference: table 4.46.
 714  *
 715  * @param   band_type           array of the used band type
 716  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 717  *
 718  * @return  Returns error status. 0 - OK, !0 - error
 719  */
 720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 721                              int band_type_run_end[120], GetBitContext *gb,
 722                              IndividualChannelStream *ics)
 723 {
 724     int g, idx = 0;
 725     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 726     for (g = 0; g < ics->num_window_groups; g++) {
 727         int k = 0;
 728         while (k < ics->max_sfb) {
 729             uint8_t sect_end = k;
 730             int sect_len_incr;
 731             int sect_band_type = get_bits(gb, 4);
 732             if (sect_band_type == 12) {
 733                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 734                 return -1;
 735             }
 736             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 737                 sect_end += sect_len_incr;
 738             sect_end += sect_len_incr;
 739             if (get_bits_left(gb) < 0) {
 740                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 741                 return -1;
 742             }
 743             if (sect_end > ics->max_sfb) {
 744                 av_log(ac->avctx, AV_LOG_ERROR,
 745                        "Number of bands (%d) exceeds limit (%d).\n",
 746                        sect_end, ics->max_sfb);
 747                 return -1;
 748             }
 749             for (; k < sect_end; k++) {
 750                 band_type        [idx]   = sect_band_type;
 751                 band_type_run_end[idx++] = sect_end;
 752             }
 753         }
 754     }
 755     return 0;
 756 }
 757
  /**
   * Decode scalefactors; reference: table 4.47.
   *
   * Values are coded differentially; three independent running values are
   * kept, one each for regular scalefactors, noise gains and intensity stereo
   * positions.
   *
   * @param   ac                  decoder context
   * @param   sf                  array of scalefactors or intensity stereo positions
   * @param   gb                  bit reader
   * @param   global_gain         first scalefactor value as scalefactors are differentially coded
   * @param   ics                 individual channel stream supplying the window
   *                              sequence, group count and max_sfb
   * @param   band_type           array of the used band type
   * @param   band_type_run_end   array of the last scalefactor band of a band type run
   *
   * @return  Returns error status. 0 - OK, !0 - error
   */
  static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                                 unsigned int global_gain,
                                 IndividualChannelStream *ics,
                                 enum BandType band_type[120],
                                 int band_type_run_end[120])
  {
      /* Table offset: 12 more for eight-short sequences. */
      const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
      int g, i, idx = 0;
      /* Running values: [0] scalefactor, [1] noise gain, [2] intensity position. */
      int offset[3] = { global_gain, global_gain - 90, 100 };
      /* Positive only until the first noise value has been read. */
      int noise_flag = 1;
      static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
      for (g = 0; g < ics->num_window_groups; g++) {
          for (i = 0; i < ics->max_sfb;) {
              /* Process one run of bands sharing the same band type. */
              int run_end = band_type_run_end[idx];
              if (band_type[idx] == ZERO_BT) {
                  for (; i < run_end; i++, idx++)
                      sf[idx] = 0.;
              } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
                  for (; i < run_end; i++, idx++) {
                      offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                      /* Unsigned comparison: negative values fail the check too. */
                      if (offset[2] > 255U) {
                          av_log(ac->avctx, AV_LOG_ERROR,
                                 "%s (%d) out of range.\n", sf_str[2], offset[2]);
                          return -1;
                      }
                      sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
                  }
              } else if (band_type[idx] == NOISE_BT) {
                  for (; i < run_end; i++, idx++) {
                      /* The first noise value is a raw 9 bit field minus 256;
                       * later ones are VLC coded deltas. */
                      if (noise_flag-- > 0)
                          offset[1] += get_bits(gb, 9) - 256;
                      else
                          offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                      /* Unsigned comparison: negative values fail the check too. */
                      if (offset[1] > 255U) {
                          av_log(ac->avctx, AV_LOG_ERROR,
                                 "%s (%d) out of range.\n", sf_str[1], offset[1]);
                          return -1;
                      }
                      sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
                  }
              } else {
                  for (; i < run_end; i++, idx++) {
                      offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                      /* Unsigned comparison: negative values fail the check too. */
                      if (offset[0] > 255U) {
                          av_log(ac->avctx, AV_LOG_ERROR,
                                 "%s (%d) out of range.\n", sf_str[0], offset[0]);
                          return -1;
                      }
                      sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
                  }
              }
          }
      }
      return 0;
  }
 823
 824 /**
 825  * Decode pulse data; reference: table 4.7.
 826  */
 827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 828                          const uint16_t *swb_offset, int num_swb)
 829 {
 830     int i, pulse_swb;
 831     pulse->num_pulse = get_bits(gb, 2) + 1;
 832     pulse_swb        = get_bits(gb, 6);
 833     if (pulse_swb >= num_swb)
 834         return -1;
 835     pulse->pos[0]    = swb_offset[pulse_swb];
 836     pulse->pos[0]   += get_bits(gb, 5);
 837     if (pulse->pos[0] > 1023)
 838         return -1;
 839     pulse->amp[0]    = get_bits(gb, 4);
 840     for (i = 1; i < pulse->num_pulse; i++) {
 841         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 842         if (pulse->pos[i] > 1023)
 843             return -1;
 844         pulse->amp[i] = get_bits(gb, 4);
 845     }
 846     return 0;
 847 }
 848
 849 /**
 850  * Decode Temporal Noise Shaping data; reference: table 4.48.
 851  *
 852  * @return  Returns error status. 0 - OK, !0 - error
 853  */
 854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 855                       GetBitContext *gb, const IndividualChannelStream *ics)
 856 {
 857     int w, filt, i, coef_len, coef_res, coef_compress;
 858     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 859     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 860     for (w = 0; w < ics->num_windows; w++) {
 861         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 862             coef_res = get_bits1(gb);
 863
 864             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 865                 int tmp2_idx;
 866                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 867
 868                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 869                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 870                            tns->order[w][filt], tns_max_order);
 871                     tns->order[w][filt] = 0;
 872                     return -1;
 873                 }
 874                 if (tns->order[w][filt]) {
 875                     tns->direction[w][filt] = get_bits1(gb);
 876                     coef_compress = get_bits1(gb);
 877                     coef_len = coef_res + 3 - coef_compress;
 878                     tmp2_idx = 2 * coef_compress + coef_res;
 879
 880                     for (i = 0; i < tns->order[w][filt]; i++)
 881                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 882                 }
 883             }
 884         }
 885     }
 886     return 0;
 887 }
 888
 889 /**
 890  * Decode Mid/Side data; reference: table 4.54.
 891  *
 892  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 893  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 894  *                      [3] reserved for scalable AAC
 895  */
 896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 897                                    int ms_present)
 898 {
 899     int idx;
 900     if (ms_present == 1) {
 901         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 902             cpe->ms_mask[idx] = get_bits1(gb);
 903     } else if (ms_present == 2) {
 904         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 905     }
 906 }
 907
 908 #ifndef VMUL2
 909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 910                            const float *scale)
 911 {
 912     float s = *scale;
 913     *dst++ = v[idx    & 15] * s;
 914     *dst++ = v[idx>>4 & 15] * s;
 915     return dst;
 916 }
 917 #endif
 918
 919 #ifndef VMUL4
 920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 921                            const float *scale)
 922 {
 923     float s = *scale;
 924     *dst++ = v[idx    & 3] * s;
 925     *dst++ = v[idx>>2 & 3] * s;
 926     *dst++ = v[idx>>4 & 3] * s;
 927     *dst++ = v[idx>>6 & 3] * s;
 928     return dst;
 929 }
 930 #endif
 931
 932 #ifndef VMUL2S
 933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 934                             unsigned sign, const float *scale)
 935 {
 936     union float754 s0, s1;
 937
 938     s0.f = s1.f = *scale;
 939     s0.i ^= sign >> 1 << 31;
 940     s1.i ^= sign      << 31;
 941
 942     *dst++ = v[idx    & 15] * s0.f;
 943     *dst++ = v[idx>>4 & 15] * s1.f;
 944
 945     return dst;
 946 }
 947 #endif
 948
 949 #ifndef VMUL4S
 950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 951                             unsigned sign, const float *scale)
 952 {
 953     unsigned nz = idx >> 12;
 954     union float754 s = { .f = *scale };
 955     union float754 t;
 956
 957     t.i = s.i ^ (sign & 1<<31);
 958     *dst++ = v[idx    & 3] * t.f;
 959
 960     sign <<= nz & 1; nz >>= 1;
 961     t.i = s.i ^ (sign & 1<<31);
 962     *dst++ = v[idx>>2 & 3] * t.f;
 963
 964     sign <<= nz & 1; nz >>= 1;
 965     t.i = s.i ^ (sign & 1<<31);
 966     *dst++ = v[idx>>4 & 3] * t.f;
 967
 968     sign <<= nz & 1; nz >>= 1;
 969     t.i = s.i ^ (sign & 1<<31);
 970     *dst++ = v[idx>>6 & 3] * t.f;
 971
 972     return dst;
 973 }
 974 #endif
 975
 976 /**
 977  * Decode spectral data; reference: table 4.50.
 978  * Dequantize and scale spectral data; reference: 4.6.3.3.
 979  *
 980  * @param   coef            array of dequantized, scaled spectral data
 981  * @param   sf              array of scalefactors or intensity stereo positions
 982  * @param   pulse_present   set if pulses are present
 983  * @param   pulse           pointer to pulse data struct
 984  * @param   band_type       array of the used band type
 985  *
 986  * @return  Returns error status. 0 - OK, !0 - error
 987  */
 988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 989                                        GetBitContext *gb, const float sf[120],
 990                                        int pulse_present, const Pulse *pulse,
 991                                        const IndividualChannelStream *ics,
 992                                        enum BandType band_type[120])
 993 {
 994     int i, k, g, idx = 0;
 995     const int c = 1024 / ics->num_windows;
 996     const uint16_t *offsets = ics->swb_offset;
 997     float *coef_base = coef;
 998     int err_idx;
 999
1000     for (g = 0; g < ics->num_windows; g++)
1001         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1002
1003     for (g = 0; g < ics->num_window_groups; g++) {
1004         unsigned g_len = ics->group_len[g];
1005
1006         for (i = 0; i < ics->max_sfb; i++, idx++) {
1007             const unsigned cbt_m1 = band_type[idx] - 1;
1008             float *cfo = coef + offsets[i];
1009             int off_len = offsets[i + 1] - offsets[i];
1010             int group;
1011
1012             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1013                 for (group = 0; group < g_len; group++, cfo+=128) {
1014                     memset(cfo, 0, off_len * sizeof(float));
1015                 }
1016             } else if (cbt_m1 == NOISE_BT - 1) {
1017                 for (group = 0; group < g_len; group++, cfo+=128) {
1018                     float scale;
1019                     float band_energy;
1020
1021                     for (k = 0; k < off_len; k++) {
1022                         ac->random_state  = lcg_random(ac->random_state);
1023                         cfo[k] = ac->random_state;
1024                     }
1025
1026                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1027                     scale = sf[idx] / sqrtf(band_energy);
1028                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1029                 }
1030             } else {
1031                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1032                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1033                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1034                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1035                 OPEN_READER(re, gb);
1036
1037                 switch (cbt_m1 >> 1) {
1038                 case 0:
1039                     for (group = 0; group < g_len; group++, cfo+=128) {
1040                         float *cf = cfo;
1041                         int len = off_len;
1042
1043                         do {
1044                             int code;
1045                             unsigned cb_idx;
1046
1047                             UPDATE_CACHE(re, gb);
1048                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1049
1050                             if (code >= cb_size) {
1051                                 err_idx = code;
1052                                 goto err_cb_overflow;
1053                             }
1054
1055                             cb_idx = cb_vector_idx[code];
1056                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1057                         } while (len -= 4);
1058                     }
1059                     break;
1060
1061                 case 1:
1062                     for (group = 0; group < g_len; group++, cfo+=128) {
1063                         float *cf = cfo;
1064                         int len = off_len;
1065
1066                         do {
1067                             int code;
1068                             unsigned nnz;
1069                             unsigned cb_idx;
1070                             uint32_t bits;
1071
1072                             UPDATE_CACHE(re, gb);
1073                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1074
1075                             if (code >= cb_size) {
1076                                 err_idx = code;
1077                                 goto err_cb_overflow;
1078                             }
1079
1080 #if MIN_CACHE_BITS < 20
1081                             UPDATE_CACHE(re, gb);
1082 #endif
1083                             cb_idx = cb_vector_idx[code];
1084                             nnz = cb_idx >> 8 & 15;
1085                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086                             LAST_SKIP_BITS(re, gb, nnz);
1087                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1088                         } while (len -= 4);
1089                     }
1090                     break;
1091
1092                 case 2:
1093                     for (group = 0; group < g_len; group++, cfo+=128) {
1094                         float *cf = cfo;
1095                         int len = off_len;
1096
1097                         do {
1098                             int code;
1099                             unsigned cb_idx;
1100
1101                             UPDATE_CACHE(re, gb);
1102                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1103
1104                             if (code >= cb_size) {
1105                                 err_idx = code;
1106                                 goto err_cb_overflow;
1107                             }
1108
1109                             cb_idx = cb_vector_idx[code];
1110                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1111                         } while (len -= 2);
1112                     }
1113                     break;
1114
1115                 case 3:
1116                 case 4:
1117                     for (group = 0; group < g_len; group++, cfo+=128) {
1118                         float *cf = cfo;
1119                         int len = off_len;
1120
1121                         do {
1122                             int code;
1123                             unsigned nnz;
1124                             unsigned cb_idx;
1125                             unsigned sign;
1126
1127                             UPDATE_CACHE(re, gb);
1128                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1129
1130                             if (code >= cb_size) {
1131                                 err_idx = code;
1132                                 goto err_cb_overflow;
1133                             }
1134
1135                             cb_idx = cb_vector_idx[code];
1136                             nnz = cb_idx >> 8 & 15;
1137                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1138                             LAST_SKIP_BITS(re, gb, nnz);
1139                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1140                         } while (len -= 2);
1141                     }
1142                     break;
1143
1144                 default:
1145                     for (group = 0; group < g_len; group++, cfo+=128) {
1146                         float *cf = cfo;
1147                         uint32_t *icf = (uint32_t *) cf;
1148                         int len = off_len;
1149
1150                         do {
1151                             int code;
1152                             unsigned nzt, nnz;
1153                             unsigned cb_idx;
1154                             uint32_t bits;
1155                             int j;
1156
1157                             UPDATE_CACHE(re, gb);
1158                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1159
1160                             if (!code) {
1161                                 *icf++ = 0;
1162                                 *icf++ = 0;
1163                                 continue;
1164                             }
1165
1166                             if (code >= cb_size) {
1167                                 err_idx = code;
1168                                 goto err_cb_overflow;
1169                             }
1170
1171                             cb_idx = cb_vector_idx[code];
1172                             nnz = cb_idx >> 12;
1173                             nzt = cb_idx >> 8;
1174                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1175                             LAST_SKIP_BITS(re, gb, nnz);
1176
1177                             for (j = 0; j < 2; j++) {
1178                                 if (nzt & 1<<j) {
1179                                     uint32_t b;
1180                                     int n;
1181                                     /* The total length of escape_sequence must be < 22 bits according
1182                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1183                                     UPDATE_CACHE(re, gb);
1184                                     b = GET_CACHE(re, gb);
1185                                     b = 31 - av_log2(~b);
1186
1187                                     if (b > 8) {
1188                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1189                                         return -1;
1190                                     }
1191
1192 #if MIN_CACHE_BITS < 21
1193                                     LAST_SKIP_BITS(re, gb, b + 1);
1194                                     UPDATE_CACHE(re, gb);
1195 #else
1196                                     SKIP_BITS(re, gb, b + 1);
1197 #endif
1198                                     b += 4;
1199                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1200                                     LAST_SKIP_BITS(re, gb, b);
1201                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1202                                     bits <<= 1;
1203                                 } else {
1204                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1205                                     *icf++ = (bits & 1<<31) | v;
1206                                     bits <<= !!v;
1207                                 }
1208                                 cb_idx >>= 4;
1209                             }
1210                         } while (len -= 2);
1211
1212                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1213                     }
1214                 }
1215
1216                 CLOSE_READER(re, gb);
1217             }
1218         }
1219         coef += g_len << 7;
1220     }
1221
1222     if (pulse_present) {
1223         idx = 0;
1224         for (i = 0; i < pulse->num_pulse; i++) {
1225             float co = coef_base[ pulse->pos[i] ];
1226             while (offsets[idx + 1] <= pulse->pos[i])
1227                 idx++;
1228             if (band_type[idx] != NOISE_BT && sf[idx]) {
1229                 float ico = -pulse->amp[i];
1230                 if (co) {
1231                     co /= sf[idx];
1232                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1233                 }
1234                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1235             }
1236         }
1237     }
1238     return 0;
1239
1240 err_cb_overflow:
1241     av_log(ac->avctx, AV_LOG_ERROR,
1242            "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1243            band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1244     return -1;
1245 }
1246
1247 static av_always_inline float flt16_round(float pf)
1248 {
1249     union float754 tmp;
1250     tmp.f = pf;
1251     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1252     return tmp.f;
1253 }
1254
1255 static av_always_inline float flt16_even(float pf)
1256 {
1257     union float754 tmp;
1258     tmp.f = pf;
1259     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1260     return tmp.f;
1261 }
1262
1263 static av_always_inline float flt16_trunc(float pf)
1264 {
1265     union float754 pun;
1266     pun.f = pf;
1267     pun.i &= 0xFFFF0000U;
1268     return pun.f;
1269 }
1270
1271 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1272                     int output_enable)
1273 {
1274     const float a     = 0.953125; // 61.0 / 64
1275     const float alpha = 0.90625;  // 29.0 / 32
1276     float e0, e1;
1277     float pv;
1278     float k1, k2;
1279
1280     k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1281     k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1282
1283     pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1284     if (output_enable)
1285         *coef += pv * ac->sf_scale;
1286
1287     e0 = *coef / ac->sf_scale;
1288     e1 = e0 - k1 * ps->r0;
1289
1290     ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1291     ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1292     ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1293     ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
1294
1295     ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1296     ps->r0 = flt16_trunc(a * e0);
1297 }
1298
1299 /**
1300  * Apply AAC-Main style frequency domain prediction.
1301  */
1302 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1303 {
1304     int sfb, k;
1305
1306     if (!sce->ics.predictor_initialized) {
1307         reset_all_predictors(sce->predictor_state);
1308         sce->ics.predictor_initialized = 1;
1309     }
1310
1311     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1312         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1313             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1314                 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1315                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1316             }
1317         }
1318         if (sce->ics.predictor_reset_group)
1319             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1320     } else
1321         reset_all_predictors(sce->predictor_state);
1322 }
1323
1324 /**
1325  * Decode an individual_channel_stream payload; reference: table 4.44.
1326  *
1327  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1328  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1329  *
1330  * @return  Returns error status. 0 - OK, !0 - error
1331  */
1332 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1333                       GetBitContext *gb, int common_window, int scale_flag)
1334 {
1335     Pulse pulse;
1336     TemporalNoiseShaping    *tns = &sce->tns;
1337     IndividualChannelStream *ics = &sce->ics;
1338     float *out = sce->coeffs;
1339     int global_gain, pulse_present = 0;
1340
1341     /* This assignment is to silence a GCC warning about the variable being used
1342      * uninitialized when in fact it always is.
1343      */
1344     pulse.num_pulse = 0;
1345
1346     global_gain = get_bits(gb, 8);
1347
1348     if (!common_window && !scale_flag) {
1349         if (decode_ics_info(ac, ics, gb, 0) < 0)
1350             return -1;
1351     }
1352
1353     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1354         return -1;
1355     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1356         return -1;
1357
1358     pulse_present = 0;
1359     if (!scale_flag) {
1360         if ((pulse_present = get_bits1(gb))) {
1361             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1362                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1363                 return -1;
1364             }
1365             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1366                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1367                 return -1;
1368             }
1369         }
1370         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1371             return -1;
1372         if (get_bits1(gb)) {
1373             av_log_missing_feature(ac->avctx, "SSR", 1);
1374             return -1;
1375         }
1376     }
1377
1378     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1379         return -1;
1380
1381     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1382         apply_prediction(ac, sce);
1383
1384     return 0;
1385 }
1386
1387 /**
1388  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1389  */
1390 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1391 {
1392     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1393     float *ch0 = cpe->ch[0].coeffs;
1394     float *ch1 = cpe->ch[1].coeffs;
1395     int g, i, group, idx = 0;
1396     const uint16_t *offsets = ics->swb_offset;
1397     for (g = 0; g < ics->num_window_groups; g++) {
1398         for (i = 0; i < ics->max_sfb; i++, idx++) {
1399             if (cpe->ms_mask[idx] &&
1400                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1401                 for (group = 0; group < ics->group_len[g]; group++) {
1402                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1403                                               ch1 + group * 128 + offsets[i],
1404                                               offsets[i+1] - offsets[i]);
1405                 }
1406             }
1407         }
1408         ch0 += ics->group_len[g] * 128;
1409         ch1 += ics->group_len[g] * 128;
1410     }
1411 }
1412
1413 /**
1414  * intensity stereo decoding; reference: 4.6.8.2.3
1415  *
1416  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1417  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1418  *                      [3] reserved for scalable AAC
1419  */
1420 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1421 {
1422     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1423     SingleChannelElement         *sce1 = &cpe->ch[1];
1424     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1425     const uint16_t *offsets = ics->swb_offset;
1426     int g, group, i, k, idx = 0;
1427     int c;
1428     float scale;
1429     for (g = 0; g < ics->num_window_groups; g++) {
1430         for (i = 0; i < ics->max_sfb;) {
1431             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1432                 const int bt_run_end = sce1->band_type_run_end[idx];
1433                 for (; i < bt_run_end; i++, idx++) {
1434                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1435                     if (ms_present)
1436                         c *= 1 - 2 * cpe->ms_mask[idx];
1437                     scale = c * sce1->sf[idx];
1438                     for (group = 0; group < ics->group_len[g]; group++)
1439                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1440                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1441                 }
1442             } else {
1443                 int bt_run_end = sce1->band_type_run_end[idx];
1444                 idx += bt_run_end - i;
1445                 i    = bt_run_end;
1446             }
1447         }
1448         coef0 += ics->group_len[g] * 128;
1449         coef1 += ics->group_len[g] * 128;
1450     }
1451 }
1452
1453 /**
1454  * Decode a channel_pair_element; reference: table 4.4.
1455  *
1456  * @return  Returns error status. 0 - OK, !0 - error
1457  */
1458 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1459 {
1460     int i, ret, common_window, ms_present = 0;
1461
1462     common_window = get_bits1(gb);
1463     if (common_window) {
1464         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1465             return -1;
1466         i = cpe->ch[1].ics.use_kb_window[0];
1467         cpe->ch[1].ics = cpe->ch[0].ics;
1468         cpe->ch[1].ics.use_kb_window[1] = i;
1469         ms_present = get_bits(gb, 2);
1470         if (ms_present == 3) {
1471             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1472             return -1;
1473         } else if (ms_present)
1474             decode_mid_side_stereo(cpe, gb, ms_present);
1475     }
1476     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1477         return ret;
1478     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1479         return ret;
1480
1481     if (common_window) {
1482         if (ms_present)
1483             apply_mid_side_stereo(ac, cpe);
1484         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1485             apply_prediction(ac, &cpe->ch[0]);
1486             apply_prediction(ac, &cpe->ch[1]);
1487         }
1488     }
1489
1490     apply_intensity_stereo(cpe, ms_present);
1491     return 0;
1492 }
1493
1494 /**
1495  * Decode coupling_channel_element; reference: table 4.8.
1496  *
1497  * @return  Returns error status. 0 - OK, !0 - error
1498  */
1499 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1500 {
1501     int num_gain = 0;
1502     int c, g, sfb, ret;
1503     int sign;
1504     float scale;
1505     SingleChannelElement *sce = &che->ch[0];
1506     ChannelCoupling     *coup = &che->coup;
1507
1508     coup->coupling_point = 2 * get_bits1(gb);
1509     coup->num_coupled = get_bits(gb, 3);
1510     for (c = 0; c <= coup->num_coupled; c++) {
1511         num_gain++;
1512         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1513         coup->id_select[c] = get_bits(gb, 4);
1514         if (coup->type[c] == TYPE_CPE) {
1515             coup->ch_select[c] = get_bits(gb, 2);
1516             if (coup->ch_select[c] == 3)
1517                 num_gain++;
1518         } else
1519             coup->ch_select[c] = 2;
1520     }
1521     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1522
1523     sign  = get_bits(gb, 1);
1524     scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1525
1526     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1527         return ret;
1528
1529     for (c = 0; c < num_gain; c++) {
1530         int idx  = 0;
1531         int cge  = 1;
1532         int gain = 0;
1533         float gain_cache = 1.;
1534         if (c) {
1535             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1536             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1537             gain_cache = pow(scale, -gain);
1538         }
1539         if (coup->coupling_point == AFTER_IMDCT) {
1540             coup->gain[c][0] = gain_cache;
1541         } else {
1542             for (g = 0; g < sce->ics.num_window_groups; g++) {
1543                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1544                     if (sce->band_type[idx] != ZERO_BT) {
1545                         if (!cge) {
1546                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1547                             if (t) {
1548                                 int s = 1;
1549                                 t = gain += t;
1550                                 if (sign) {
1551                                     s  -= 2 * (t & 0x1);
1552                                     t >>= 1;
1553                                 }
1554                                 gain_cache = pow(scale, -t) * s;
1555                             }
1556                         }
1557                         coup->gain[c][idx] = gain_cache;
1558                     }
1559                 }
1560             }
1561         }
1562     }
1563     return 0;
1564 }
1565
1566 /**
1567  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1568  *
1569  * @return  Returns number of bytes consumed.
1570  */
1571 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1572                                          GetBitContext *gb)
1573 {
1574     int i;
1575     int num_excl_chan = 0;
1576
1577     do {
1578         for (i = 0; i < 7; i++)
1579             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1580     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1581
1582     return num_excl_chan / 7;
1583 }
1584
1585 /**
1586  * Decode dynamic range information; reference: table 4.52.
1587  *
1588  * @param   cnt length of TYPE_FIL syntactic element in bytes
1589  *
1590  * @return  Returns number of bytes consumed.
1591  */
1592 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1593                                 GetBitContext *gb, int cnt)
1594 {
1595     int n             = 1;
1596     int drc_num_bands = 1;
1597     int i;
1598
1599     /* pce_tag_present? */
1600     if (get_bits1(gb)) {
1601         che_drc->pce_instance_tag  = get_bits(gb, 4);
1602         skip_bits(gb, 4); // tag_reserved_bits
1603         n++;
1604     }
1605
1606     /* excluded_chns_present? */
1607     if (get_bits1(gb)) {
1608         n += decode_drc_channel_exclusions(che_drc, gb);
1609     }
1610
1611     /* drc_bands_present? */
1612     if (get_bits1(gb)) {
1613         che_drc->band_incr            = get_bits(gb, 4);
1614         che_drc->interpolation_scheme = get_bits(gb, 4);
1615         n++;
1616         drc_num_bands += che_drc->band_incr;
1617         for (i = 0; i < drc_num_bands; i++) {
1618             che_drc->band_top[i] = get_bits(gb, 8);
1619             n++;
1620         }
1621     }
1622
1623     /* prog_ref_level_present? */
1624     if (get_bits1(gb)) {
1625         che_drc->prog_ref_level = get_bits(gb, 7);
1626         skip_bits1(gb); // prog_ref_level_reserved_bits
1627         n++;
1628     }
1629
1630     for (i = 0; i < drc_num_bands; i++) {
1631         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1632         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1633         n++;
1634     }
1635
1636     return n;
1637 }
1638
1639 /**
1640  * Decode extension data (incomplete); reference: table 4.51.
1641  *
1642  * @param   cnt length of TYPE_FIL syntactic element in bytes
1643  *
1644  * @return Returns number of bytes consumed
1645  */
1646 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1647                                     ChannelElement *che, enum RawDataBlockType elem_type)
1648 {
1649     int crc_flag = 0;
1650     int res = cnt;
1651     switch (get_bits(gb, 4)) { // extension type
1652     case EXT_SBR_DATA_CRC:
1653         crc_flag++;
1654     case EXT_SBR_DATA:
1655         if (!che) {
1656             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1657             return res;
1658         } else if (!ac->m4ac.sbr) {
1659             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1660             skip_bits_long(gb, 8 * cnt - 4);
1661             return res;
1662         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1663             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1664             skip_bits_long(gb, 8 * cnt - 4);
1665             return res;
1666         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1667             ac->m4ac.sbr = 1;
1668             ac->m4ac.ps = 1;
1669             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1670         } else {
1671             ac->m4ac.sbr = 1;
1672         }
1673         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1674         break;
1675     case EXT_DYNAMIC_RANGE:
1676         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1677         break;
1678     case EXT_FILL:
1679     case EXT_FILL_DATA:
1680     case EXT_DATA_ELEMENT:
1681     default:
1682         skip_bits_long(gb, 8 * cnt - 4);
1683         break;
1684     };
1685     return res;
1686 }
1687
1688 /**
1689  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1690  *
1691  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1692  * @param   coef    spectral coefficients
1693  */
1694 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1695                       IndividualChannelStream *ics, int decode)
1696 {
1697     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1698     int w, filt, m, i;
1699     int bottom, top, order, start, end, size, inc;
1700     float lpc[TNS_MAX_ORDER];
1701
1702     for (w = 0; w < ics->num_windows; w++) {
1703         bottom = ics->num_swb;
1704         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1705             top    = bottom;
1706             bottom = FFMAX(0, top - tns->length[w][filt]);
1707             order  = tns->order[w][filt];
1708             if (order == 0)
1709                 continue;
1710
1711             // tns_decode_coef
1712             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1713
1714             start = ics->swb_offset[FFMIN(bottom, mmm)];
1715             end   = ics->swb_offset[FFMIN(   top, mmm)];
1716             if ((size = end - start) <= 0)
1717                 continue;
1718             if (tns->direction[w][filt]) {
1719                 inc = -1;
1720                 start = end - 1;
1721             } else {
1722                 inc = 1;
1723             }
1724             start += w * 128;
1725
1726             // ar filter
1727             for (m = 0; m < size; m++, start += inc)
1728                 for (i = 1; i <= FFMIN(m, order); i++)
1729                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1730         }
1731     }
1732 }
1733
1734 /**
1735  * Conduct IMDCT and windowing.
1736  */
1737 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1738 {
1739     IndividualChannelStream *ics = &sce->ics;
1740     float *in    = sce->coeffs;
1741     float *out   = sce->ret;
1742     float *saved = sce->saved;
1743     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1744     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1745     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1746     float *buf  = ac->buf_mdct;
1747     float *temp = ac->temp;
1748     int i;
1749
1750     // imdct
1751     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1752         if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1753             av_log(ac->avctx, AV_LOG_WARNING,
1754                    "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1755                    "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1756         for (i = 0; i < 1024; i += 128)
1757             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1758     } else
1759         ff_imdct_half(&ac->mdct, buf, in);
1760
1761     /* window overlapping
1762      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1763      * and long to short transitions are considered to be short to short
1764      * transitions. This leaves just two cases (long to long and short to short)
1765      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1766      */
1767     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1768             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1769         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1770     } else {
1771         for (i = 0; i < 448; i++)
1772             out[i] = saved[i] + bias;
1773
1774         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1775             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1776             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1777             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1778             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1779             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1780             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1781         } else {
1782             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1783             for (i = 576; i < 1024; i++)
1784                 out[i] = buf[i-512] + bias;
1785         }
1786     }
1787
1788     // buffer update
1789     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1790         for (i = 0; i < 64; i++)
1791             saved[i] = temp[64 + i] - bias;
1792         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1793         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1794         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1795         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1796     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1797         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1798         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1799     } else { // LONG_STOP or ONLY_LONG
1800         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1801     }
1802 }
1803
1804 /**
1805  * Apply dependent channel coupling (applied before IMDCT).
1806  *
1807  * @param   index   index into coupling gain array
1808  */
1809 static void apply_dependent_coupling(AACContext *ac,
1810                                      SingleChannelElement *target,
1811                                      ChannelElement *cce, int index)
1812 {
1813     IndividualChannelStream *ics = &cce->ch[0].ics;
1814     const uint16_t *offsets = ics->swb_offset;
1815     float *dest = target->coeffs;
1816     const float *src = cce->ch[0].coeffs;
1817     int g, i, group, k, idx = 0;
1818     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1819         av_log(ac->avctx, AV_LOG_ERROR,
1820                "Dependent coupling is not supported together with LTP\n");
1821         return;
1822     }
1823     for (g = 0; g < ics->num_window_groups; g++) {
1824         for (i = 0; i < ics->max_sfb; i++, idx++) {
1825             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1826                 const float gain = cce->coup.gain[index][idx];
1827                 for (group = 0; group < ics->group_len[g]; group++) {
1828                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1829                         // XXX dsputil-ize
1830                         dest[group * 128 + k] += gain * src[group * 128 + k];
1831                     }
1832                 }
1833             }
1834         }
1835         dest += ics->group_len[g] * 128;
1836         src  += ics->group_len[g] * 128;
1837     }
1838 }
1839
1840 /**
1841  * Apply independent channel coupling (applied after IMDCT).
1842  *
1843  * @param   index   index into coupling gain array
1844  */
1845 static void apply_independent_coupling(AACContext *ac,
1846                                        SingleChannelElement *target,
1847                                        ChannelElement *cce, int index)
1848 {
1849     int i;
1850     const float gain = cce->coup.gain[index][0];
1851     const float bias = ac->add_bias;
1852     const float *src = cce->ch[0].ret;
1853     float *dest = target->ret;
1854     const int len = 1024 << (ac->m4ac.sbr == 1);
1855
1856     for (i = 0; i < len; i++)
1857         dest[i] += gain * (src[i] - bias);
1858 }
1859
1860 /**
1861  * channel coupling transformation interface
1862  *
1863  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1864  */
1865 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1866                                    enum RawDataBlockType type, int elem_id,
1867                                    enum CouplingPoint coupling_point,
1868                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1869 {
1870     int i, c;
1871
1872     for (i = 0; i < MAX_ELEM_ID; i++) {
1873         ChannelElement *cce = ac->che[TYPE_CCE][i];
1874         int index = 0;
1875
1876         if (cce && cce->coup.coupling_point == coupling_point) {
1877             ChannelCoupling *coup = &cce->coup;
1878
1879             for (c = 0; c <= coup->num_coupled; c++) {
1880                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1881                     if (coup->ch_select[c] != 1) {
1882                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1883                         if (coup->ch_select[c] != 0)
1884                             index++;
1885                     }
1886                     if (coup->ch_select[c] != 2)
1887                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1888                 } else
1889                     index += 1 + (coup->ch_select[c] == 3);
1890             }
1891         }
1892     }
1893 }
1894
1895 /**
1896  * Convert spectral data to float samples, applying all supported tools as appropriate.
1897  */
1898 static void spectral_to_sample(AACContext *ac)
1899 {
1900     int i, type;
1901     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1902     for (type = 3; type >= 0; type--) {
1903         for (i = 0; i < MAX_ELEM_ID; i++) {
1904             ChannelElement *che = ac->che[type][i];
1905             if (che) {
1906                 if (type <= TYPE_CPE)
1907                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1908                 if (che->ch[0].tns.present)
1909                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1910                 if (che->ch[1].tns.present)
1911                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1912                 if (type <= TYPE_CPE)
1913                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1914                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1915                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1916                     if (type == TYPE_CPE) {
1917                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1918                     }
1919                     if (ac->m4ac.sbr > 0) {
1920                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1921                     }
1922                 }
1923                 if (type <= TYPE_CCE)
1924                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1925             }
1926         }
1927     }
1928 }
1929
1930 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1931 {
1932     int size;
1933     AACADTSHeaderInfo hdr_info;
1934
1935     size = ff_aac_parse_header(gb, &hdr_info);
1936     if (size > 0) {
1937         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1938             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1939             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1940             ac->m4ac.chan_config = hdr_info.chan_config;
1941             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1942                 return -7;
1943             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1944                 return -7;
1945         } else if (ac->output_configured != OC_LOCKED) {
1946             ac->output_configured = OC_NONE;
1947         }
1948         if (ac->output_configured != OC_LOCKED) {
1949             ac->m4ac.sbr = -1;
1950             ac->m4ac.ps  = -1;
1951         }
1952         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1953         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1954         ac->m4ac.object_type     = hdr_info.object_type;
1955         if (!ac->avctx->sample_rate)
1956             ac->avctx->sample_rate = hdr_info.sample_rate;
1957         if (hdr_info.num_aac_frames == 1) {
1958             if (!hdr_info.crc_absent)
1959                 skip_bits(gb, 16);
1960         } else {
1961             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1962             return -1;
1963         }
1964     }
1965     return size;
1966 }
1967
1968 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1969                             int *data_size, AVPacket *avpkt)
1970 {
1971     const uint8_t *buf = avpkt->data;
1972     int buf_size = avpkt->size;
1973     AACContext *ac = avctx->priv_data;
1974     ChannelElement *che = NULL, *che_prev = NULL;
1975     GetBitContext gb;
1976     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1977     int err, elem_id, data_size_tmp;
1978     int buf_consumed;
1979     int samples = 0, multiplier;
1980     int buf_offset;
1981
1982     init_get_bits(&gb, buf, buf_size * 8);
1983
1984     if (show_bits(&gb, 12) == 0xfff) {
1985         if (parse_adts_frame_header(ac, &gb) < 0) {
1986             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1987             return -1;
1988         }
1989         if (ac->m4ac.sampling_index > 12) {
1990             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1991             return -1;
1992         }
1993     }
1994
1995     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
1996     // parse
1997     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1998         elem_id = get_bits(&gb, 4);
1999
2000         if (elem_type < TYPE_DSE) {
2001             if (!(che=get_che(ac, elem_type, elem_id))) {
2002                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2003                        elem_type, elem_id);
2004                 return -1;
2005             }
2006             samples = 1024;
2007         }
2008
2009         switch (elem_type) {
2010
2011         case TYPE_SCE:
2012             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2013             break;
2014
2015         case TYPE_CPE:
2016             err = decode_cpe(ac, &gb, che);
2017             break;
2018
2019         case TYPE_CCE:
2020             err = decode_cce(ac, &gb, che);
2021             break;
2022
2023         case TYPE_LFE:
2024             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2025             break;
2026
2027         case TYPE_DSE:
2028             err = skip_data_stream_element(ac, &gb);
2029             break;
2030
2031         case TYPE_PCE: {
2032             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2033             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2034             if ((err = decode_pce(ac, new_che_pos, &gb)))
2035                 break;
2036             if (ac->output_configured > OC_TRIAL_PCE)
2037                 av_log(avctx, AV_LOG_ERROR,
2038                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2039             else
2040                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2041             break;
2042         }
2043
2044         case TYPE_FIL:
2045             if (elem_id == 15)
2046                 elem_id += get_bits(&gb, 8) - 1;
2047             if (get_bits_left(&gb) < 8 * elem_id) {
2048                     av_log(avctx, AV_LOG_ERROR, overread_err);
2049                     return -1;
2050             }
2051             while (elem_id > 0)
2052                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2053             err = 0; /* FIXME */
2054             break;
2055
2056         default:
2057             err = -1; /* should not happen, but keeps compiler happy */
2058             break;
2059         }
2060
2061         che_prev       = che;
2062         elem_type_prev = elem_type;
2063
2064         if (err)
2065             return err;
2066
2067         if (get_bits_left(&gb) < 3) {
2068             av_log(avctx, AV_LOG_ERROR, overread_err);
2069             return -1;
2070         }
2071     }
2072
2073     spectral_to_sample(ac);
2074
2075     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2076     samples <<= multiplier;
2077     if (ac->output_configured < OC_LOCKED) {
2078         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2079         avctx->frame_size = samples;
2080     }
2081
2082     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2083     if (*data_size < data_size_tmp) {
2084         av_log(avctx, AV_LOG_ERROR,
2085                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2086                *data_size, data_size_tmp);
2087         return -1;
2088     }
2089     *data_size = data_size_tmp;
2090
2091     if (samples)
2092         ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2093
2094     if (ac->output_configured)
2095         ac->output_configured = OC_LOCKED;
2096
2097     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2098     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2099         if (buf[buf_offset])
2100             break;
2101
2102     return buf_size > buf_offset ? buf_consumed : buf_size;
2103 }
2104
2105 static av_cold int aac_decode_close(AVCodecContext *avctx)
2106 {
2107     AACContext *ac = avctx->priv_data;
2108     int i, type;
2109
2110     for (i = 0; i < MAX_ELEM_ID; i++) {
2111         for (type = 0; type < 4; type++) {
2112             if (ac->che[type][i])
2113                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2114             av_freep(&ac->che[type][i]);
2115         }
2116     }
2117
2118     ff_mdct_end(&ac->mdct);
2119     ff_mdct_end(&ac->mdct_small);
2120     return 0;
2121 }
2122
2123 AVCodec aac_decoder = {
2124     "aac",
2125     AVMEDIA_TYPE_AUDIO,
2126     CODEC_ID_AAC,
2127     sizeof(AACContext),
2128     aac_decode_init,
2129     NULL,
2130     aac_decode_close,
2131     aac_decode_frame,
2132     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2133     .sample_fmts = (const enum SampleFormat[]) {
2134         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2135     },
2136     .channel_layouts = aac_channel_layout,
2137 };