git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * Y                    Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  74  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  75            Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aac_parser.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
/**
 * Overlay of a float and a 32-bit unsigned integer sharing the same storage,
 * so that a value written through one member can be read through the other.
 * NOTE(review): the name suggests the float is assumed to be an IEEE 754
 * single; that is not enforced here - confirm for the targeted platforms.
 */
union float754 {
    float f;     ///< floating-point view of the storage
    uint32_t i;  ///< 32-bit unsigned integer view of the same storage
};
 108
/* VLC table used to read scalefactor deltas; filled in by aac_decode_init(). */
static VLC vlc_scalefactors;
/* One VLC table per spectral codebook (indices 0..10); filled in by
 * aac_decode_init() through AAC_INIT_VLC_STATIC(). */
static VLC vlc_spectral[11];

/* Shared log message for every "ran out of input bits" error path. */
static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE ||
 204                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 205                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 206             }
 207         }
 208     } else {
 209         if (ac->che[type][id])
 210             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 211         av_freep(&ac->che[type][id]);
 212     }
 213     return 0;
 214 }
 215
 216 /**
 217  * Configure output channel order based on the current program configuration element.
 218  *
 219  * @param   che_pos current channel position configuration
 220  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 221  *
 222  * @return  Returns error status. 0 - OK, !0 - error
 223  */
 224 static av_cold int output_configure(AACContext *ac,
 225                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 226                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 227                             int channel_config, enum OCStatus oc_type)
 228 {
 229     AVCodecContext *avctx = ac->avctx;
 230     int i, type, channels = 0, ret;
 231
 232     if (new_che_pos != che_pos)
 233     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 234
 235     if (channel_config) {
 236         for (i = 0; i < tags_per_config[channel_config]; i++) {
 237             if ((ret = che_configure(ac, che_pos,
 238                                      aac_channel_layout_map[channel_config - 1][i][0],
 239                                      aac_channel_layout_map[channel_config - 1][i][1],
 240                                      &channels)))
 241                 return ret;
 242         }
 243
 244         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 245         ac->tags_mapped = 0;
 246
 247         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 248     } else {
 249         /* Allocate or free elements depending on if they are in the
 250          * current program configuration.
 251          *
 252          * Set up default 1:1 output mapping.
 253          *
 254          * For a 5.1 stream the output order will be:
 255          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 256          */
 257
 258         for (i = 0; i < MAX_ELEM_ID; i++) {
 259             for (type = 0; type < 4; type++) {
 260                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 261                     return ret;
 262             }
 263         }
 264
 265         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 266         ac->tags_mapped = 4 * MAX_ELEM_ID;
 267
 268         avctx->channel_layout = 0;
 269     }
 270
 271     avctx->channels = channels;
 272
 273     ac->output_configured = oc_type;
 274
 275     return 0;
 276 }
 277
 278 /**
 279  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 280  *
 281  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 282  * @param sce_map mono (Single Channel Element) map
 283  * @param type speaker type/position for these channels
 284  */
 285 static void decode_channel_map(enum ChannelPosition *cpe_map,
 286                                enum ChannelPosition *sce_map,
 287                                enum ChannelPosition type,
 288                                GetBitContext *gb, int n)
 289 {
 290     while (n--) {
 291         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 292         map[get_bits(gb, 4)] = type;
 293     }
 294 }
 295
 296 /**
 297  * Decode program configuration element; reference: table 4.2.
 298  *
 299  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 300  *
 301  * @return  Returns error status. 0 - OK, !0 - error
 302  */
 303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 304                       GetBitContext *gb)
 305 {
 306     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 307     int comment_len;
 308
 309     skip_bits(gb, 2);  // object_type
 310
 311     sampling_index = get_bits(gb, 4);
 312     if (ac->m4ac.sampling_index != sampling_index)
 313         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 314
 315     num_front       = get_bits(gb, 4);
 316     num_side        = get_bits(gb, 4);
 317     num_back        = get_bits(gb, 4);
 318     num_lfe         = get_bits(gb, 2);
 319     num_assoc_data  = get_bits(gb, 3);
 320     num_cc          = get_bits(gb, 4);
 321
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // mono_mixdown_tag
 324     if (get_bits1(gb))
 325         skip_bits(gb, 4); // stereo_mixdown_tag
 326
 327     if (get_bits1(gb))
 328         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 329
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 331     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 332     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 333     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 334
 335     skip_bits_long(gb, 4 * num_assoc_data);
 336
 337     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 338
 339     align_get_bits(gb);
 340
 341     /* comment field, first byte is length */
 342     comment_len = get_bits(gb, 8) * 8;
 343     if (get_bits_left(gb) < comment_len) {
 344         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 345         return -1;
 346     }
 347     skip_bits_long(gb, comment_len);
 348     return 0;
 349 }
 350
 351 /**
 352  * Set up channel positions based on a default channel configuration
 353  * as specified in table 1.17.
 354  *
 355  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 356  *
 357  * @return  Returns error status. 0 - OK, !0 - error
 358  */
 359 static av_cold int set_default_channel_config(AACContext *ac,
 360                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 361                                       int channel_config)
 362 {
 363     if (channel_config < 1 || channel_config > 7) {
 364         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 365                channel_config);
 366         return -1;
 367     }
 368
 369     /* default channel configurations:
 370      *
 371      * 1ch : front center (mono)
 372      * 2ch : L + R (stereo)
 373      * 3ch : front center + L + R
 374      * 4ch : front center + L + R + back center
 375      * 5ch : front center + L + R + back stereo
 376      * 6ch : front center + L + R + back stereo + LFE
 377      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 378      */
 379
 380     if (channel_config != 2)
 381         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 382     if (channel_config > 1)
 383         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 384     if (channel_config == 4)
 385         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 386     if (channel_config > 4)
 387         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 388         = AAC_CHANNEL_BACK;  // back stereo
 389     if (channel_config > 5)
 390         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 391     if (channel_config == 7)
 392         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 393
 394     return 0;
 395 }
 396
 397 /**
 398  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 399  *
 400  * @return  Returns error status. 0 - OK, !0 - error
 401  */
 402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 403                                      int channel_config)
 404 {
 405     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 406     int extension_flag, ret;
 407
 408     if (get_bits1(gb)) { // frameLengthFlag
 409         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 410         return -1;
 411     }
 412
 413     if (get_bits1(gb))       // dependsOnCoreCoder
 414         skip_bits(gb, 14);   // coreCoderDelay
 415     extension_flag = get_bits1(gb);
 416
 417     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 418         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 419         skip_bits(gb, 3);     // layerNr
 420
 421     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 422     if (channel_config == 0) {
 423         skip_bits(gb, 4);  // element_instance_tag
 424         if ((ret = decode_pce(ac, new_che_pos, gb)))
 425             return ret;
 426     } else {
 427         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 428             return ret;
 429     }
 430     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 431         return ret;
 432
 433     if (extension_flag) {
 434         switch (ac->m4ac.object_type) {
 435         case AOT_ER_BSAC:
 436             skip_bits(gb, 5);    // numOfSubFrame
 437             skip_bits(gb, 11);   // layer_length
 438             break;
 439         case AOT_ER_AAC_LC:
 440         case AOT_ER_AAC_LTP:
 441         case AOT_ER_AAC_SCALABLE:
 442         case AOT_ER_AAC_LD:
 443             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 444                                     * aacScalefactorDataResilienceFlag
 445                                     * aacSpectralDataResilienceFlag
 446                                     */
 447             break;
 448         }
 449         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 450     }
 451     return 0;
 452 }
 453
 454 /**
 455  * Decode audio specific configuration; reference: table 1.13.
 456  *
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status. 0 - OK, !0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac, void *data,
 463                                         int data_size)
 464 {
 465     GetBitContext gb;
 466     int i;
 467
 468     init_get_bits(&gb, data, data_size * 8);
 469
 470     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 471         return -1;
 472     if (ac->m4ac.sampling_index > 12) {
 473         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 474         return -1;
 475     }
 476     if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
 477         ac->m4ac.ps = 1;
 478
 479     skip_bits_long(&gb, i);
 480
 481     switch (ac->m4ac.object_type) {
 482     case AOT_AAC_MAIN:
 483     case AOT_AAC_LC:
 484         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 485             return -1;
 486         break;
 487     default:
 488         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 489                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 490         return -1;
 491     }
 492     return 0;
 493 }
 494
 495 /**
 496  * linear congruential pseudorandom number generator
 497  *
 498  * @param   previous_val    pointer to the current state of the generator
 499  *
 500  * @return  Returns a 32-bit pseudorandom integer
 501  */
 502 static av_always_inline int lcg_random(int previous_val)
 503 {
 504     return previous_val * 1664525 + 1013904223;
 505 }
 506
 507 static av_always_inline void reset_predict_state(PredictorState *ps)
 508 {
 509     ps->r0   = 0.0f;
 510     ps->r1   = 0.0f;
 511     ps->cor0 = 0.0f;
 512     ps->cor1 = 0.0f;
 513     ps->var0 = 1.0f;
 514     ps->var1 = 1.0f;
 515 }
 516
 517 static void reset_all_predictors(PredictorState *ps)
 518 {
 519     int i;
 520     for (i = 0; i < MAX_PREDICTORS; i++)
 521         reset_predict_state(&ps[i]);
 522 }
 523
 524 static void reset_predictor_group(PredictorState *ps, int group_num)
 525 {
 526     int i;
 527     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 528         reset_predict_state(&ps[i]);
 529 }
 530
 531 #define AAC_INIT_VLC_STATIC(num, size) \
 532     INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
 533          ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
 534         ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
 535         size);
 536
 537 static av_cold int aac_decode_init(AVCodecContext *avctx)
 538 {
 539     AACContext *ac = avctx->priv_data;
 540
 541     ac->avctx = avctx;
 542     ac->m4ac.sample_rate = avctx->sample_rate;
 543
 544     if (avctx->extradata_size > 0) {
 545         if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
 546             return -1;
 547     }
 548
 549     avctx->sample_fmt = SAMPLE_FMT_S16;
 550
 551     AAC_INIT_VLC_STATIC( 0, 304);
 552     AAC_INIT_VLC_STATIC( 1, 270);
 553     AAC_INIT_VLC_STATIC( 2, 550);
 554     AAC_INIT_VLC_STATIC( 3, 300);
 555     AAC_INIT_VLC_STATIC( 4, 328);
 556     AAC_INIT_VLC_STATIC( 5, 294);
 557     AAC_INIT_VLC_STATIC( 6, 306);
 558     AAC_INIT_VLC_STATIC( 7, 268);
 559     AAC_INIT_VLC_STATIC( 8, 510);
 560     AAC_INIT_VLC_STATIC( 9, 366);
 561     AAC_INIT_VLC_STATIC(10, 462);
 562
 563     ff_aac_sbr_init();
 564
 565     dsputil_init(&ac->dsp, avctx);
 566
 567     ac->random_state = 0x1f2e3d4c;
 568
 569     // -1024 - Compensate wrong IMDCT method.
 570     // 32768 - Required to scale values to the correct range for the bias method
 571     //         for float to int16 conversion.
 572
 573     if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
 574         ac->add_bias  = 385.0f;
 575         ac->sf_scale  = 1. / (-1024. * 32768.);
 576         ac->sf_offset = 0;
 577     } else {
 578         ac->add_bias  = 0.0f;
 579         ac->sf_scale  = 1. / -1024.;
 580         ac->sf_offset = 60;
 581     }
 582
 583     ff_aac_tableinit();
 584
 585     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 586                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 587                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 588                     352);
 589
 590     ff_mdct_init(&ac->mdct, 11, 1, 1.0);
 591     ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
 592     // window initialization
 593     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 594     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 595     ff_init_ff_sine_windows(10);
 596     ff_init_ff_sine_windows( 7);
 597
 598     cbrt_tableinit();
 599
 600     return 0;
 601 }
 602
 603 /**
 604  * Skip data_stream_element; reference: table 4.10.
 605  */
 606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 607 {
 608     int byte_align = get_bits1(gb);
 609     int count = get_bits(gb, 8);
 610     if (count == 255)
 611         count += get_bits(gb, 8);
 612     if (byte_align)
 613         align_get_bits(gb);
 614
 615     if (get_bits_left(gb) < 8 * count) {
 616         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 617         return -1;
 618     }
 619     skip_bits_long(gb, 8 * count);
 620     return 0;
 621 }
 622
 623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 624                              GetBitContext *gb)
 625 {
 626     int sfb;
 627     if (get_bits1(gb)) {
 628         ics->predictor_reset_group = get_bits(gb, 5);
 629         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 630             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 631             return -1;
 632         }
 633     }
 634     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 635         ics->prediction_used[sfb] = get_bits1(gb);
 636     }
 637     return 0;
 638 }
 639
 640 /**
 641  * Decode Individual Channel Stream info; reference: table 4.6.
 642  *
 643  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 644  */
 645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 646                            GetBitContext *gb, int common_window)
 647 {
 648     if (get_bits1(gb)) {
 649         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 650         memset(ics, 0, sizeof(IndividualChannelStream));
 651         return -1;
 652     }
 653     ics->window_sequence[1] = ics->window_sequence[0];
 654     ics->window_sequence[0] = get_bits(gb, 2);
 655     ics->use_kb_window[1]   = ics->use_kb_window[0];
 656     ics->use_kb_window[0]   = get_bits1(gb);
 657     ics->num_window_groups  = 1;
 658     ics->group_len[0]       = 1;
 659     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 660         int i;
 661         ics->max_sfb = get_bits(gb, 4);
 662         for (i = 0; i < 7; i++) {
 663             if (get_bits1(gb)) {
 664                 ics->group_len[ics->num_window_groups - 1]++;
 665             } else {
 666                 ics->num_window_groups++;
 667                 ics->group_len[ics->num_window_groups - 1] = 1;
 668             }
 669         }
 670         ics->num_windows       = 8;
 671         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 672         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 673         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 674         ics->predictor_present = 0;
 675     } else {
 676         ics->max_sfb               = get_bits(gb, 6);
 677         ics->num_windows           = 1;
 678         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 679         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 680         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 681         ics->predictor_present     = get_bits1(gb);
 682         ics->predictor_reset_group = 0;
 683         if (ics->predictor_present) {
 684             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 685                 if (decode_prediction(ac, ics, gb)) {
 686                     memset(ics, 0, sizeof(IndividualChannelStream));
 687                     return -1;
 688                 }
 689             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 690                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 691                 memset(ics, 0, sizeof(IndividualChannelStream));
 692                 return -1;
 693             } else {
 694                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 695                 memset(ics, 0, sizeof(IndividualChannelStream));
 696                 return -1;
 697             }
 698         }
 699     }
 700
 701     if (ics->max_sfb > ics->num_swb) {
 702         av_log(ac->avctx, AV_LOG_ERROR,
 703                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 704                ics->max_sfb, ics->num_swb);
 705         memset(ics, 0, sizeof(IndividualChannelStream));
 706         return -1;
 707     }
 708
 709     return 0;
 710 }
 711
 712 /**
 713  * Decode band types (section_data payload); reference: table 4.46.
 714  *
 715  * @param   band_type           array of the used band type
 716  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 717  *
 718  * @return  Returns error status. 0 - OK, !0 - error
 719  */
 720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 721                              int band_type_run_end[120], GetBitContext *gb,
 722                              IndividualChannelStream *ics)
 723 {
 724     int g, idx = 0;
 725     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 726     for (g = 0; g < ics->num_window_groups; g++) {
 727         int k = 0;
 728         while (k < ics->max_sfb) {
 729             uint8_t sect_end = k;
 730             int sect_len_incr;
 731             int sect_band_type = get_bits(gb, 4);
 732             if (sect_band_type == 12) {
 733                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 734                 return -1;
 735             }
 736             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 737                 sect_end += sect_len_incr;
 738             sect_end += sect_len_incr;
 739             if (get_bits_left(gb) < 0) {
 740                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 741                 return -1;
 742             }
 743             if (sect_end > ics->max_sfb) {
 744                 av_log(ac->avctx, AV_LOG_ERROR,
 745                        "Number of bands (%d) exceeds limit (%d).\n",
 746                        sect_end, ics->max_sfb);
 747                 return -1;
 748             }
 749             for (; k < sect_end; k++) {
 750                 band_type        [idx]   = sect_band_type;
 751                 band_type_run_end[idx++] = sect_end;
 752             }
 753         }
 754     }
 755     return 0;
 756 }
 757
 758 /**
 759  * Decode scalefactors; reference: table 4.47.
 760  *
 761  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 762  * @param   band_type           array of the used band type
 763  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 764  * @param   sf                  array of scalefactors or intensity stereo positions
 765  *
 766  * @return  Returns error status. 0 - OK, !0 - error
 767  */
 768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 769                                unsigned int global_gain,
 770                                IndividualChannelStream *ics,
 771                                enum BandType band_type[120],
 772                                int band_type_run_end[120])
 773 {
 774     const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
 775     int g, i, idx = 0;
 776     int offset[3] = { global_gain, global_gain - 90, 100 };
 777     int noise_flag = 1;
 778     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 779     for (g = 0; g < ics->num_window_groups; g++) {
 780         for (i = 0; i < ics->max_sfb;) {
 781             int run_end = band_type_run_end[idx];
 782             if (band_type[idx] == ZERO_BT) {
 783                 for (; i < run_end; i++, idx++)
 784                     sf[idx] = 0.;
 785             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 786                 for (; i < run_end; i++, idx++) {
 787                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 788                     if (offset[2] > 255U) {
 789                         av_log(ac->avctx, AV_LOG_ERROR,
 790                                "%s (%d) out of range.\n", sf_str[2], offset[2]);
 791                         return -1;
 792                     }
 793                     sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
 794                 }
 795             } else if (band_type[idx] == NOISE_BT) {
 796                 for (; i < run_end; i++, idx++) {
 797                     if (noise_flag-- > 0)
 798                         offset[1] += get_bits(gb, 9) - 256;
 799                     else
 800                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 801                     if (offset[1] > 255U) {
 802                         av_log(ac->avctx, AV_LOG_ERROR,
 803                                "%s (%d) out of range.\n", sf_str[1], offset[1]);
 804                         return -1;
 805                     }
 806                     sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
 807                 }
 808             } else {
 809                 for (; i < run_end; i++, idx++) {
 810                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 811                     if (offset[0] > 255U) {
 812                         av_log(ac->avctx, AV_LOG_ERROR,
 813                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 814                         return -1;
 815                     }
 816                     sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
 817                 }
 818             }
 819         }
 820     }
 821     return 0;
 822 }
 823
 824 /**
 825  * Decode pulse data; reference: table 4.7.
 826  */
 827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 828                          const uint16_t *swb_offset, int num_swb)
 829 {
 830     int i, pulse_swb;
 831     pulse->num_pulse = get_bits(gb, 2) + 1;
 832     pulse_swb        = get_bits(gb, 6);
 833     if (pulse_swb >= num_swb)
 834         return -1;
 835     pulse->pos[0]    = swb_offset[pulse_swb];
 836     pulse->pos[0]   += get_bits(gb, 5);
 837     if (pulse->pos[0] > 1023)
 838         return -1;
 839     pulse->amp[0]    = get_bits(gb, 4);
 840     for (i = 1; i < pulse->num_pulse; i++) {
 841         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 842         if (pulse->pos[i] > 1023)
 843             return -1;
 844         pulse->amp[i] = get_bits(gb, 4);
 845     }
 846     return 0;
 847 }
 848
 849 /**
 850  * Decode Temporal Noise Shaping data; reference: table 4.48.
 851  *
 852  * @return  Returns error status. 0 - OK, !0 - error
 853  */
 854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 855                       GetBitContext *gb, const IndividualChannelStream *ics)
 856 {
 857     int w, filt, i, coef_len, coef_res, coef_compress;
 858     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 859     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 860     for (w = 0; w < ics->num_windows; w++) {
 861         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 862             coef_res = get_bits1(gb);
 863
 864             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 865                 int tmp2_idx;
 866                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 867
 868                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 869                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 870                            tns->order[w][filt], tns_max_order);
 871                     tns->order[w][filt] = 0;
 872                     return -1;
 873                 }
 874                 if (tns->order[w][filt]) {
 875                     tns->direction[w][filt] = get_bits1(gb);
 876                     coef_compress = get_bits1(gb);
 877                     coef_len = coef_res + 3 - coef_compress;
 878                     tmp2_idx = 2 * coef_compress + coef_res;
 879
 880                     for (i = 0; i < tns->order[w][filt]; i++)
 881                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 882                 }
 883             }
 884         }
 885     }
 886     return 0;
 887 }
 888
 889 /**
 890  * Decode Mid/Side data; reference: table 4.54.
 891  *
 892  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 893  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 894  *                      [3] reserved for scalable AAC
 895  */
 896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 897                                    int ms_present)
 898 {
 899     int idx;
 900     if (ms_present == 1) {
 901         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 902             cpe->ms_mask[idx] = get_bits1(gb);
 903     } else if (ms_present == 2) {
 904         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 905     }
 906 }
 907
 908 #ifndef VMUL2
 909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 910                            const float *scale)
 911 {
 912     float s = *scale;
 913     *dst++ = v[idx    & 15] * s;
 914     *dst++ = v[idx>>4 & 15] * s;
 915     return dst;
 916 }
 917 #endif
 918
 919 #ifndef VMUL4
 920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 921                            const float *scale)
 922 {
 923     float s = *scale;
 924     *dst++ = v[idx    & 3] * s;
 925     *dst++ = v[idx>>2 & 3] * s;
 926     *dst++ = v[idx>>4 & 3] * s;
 927     *dst++ = v[idx>>6 & 3] * s;
 928     return dst;
 929 }
 930 #endif
 931
 932 #ifndef VMUL2S
 933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 934                             unsigned sign, const float *scale)
 935 {
 936     union float754 s0, s1;
 937
 938     s0.f = s1.f = *scale;
 939     s0.i ^= sign >> 1 << 31;
 940     s1.i ^= sign      << 31;
 941
 942     *dst++ = v[idx    & 15] * s0.f;
 943     *dst++ = v[idx>>4 & 15] * s1.f;
 944
 945     return dst;
 946 }
 947 #endif
 948
 949 #ifndef VMUL4S
 950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 951                             unsigned sign, const float *scale)
 952 {
 953     unsigned nz = idx >> 12;
 954     union float754 s = { .f = *scale };
 955     union float754 t;
 956
 957     t.i = s.i ^ (sign & 1<<31);
 958     *dst++ = v[idx    & 3] * t.f;
 959
 960     sign <<= nz & 1; nz >>= 1;
 961     t.i = s.i ^ (sign & 1<<31);
 962     *dst++ = v[idx>>2 & 3] * t.f;
 963
 964     sign <<= nz & 1; nz >>= 1;
 965     t.i = s.i ^ (sign & 1<<31);
 966     *dst++ = v[idx>>4 & 3] * t.f;
 967
 968     sign <<= nz & 1; nz >>= 1;
 969     t.i = s.i ^ (sign & 1<<31);
 970     *dst++ = v[idx>>6 & 3] * t.f;
 971
 972     return dst;
 973 }
 974 #endif
 975
 976 /**
 977  * Decode spectral data; reference: table 4.50.
 978  * Dequantize and scale spectral data; reference: 4.6.3.3.
 979  *
 980  * @param   coef            array of dequantized, scaled spectral data
 981  * @param   sf              array of scalefactors or intensity stereo positions
 982  * @param   pulse_present   set if pulses are present
 983  * @param   pulse           pointer to pulse data struct
 984  * @param   band_type       array of the used band type
 985  *
 986  * @return  Returns error status. 0 - OK, !0 - error
 987  */
 988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 989                                        GetBitContext *gb, const float sf[120],
 990                                        int pulse_present, const Pulse *pulse,
 991                                        const IndividualChannelStream *ics,
 992                                        enum BandType band_type[120])
 993 {
 994     int i, k, g, idx = 0;
 995     const int c = 1024 / ics->num_windows;
 996     const uint16_t *offsets = ics->swb_offset;
 997     float *coef_base = coef;
 998     int err_idx;
 999
1000     for (g = 0; g < ics->num_windows; g++)
1001         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1002
1003     for (g = 0; g < ics->num_window_groups; g++) {
1004         unsigned g_len = ics->group_len[g];
1005
1006         for (i = 0; i < ics->max_sfb; i++, idx++) {
1007             const unsigned cbt_m1 = band_type[idx] - 1;
1008             float *cfo = coef + offsets[i];
1009             int off_len = offsets[i + 1] - offsets[i];
1010             int group;
1011
1012             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1013                 for (group = 0; group < g_len; group++, cfo+=128) {
1014                     memset(cfo, 0, off_len * sizeof(float));
1015                 }
1016             } else if (cbt_m1 == NOISE_BT - 1) {
1017                 for (group = 0; group < g_len; group++, cfo+=128) {
1018                     float scale;
1019                     float band_energy;
1020
1021                     for (k = 0; k < off_len; k++) {
1022                         ac->random_state  = lcg_random(ac->random_state);
1023                         cfo[k] = ac->random_state;
1024                     }
1025
1026                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1027                     scale = sf[idx] / sqrtf(band_energy);
1028                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1029                 }
1030             } else {
1031                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1032                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1033                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1034                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1035                 OPEN_READER(re, gb);
1036
1037                 switch (cbt_m1 >> 1) {
1038                 case 0:
1039                     for (group = 0; group < g_len; group++, cfo+=128) {
1040                         float *cf = cfo;
1041                         int len = off_len;
1042
1043                         do {
1044                             int code;
1045                             unsigned cb_idx;
1046
1047                             UPDATE_CACHE(re, gb);
1048                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1049
1050                             if (code >= cb_size) {
1051                                 err_idx = code;
1052                                 goto err_cb_overflow;
1053                             }
1054
1055                             cb_idx = cb_vector_idx[code];
1056                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1057                         } while (len -= 4);
1058                     }
1059                     break;
1060
1061                 case 1:
1062                     for (group = 0; group < g_len; group++, cfo+=128) {
1063                         float *cf = cfo;
1064                         int len = off_len;
1065
1066                         do {
1067                             int code;
1068                             unsigned nnz;
1069                             unsigned cb_idx;
1070                             uint32_t bits;
1071
1072                             UPDATE_CACHE(re, gb);
1073                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1074
1075                             if (code >= cb_size) {
1076                                 err_idx = code;
1077                                 goto err_cb_overflow;
1078                             }
1079
1080 #if MIN_CACHE_BITS < 20
1081                             UPDATE_CACHE(re, gb);
1082 #endif
1083                             cb_idx = cb_vector_idx[code];
1084                             nnz = cb_idx >> 8 & 15;
1085                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086                             LAST_SKIP_BITS(re, gb, nnz);
1087                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1088                         } while (len -= 4);
1089                     }
1090                     break;
1091
1092                 case 2:
1093                     for (group = 0; group < g_len; group++, cfo+=128) {
1094                         float *cf = cfo;
1095                         int len = off_len;
1096
1097                         do {
1098                             int code;
1099                             unsigned cb_idx;
1100
1101                             UPDATE_CACHE(re, gb);
1102                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1103
1104                             if (code >= cb_size) {
1105                                 err_idx = code;
1106                                 goto err_cb_overflow;
1107                             }
1108
1109                             cb_idx = cb_vector_idx[code];
1110                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1111                         } while (len -= 2);
1112                     }
1113                     break;
1114
1115                 case 3:
1116                 case 4:
1117                     for (group = 0; group < g_len; group++, cfo+=128) {
1118                         float *cf = cfo;
1119                         int len = off_len;
1120
1121                         do {
1122                             int code;
1123                             unsigned nnz;
1124                             unsigned cb_idx;
1125                             unsigned sign;
1126
1127                             UPDATE_CACHE(re, gb);
1128                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1129
1130                             if (code >= cb_size) {
1131                                 err_idx = code;
1132                                 goto err_cb_overflow;
1133                             }
1134
1135                             cb_idx = cb_vector_idx[code];
1136                             nnz = cb_idx >> 8 & 15;
1137                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1138                             LAST_SKIP_BITS(re, gb, nnz);
1139                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1140                         } while (len -= 2);
1141                     }
1142                     break;
1143
1144                 default:
1145                     for (group = 0; group < g_len; group++, cfo+=128) {
1146                         float *cf = cfo;
1147                         uint32_t *icf = (uint32_t *) cf;
1148                         int len = off_len;
1149
1150                         do {
1151                             int code;
1152                             unsigned nzt, nnz;
1153                             unsigned cb_idx;
1154                             uint32_t bits;
1155                             int j;
1156
1157                             UPDATE_CACHE(re, gb);
1158                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1159
1160                             if (!code) {
1161                                 *icf++ = 0;
1162                                 *icf++ = 0;
1163                                 continue;
1164                             }
1165
1166                             if (code >= cb_size) {
1167                                 err_idx = code;
1168                                 goto err_cb_overflow;
1169                             }
1170
1171                             cb_idx = cb_vector_idx[code];
1172                             nnz = cb_idx >> 12;
1173                             nzt = cb_idx >> 8;
1174                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1175                             LAST_SKIP_BITS(re, gb, nnz);
1176
1177                             for (j = 0; j < 2; j++) {
1178                                 if (nzt & 1<<j) {
1179                                     uint32_t b;
1180                                     int n;
1181                                     /* The total length of escape_sequence must be < 22 bits according
1182                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1183                                     UPDATE_CACHE(re, gb);
1184                                     b = GET_CACHE(re, gb);
1185                                     b = 31 - av_log2(~b);
1186
1187                                     if (b > 8) {
1188                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1189                                         return -1;
1190                                     }
1191
1192 #if MIN_CACHE_BITS < 21
1193                                     LAST_SKIP_BITS(re, gb, b + 1);
1194                                     UPDATE_CACHE(re, gb);
1195 #else
1196                                     SKIP_BITS(re, gb, b + 1);
1197 #endif
1198                                     b += 4;
1199                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1200                                     LAST_SKIP_BITS(re, gb, b);
1201                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1202                                     bits <<= 1;
1203                                 } else {
1204                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1205                                     *icf++ = (bits & 1<<31) | v;
1206                                     bits <<= !!v;
1207                                 }
1208                                 cb_idx >>= 4;
1209                             }
1210                         } while (len -= 2);
1211
1212                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1213                     }
1214                 }
1215
1216                 CLOSE_READER(re, gb);
1217             }
1218         }
1219         coef += g_len << 7;
1220     }
1221
1222     if (pulse_present) {
1223         idx = 0;
1224         for (i = 0; i < pulse->num_pulse; i++) {
1225             float co = coef_base[ pulse->pos[i] ];
1226             while (offsets[idx + 1] <= pulse->pos[i])
1227                 idx++;
1228             if (band_type[idx] != NOISE_BT && sf[idx]) {
1229                 float ico = -pulse->amp[i];
1230                 if (co) {
1231                     co /= sf[idx];
1232                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1233                 }
1234                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1235             }
1236         }
1237     }
1238     return 0;
1239
1240 err_cb_overflow:
1241     av_log(ac->avctx, AV_LOG_ERROR,
1242            "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1243            band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1244     return -1;
1245 }
1246
1247 static av_always_inline float flt16_round(float pf)
1248 {
1249     union float754 tmp;
1250     tmp.f = pf;
1251     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1252     return tmp.f;
1253 }
1254
1255 static av_always_inline float flt16_even(float pf)
1256 {
1257     union float754 tmp;
1258     tmp.f = pf;
1259     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1260     return tmp.f;
1261 }
1262
1263 static av_always_inline float flt16_trunc(float pf)
1264 {
1265     union float754 pun;
1266     pun.f = pf;
1267     pun.i &= 0xFFFF0000U;
1268     return pun.f;
1269 }
1270
1271 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1272                     int output_enable)
1273 {
1274     const float a     = 0.953125; // 61.0 / 64
1275     const float alpha = 0.90625;  // 29.0 / 32
1276     float e0, e1;
1277     float pv;
1278     float k1, k2;
1279
1280     k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1281     k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1282
1283     pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1284     if (output_enable)
1285         *coef += pv * ac->sf_scale;
1286
1287     e0 = *coef / ac->sf_scale;
1288     e1 = e0 - k1 * ps->r0;
1289
1290     ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1291     ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1292     ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1293     ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
1294
1295     ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1296     ps->r0 = flt16_trunc(a * e0);
1297 }
1298
1299 /**
1300  * Apply AAC-Main style frequency domain prediction.
1301  */
1302 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1303 {
1304     int sfb, k;
1305
1306     if (!sce->ics.predictor_initialized) {
1307         reset_all_predictors(sce->predictor_state);
1308         sce->ics.predictor_initialized = 1;
1309     }
1310
1311     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1312         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1313             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1314                 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1315                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1316             }
1317         }
1318         if (sce->ics.predictor_reset_group)
1319             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1320     } else
1321         reset_all_predictors(sce->predictor_state);
1322 }
1323
1324 /**
1325  * Decode an individual_channel_stream payload; reference: table 4.44.
1326  *
1327  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1328  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1329  *
1330  * @return  Returns error status. 0 - OK, !0 - error
1331  */
1332 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1333                       GetBitContext *gb, int common_window, int scale_flag)
1334 {
1335     Pulse pulse;
1336     TemporalNoiseShaping    *tns = &sce->tns;
1337     IndividualChannelStream *ics = &sce->ics;
1338     float *out = sce->coeffs;
1339     int global_gain, pulse_present = 0;
1340
1341     /* This assignment is to silence a GCC warning about the variable being used
1342      * uninitialized when in fact it always is.
1343      */
1344     pulse.num_pulse = 0;
1345
1346     global_gain = get_bits(gb, 8);
1347
1348     if (!common_window && !scale_flag) {
1349         if (decode_ics_info(ac, ics, gb, 0) < 0)
1350             return -1;
1351     }
1352
1353     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1354         return -1;
1355     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1356         return -1;
1357
1358     pulse_present = 0;
1359     if (!scale_flag) {
1360         if ((pulse_present = get_bits1(gb))) {
1361             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1362                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1363                 return -1;
1364             }
1365             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1366                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1367                 return -1;
1368             }
1369         }
1370         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1371             return -1;
1372         if (get_bits1(gb)) {
1373             av_log_missing_feature(ac->avctx, "SSR", 1);
1374             return -1;
1375         }
1376     }
1377
1378     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1379         return -1;
1380
1381     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1382         apply_prediction(ac, sce);
1383
1384     return 0;
1385 }
1386
1387 /**
1388  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1389  */
1390 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1391 {
1392     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1393     float *ch0 = cpe->ch[0].coeffs;
1394     float *ch1 = cpe->ch[1].coeffs;
1395     int g, i, group, idx = 0;
1396     const uint16_t *offsets = ics->swb_offset;
1397     for (g = 0; g < ics->num_window_groups; g++) {
1398         for (i = 0; i < ics->max_sfb; i++, idx++) {
1399             if (cpe->ms_mask[idx] &&
1400                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1401                 for (group = 0; group < ics->group_len[g]; group++) {
1402                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1403                                               ch1 + group * 128 + offsets[i],
1404                                               offsets[i+1] - offsets[i]);
1405                 }
1406             }
1407         }
1408         ch0 += ics->group_len[g] * 128;
1409         ch1 += ics->group_len[g] * 128;
1410     }
1411 }
1412
1413 /**
1414  * intensity stereo decoding; reference: 4.6.8.2.3
1415  *
1416  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1417  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1418  *                      [3] reserved for scalable AAC
1419  */
1420 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1421 {
1422     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1423     SingleChannelElement         *sce1 = &cpe->ch[1];
1424     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1425     const uint16_t *offsets = ics->swb_offset;
1426     int g, group, i, k, idx = 0;
1427     int c;
1428     float scale;
1429     for (g = 0; g < ics->num_window_groups; g++) {
1430         for (i = 0; i < ics->max_sfb;) {
1431             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1432                 const int bt_run_end = sce1->band_type_run_end[idx];
1433                 for (; i < bt_run_end; i++, idx++) {
1434                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1435                     if (ms_present)
1436                         c *= 1 - 2 * cpe->ms_mask[idx];
1437                     scale = c * sce1->sf[idx];
1438                     for (group = 0; group < ics->group_len[g]; group++)
1439                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1440                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1441                 }
1442             } else {
1443                 int bt_run_end = sce1->band_type_run_end[idx];
1444                 idx += bt_run_end - i;
1445                 i    = bt_run_end;
1446             }
1447         }
1448         coef0 += ics->group_len[g] * 128;
1449         coef1 += ics->group_len[g] * 128;
1450     }
1451 }
1452
1453 /**
1454  * Decode a channel_pair_element; reference: table 4.4.
1455  *
1456  * @param   elem_id Identifies the instance of a syntax element.
1457  *
1458  * @return  Returns error status. 0 - OK, !0 - error
1459  */
1460 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1461 {
1462     int i, ret, common_window, ms_present = 0;
1463
1464     common_window = get_bits1(gb);
1465     if (common_window) {
1466         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1467             return -1;
1468         i = cpe->ch[1].ics.use_kb_window[0];
1469         cpe->ch[1].ics = cpe->ch[0].ics;
1470         cpe->ch[1].ics.use_kb_window[1] = i;
1471         ms_present = get_bits(gb, 2);
1472         if (ms_present == 3) {
1473             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1474             return -1;
1475         } else if (ms_present)
1476             decode_mid_side_stereo(cpe, gb, ms_present);
1477     }
1478     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1479         return ret;
1480     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1481         return ret;
1482
1483     if (common_window) {
1484         if (ms_present)
1485             apply_mid_side_stereo(ac, cpe);
1486         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1487             apply_prediction(ac, &cpe->ch[0]);
1488             apply_prediction(ac, &cpe->ch[1]);
1489         }
1490     }
1491
1492     apply_intensity_stereo(cpe, ms_present);
1493     return 0;
1494 }
1495
1496 /**
1497  * Decode coupling_channel_element; reference: table 4.8.
1498  *
1499  * @param   elem_id Identifies the instance of a syntax element.
1500  *
1501  * @return  Returns error status. 0 - OK, !0 - error
1502  */
1503 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1504 {
1505     int num_gain = 0;
1506     int c, g, sfb, ret;
1507     int sign;
1508     float scale;
1509     SingleChannelElement *sce = &che->ch[0];
1510     ChannelCoupling     *coup = &che->coup;
1511
1512     coup->coupling_point = 2 * get_bits1(gb);
1513     coup->num_coupled = get_bits(gb, 3);
1514     for (c = 0; c <= coup->num_coupled; c++) {
1515         num_gain++;
1516         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1517         coup->id_select[c] = get_bits(gb, 4);
1518         if (coup->type[c] == TYPE_CPE) {
1519             coup->ch_select[c] = get_bits(gb, 2);
1520             if (coup->ch_select[c] == 3)
1521                 num_gain++;
1522         } else
1523             coup->ch_select[c] = 2;
1524     }
1525     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1526
1527     sign  = get_bits(gb, 1);
1528     scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1529
1530     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1531         return ret;
1532
1533     for (c = 0; c < num_gain; c++) {
1534         int idx  = 0;
1535         int cge  = 1;
1536         int gain = 0;
1537         float gain_cache = 1.;
1538         if (c) {
1539             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1540             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1541             gain_cache = pow(scale, -gain);
1542         }
1543         if (coup->coupling_point == AFTER_IMDCT) {
1544             coup->gain[c][0] = gain_cache;
1545         } else {
1546             for (g = 0; g < sce->ics.num_window_groups; g++) {
1547                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1548                     if (sce->band_type[idx] != ZERO_BT) {
1549                         if (!cge) {
1550                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1551                             if (t) {
1552                                 int s = 1;
1553                                 t = gain += t;
1554                                 if (sign) {
1555                                     s  -= 2 * (t & 0x1);
1556                                     t >>= 1;
1557                                 }
1558                                 gain_cache = pow(scale, -t) * s;
1559                             }
1560                         }
1561                         coup->gain[c][idx] = gain_cache;
1562                     }
1563                 }
1564             }
1565         }
1566     }
1567     return 0;
1568 }
1569
1570 /**
1571  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1572  *
1573  * @return  Returns number of bytes consumed.
1574  */
1575 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1576                                          GetBitContext *gb)
1577 {
1578     int i;
1579     int num_excl_chan = 0;
1580
1581     do {
1582         for (i = 0; i < 7; i++)
1583             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1584     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1585
1586     return num_excl_chan / 7;
1587 }
1588
1589 /**
1590  * Decode dynamic range information; reference: table 4.52.
1591  *
1592  * @param   cnt length of TYPE_FIL syntactic element in bytes
1593  *
1594  * @return  Returns number of bytes consumed.
1595  */
1596 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1597                                 GetBitContext *gb, int cnt)
1598 {
1599     int n             = 1;
1600     int drc_num_bands = 1;
1601     int i;
1602
1603     /* pce_tag_present? */
1604     if (get_bits1(gb)) {
1605         che_drc->pce_instance_tag  = get_bits(gb, 4);
1606         skip_bits(gb, 4); // tag_reserved_bits
1607         n++;
1608     }
1609
1610     /* excluded_chns_present? */
1611     if (get_bits1(gb)) {
1612         n += decode_drc_channel_exclusions(che_drc, gb);
1613     }
1614
1615     /* drc_bands_present? */
1616     if (get_bits1(gb)) {
1617         che_drc->band_incr            = get_bits(gb, 4);
1618         che_drc->interpolation_scheme = get_bits(gb, 4);
1619         n++;
1620         drc_num_bands += che_drc->band_incr;
1621         for (i = 0; i < drc_num_bands; i++) {
1622             che_drc->band_top[i] = get_bits(gb, 8);
1623             n++;
1624         }
1625     }
1626
1627     /* prog_ref_level_present? */
1628     if (get_bits1(gb)) {
1629         che_drc->prog_ref_level = get_bits(gb, 7);
1630         skip_bits1(gb); // prog_ref_level_reserved_bits
1631         n++;
1632     }
1633
1634     for (i = 0; i < drc_num_bands; i++) {
1635         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1636         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1637         n++;
1638     }
1639
1640     return n;
1641 }
1642
1643 /**
1644  * Decode extension data (incomplete); reference: table 4.51.
1645  *
1646  * @param   cnt length of TYPE_FIL syntactic element in bytes
1647  *
1648  * @return Returns number of bytes consumed
1649  */
1650 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1651                                     ChannelElement *che, enum RawDataBlockType elem_type)
1652 {
1653     int crc_flag = 0;
1654     int res = cnt;
1655     switch (get_bits(gb, 4)) { // extension type
1656     case EXT_SBR_DATA_CRC:
1657         crc_flag++;
1658     case EXT_SBR_DATA:
1659         if (!che) {
1660             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1661             return res;
1662         } else if (!ac->m4ac.sbr) {
1663             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1664             skip_bits_long(gb, 8 * cnt - 4);
1665             return res;
1666         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1667             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1668             skip_bits_long(gb, 8 * cnt - 4);
1669             return res;
1670         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1671             ac->m4ac.sbr = 1;
1672             ac->m4ac.ps = 1;
1673             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1674         } else {
1675             ac->m4ac.sbr = 1;
1676         }
1677         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1678         break;
1679     case EXT_DYNAMIC_RANGE:
1680         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1681         break;
1682     case EXT_FILL:
1683     case EXT_FILL_DATA:
1684     case EXT_DATA_ELEMENT:
1685     default:
1686         skip_bits_long(gb, 8 * cnt - 4);
1687         break;
1688     };
1689     return res;
1690 }
1691
1692 /**
1693  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1694  *
1695  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1696  * @param   coef    spectral coefficients
1697  */
1698 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1699                       IndividualChannelStream *ics, int decode)
1700 {
1701     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1702     int w, filt, m, i;
1703     int bottom, top, order, start, end, size, inc;
1704     float lpc[TNS_MAX_ORDER];
1705
1706     for (w = 0; w < ics->num_windows; w++) {
1707         bottom = ics->num_swb;
1708         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1709             top    = bottom;
1710             bottom = FFMAX(0, top - tns->length[w][filt]);
1711             order  = tns->order[w][filt];
1712             if (order == 0)
1713                 continue;
1714
1715             // tns_decode_coef
1716             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1717
1718             start = ics->swb_offset[FFMIN(bottom, mmm)];
1719             end   = ics->swb_offset[FFMIN(   top, mmm)];
1720             if ((size = end - start) <= 0)
1721                 continue;
1722             if (tns->direction[w][filt]) {
1723                 inc = -1;
1724                 start = end - 1;
1725             } else {
1726                 inc = 1;
1727             }
1728             start += w * 128;
1729
1730             // ar filter
1731             for (m = 0; m < size; m++, start += inc)
1732                 for (i = 1; i <= FFMIN(m, order); i++)
1733                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1734         }
1735     }
1736 }
1737
1738 /**
1739  * Conduct IMDCT and windowing.
1740  */
1741 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1742 {
1743     IndividualChannelStream *ics = &sce->ics;
1744     float *in    = sce->coeffs;
1745     float *out   = sce->ret;
1746     float *saved = sce->saved;
1747     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1748     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1749     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1750     float *buf  = ac->buf_mdct;
1751     float *temp = ac->temp;
1752     int i;
1753
1754     // imdct
1755     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1756         if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1757             av_log(ac->avctx, AV_LOG_WARNING,
1758                    "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1759                    "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1760         for (i = 0; i < 1024; i += 128)
1761             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1762     } else
1763         ff_imdct_half(&ac->mdct, buf, in);
1764
1765     /* window overlapping
1766      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1767      * and long to short transitions are considered to be short to short
1768      * transitions. This leaves just two cases (long to long and short to short)
1769      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1770      */
1771     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1772             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1773         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1774     } else {
1775         for (i = 0; i < 448; i++)
1776             out[i] = saved[i] + bias;
1777
1778         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1779             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1780             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1781             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1782             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1783             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1784             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1785         } else {
1786             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1787             for (i = 576; i < 1024; i++)
1788                 out[i] = buf[i-512] + bias;
1789         }
1790     }
1791
1792     // buffer update
1793     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1794         for (i = 0; i < 64; i++)
1795             saved[i] = temp[64 + i] - bias;
1796         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1797         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1798         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1799         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1800     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1801         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1802         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1803     } else { // LONG_STOP or ONLY_LONG
1804         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1805     }
1806 }
1807
1808 /**
1809  * Apply dependent channel coupling (applied before IMDCT).
1810  *
1811  * @param   index   index into coupling gain array
1812  */
1813 static void apply_dependent_coupling(AACContext *ac,
1814                                      SingleChannelElement *target,
1815                                      ChannelElement *cce, int index)
1816 {
1817     IndividualChannelStream *ics = &cce->ch[0].ics;
1818     const uint16_t *offsets = ics->swb_offset;
1819     float *dest = target->coeffs;
1820     const float *src = cce->ch[0].coeffs;
1821     int g, i, group, k, idx = 0;
1822     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1823         av_log(ac->avctx, AV_LOG_ERROR,
1824                "Dependent coupling is not supported together with LTP\n");
1825         return;
1826     }
1827     for (g = 0; g < ics->num_window_groups; g++) {
1828         for (i = 0; i < ics->max_sfb; i++, idx++) {
1829             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1830                 const float gain = cce->coup.gain[index][idx];
1831                 for (group = 0; group < ics->group_len[g]; group++) {
1832                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1833                         // XXX dsputil-ize
1834                         dest[group * 128 + k] += gain * src[group * 128 + k];
1835                     }
1836                 }
1837             }
1838         }
1839         dest += ics->group_len[g] * 128;
1840         src  += ics->group_len[g] * 128;
1841     }
1842 }
1843
1844 /**
1845  * Apply independent channel coupling (applied after IMDCT).
1846  *
1847  * @param   index   index into coupling gain array
1848  */
1849 static void apply_independent_coupling(AACContext *ac,
1850                                        SingleChannelElement *target,
1851                                        ChannelElement *cce, int index)
1852 {
1853     int i;
1854     const float gain = cce->coup.gain[index][0];
1855     const float bias = ac->add_bias;
1856     const float *src = cce->ch[0].ret;
1857     float *dest = target->ret;
1858     const int len = 1024 << (ac->m4ac.sbr == 1);
1859
1860     for (i = 0; i < len; i++)
1861         dest[i] += gain * (src[i] - bias);
1862 }
1863
1864 /**
1865  * channel coupling transformation interface
1866  *
1867  * @param   index   index into coupling gain array
1868  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1869  */
1870 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1871                                    enum RawDataBlockType type, int elem_id,
1872                                    enum CouplingPoint coupling_point,
1873                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1874 {
1875     int i, c;
1876
1877     for (i = 0; i < MAX_ELEM_ID; i++) {
1878         ChannelElement *cce = ac->che[TYPE_CCE][i];
1879         int index = 0;
1880
1881         if (cce && cce->coup.coupling_point == coupling_point) {
1882             ChannelCoupling *coup = &cce->coup;
1883
1884             for (c = 0; c <= coup->num_coupled; c++) {
1885                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1886                     if (coup->ch_select[c] != 1) {
1887                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1888                         if (coup->ch_select[c] != 0)
1889                             index++;
1890                     }
1891                     if (coup->ch_select[c] != 2)
1892                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1893                 } else
1894                     index += 1 + (coup->ch_select[c] == 3);
1895             }
1896         }
1897     }
1898 }
1899
1900 /**
1901  * Convert spectral data to float samples, applying all supported tools as appropriate.
1902  */
1903 static void spectral_to_sample(AACContext *ac)
1904 {
1905     int i, type;
1906     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1907     for (type = 3; type >= 0; type--) {
1908         for (i = 0; i < MAX_ELEM_ID; i++) {
1909             ChannelElement *che = ac->che[type][i];
1910             if (che) {
1911                 if (type <= TYPE_CPE)
1912                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1913                 if (che->ch[0].tns.present)
1914                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1915                 if (che->ch[1].tns.present)
1916                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1917                 if (type <= TYPE_CPE)
1918                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1919                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1920                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1921                     if (type == TYPE_CPE) {
1922                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1923                     }
1924                     if (ac->m4ac.sbr > 0) {
1925                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1926                     }
1927                 }
1928                 if (type <= TYPE_CCE)
1929                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1930             }
1931         }
1932     }
1933 }
1934
1935 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1936 {
1937     int size;
1938     AACADTSHeaderInfo hdr_info;
1939
1940     size = ff_aac_parse_header(gb, &hdr_info);
1941     if (size > 0) {
1942         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1943             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1944             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1945             ac->m4ac.chan_config = hdr_info.chan_config;
1946             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1947                 return -7;
1948             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1949                 return -7;
1950         } else if (ac->output_configured != OC_LOCKED) {
1951             ac->output_configured = OC_NONE;
1952         }
1953         if (ac->output_configured != OC_LOCKED) {
1954             ac->m4ac.sbr = -1;
1955             ac->m4ac.ps  = -1;
1956         }
1957         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1958         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1959         ac->m4ac.object_type     = hdr_info.object_type;
1960         if (!ac->avctx->sample_rate)
1961             ac->avctx->sample_rate = hdr_info.sample_rate;
1962         if (hdr_info.num_aac_frames == 1) {
1963             if (!hdr_info.crc_absent)
1964                 skip_bits(gb, 16);
1965         } else {
1966             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1967             return -1;
1968         }
1969     }
1970     return size;
1971 }
1972
1973 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1974                             int *data_size, AVPacket *avpkt)
1975 {
1976     const uint8_t *buf = avpkt->data;
1977     int buf_size = avpkt->size;
1978     AACContext *ac = avctx->priv_data;
1979     ChannelElement *che = NULL, *che_prev = NULL;
1980     GetBitContext gb;
1981     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1982     int err, elem_id, data_size_tmp;
1983     int buf_consumed;
1984     int samples = 0, multiplier;
1985     int buf_offset;
1986
1987     init_get_bits(&gb, buf, buf_size * 8);
1988
1989     if (show_bits(&gb, 12) == 0xfff) {
1990         if (parse_adts_frame_header(ac, &gb) < 0) {
1991             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1992             return -1;
1993         }
1994         if (ac->m4ac.sampling_index > 12) {
1995             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1996             return -1;
1997         }
1998     }
1999
2000     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
2001     // parse
2002     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
2003         elem_id = get_bits(&gb, 4);
2004
2005         if (elem_type < TYPE_DSE) {
2006             if (!(che=get_che(ac, elem_type, elem_id))) {
2007                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2008                        elem_type, elem_id);
2009                 return -1;
2010             }
2011             samples = 1024;
2012         }
2013
2014         switch (elem_type) {
2015
2016         case TYPE_SCE:
2017             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2018             break;
2019
2020         case TYPE_CPE:
2021             err = decode_cpe(ac, &gb, che);
2022             break;
2023
2024         case TYPE_CCE:
2025             err = decode_cce(ac, &gb, che);
2026             break;
2027
2028         case TYPE_LFE:
2029             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2030             break;
2031
2032         case TYPE_DSE:
2033             err = skip_data_stream_element(ac, &gb);
2034             break;
2035
2036         case TYPE_PCE: {
2037             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2038             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2039             if ((err = decode_pce(ac, new_che_pos, &gb)))
2040                 break;
2041             if (ac->output_configured > OC_TRIAL_PCE)
2042                 av_log(avctx, AV_LOG_ERROR,
2043                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2044             else
2045                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2046             break;
2047         }
2048
2049         case TYPE_FIL:
2050             if (elem_id == 15)
2051                 elem_id += get_bits(&gb, 8) - 1;
2052             if (get_bits_left(&gb) < 8 * elem_id) {
2053                     av_log(avctx, AV_LOG_ERROR, overread_err);
2054                     return -1;
2055             }
2056             while (elem_id > 0)
2057                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2058             err = 0; /* FIXME */
2059             break;
2060
2061         default:
2062             err = -1; /* should not happen, but keeps compiler happy */
2063             break;
2064         }
2065
2066         che_prev       = che;
2067         elem_type_prev = elem_type;
2068
2069         if (err)
2070             return err;
2071
2072         if (get_bits_left(&gb) < 3) {
2073             av_log(avctx, AV_LOG_ERROR, overread_err);
2074             return -1;
2075         }
2076     }
2077
2078     spectral_to_sample(ac);
2079
2080     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2081     samples <<= multiplier;
2082     if (ac->output_configured < OC_LOCKED) {
2083         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2084         avctx->frame_size = samples;
2085     }
2086
2087     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2088     if (*data_size < data_size_tmp) {
2089         av_log(avctx, AV_LOG_ERROR,
2090                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2091                *data_size, data_size_tmp);
2092         return -1;
2093     }
2094     *data_size = data_size_tmp;
2095
2096     if (samples)
2097         ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2098
2099     if (ac->output_configured)
2100         ac->output_configured = OC_LOCKED;
2101
2102     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2103     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2104         if (buf[buf_offset])
2105             break;
2106
2107     return buf_size > buf_offset ? buf_consumed : buf_size;
2108 }
2109
2110 static av_cold int aac_decode_close(AVCodecContext *avctx)
2111 {
2112     AACContext *ac = avctx->priv_data;
2113     int i, type;
2114
2115     for (i = 0; i < MAX_ELEM_ID; i++) {
2116         for (type = 0; type < 4; type++) {
2117             if (ac->che[type][i])
2118                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2119             av_freep(&ac->che[type][i]);
2120         }
2121     }
2122
2123     ff_mdct_end(&ac->mdct);
2124     ff_mdct_end(&ac->mdct_small);
2125     return 0;
2126 }
2127
2128 AVCodec aac_decoder = {
2129     "aac",
2130     AVMEDIA_TYPE_AUDIO,
2131     CODEC_ID_AAC,
2132     sizeof(AACContext),
2133     aac_decode_init,
2134     NULL,
2135     aac_decode_close,
2136     aac_decode_frame,
2137     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2138     .sample_fmts = (const enum SampleFormat[]) {
2139         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2140     },
2141     .channel_layouts = aac_channel_layout,
2142 };