git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * Y                    Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  74  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  75            Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aacadtsdec.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
/** Overlay giving access to the same 32 bits either as a float or as a raw integer. */
union float754 {
    float f;
    uint32_t i;
};

/* VLC table for scalefactor codes; filled by INIT_VLC_STATIC in aac_decode_init(). */
static VLC vlc_scalefactors;
/* One VLC table per spectral codebook; filled via AAC_INIT_VLC_STATIC in aac_decode_init(). */
static VLC vlc_spectral[11];

/* Shared log message for every "ran out of input bits" error path. */
static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE ||
 204                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 205                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 206             }
 207         }
 208     } else {
 209         if (ac->che[type][id])
 210             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 211         av_freep(&ac->che[type][id]);
 212     }
 213     return 0;
 214 }
 215
 216 /**
 217  * Configure output channel order based on the current program configuration element.
 218  *
 219  * @param   che_pos current channel position configuration
 220  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 221  *
 222  * @return  Returns error status. 0 - OK, !0 - error
 223  */
 224 static av_cold int output_configure(AACContext *ac,
 225                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 226                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 227                             int channel_config, enum OCStatus oc_type)
 228 {
 229     AVCodecContext *avctx = ac->avctx;
 230     int i, type, channels = 0, ret;
 231
 232     if (new_che_pos != che_pos)
 233     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 234
 235     if (channel_config) {
 236         for (i = 0; i < tags_per_config[channel_config]; i++) {
 237             if ((ret = che_configure(ac, che_pos,
 238                                      aac_channel_layout_map[channel_config - 1][i][0],
 239                                      aac_channel_layout_map[channel_config - 1][i][1],
 240                                      &channels)))
 241                 return ret;
 242         }
 243
 244         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 245         ac->tags_mapped = 0;
 246
 247         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 248     } else {
 249         /* Allocate or free elements depending on if they are in the
 250          * current program configuration.
 251          *
 252          * Set up default 1:1 output mapping.
 253          *
 254          * For a 5.1 stream the output order will be:
 255          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 256          */
 257
 258         for (i = 0; i < MAX_ELEM_ID; i++) {
 259             for (type = 0; type < 4; type++) {
 260                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 261                     return ret;
 262             }
 263         }
 264
 265         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 266         ac->tags_mapped = 4 * MAX_ELEM_ID;
 267
 268         avctx->channel_layout = 0;
 269     }
 270
 271     avctx->channels = channels;
 272
 273     ac->output_configured = oc_type;
 274
 275     return 0;
 276 }
 277
 278 /**
 279  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 280  *
 281  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 282  * @param sce_map mono (Single Channel Element) map
 283  * @param type speaker type/position for these channels
 284  */
 285 static void decode_channel_map(enum ChannelPosition *cpe_map,
 286                                enum ChannelPosition *sce_map,
 287                                enum ChannelPosition type,
 288                                GetBitContext *gb, int n)
 289 {
 290     while (n--) {
 291         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 292         map[get_bits(gb, 4)] = type;
 293     }
 294 }
 295
 296 /**
 297  * Decode program configuration element; reference: table 4.2.
 298  *
 299  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 300  *
 301  * @return  Returns error status. 0 - OK, !0 - error
 302  */
 303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 304                       GetBitContext *gb)
 305 {
 306     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 307     int comment_len;
 308
 309     skip_bits(gb, 2);  // object_type
 310
 311     sampling_index = get_bits(gb, 4);
 312     if (ac->m4ac.sampling_index != sampling_index)
 313         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 314
 315     num_front       = get_bits(gb, 4);
 316     num_side        = get_bits(gb, 4);
 317     num_back        = get_bits(gb, 4);
 318     num_lfe         = get_bits(gb, 2);
 319     num_assoc_data  = get_bits(gb, 3);
 320     num_cc          = get_bits(gb, 4);
 321
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // mono_mixdown_tag
 324     if (get_bits1(gb))
 325         skip_bits(gb, 4); // stereo_mixdown_tag
 326
 327     if (get_bits1(gb))
 328         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 329
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 331     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 332     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 333     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 334
 335     skip_bits_long(gb, 4 * num_assoc_data);
 336
 337     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 338
 339     align_get_bits(gb);
 340
 341     /* comment field, first byte is length */
 342     comment_len = get_bits(gb, 8) * 8;
 343     if (get_bits_left(gb) < comment_len) {
 344         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 345         return -1;
 346     }
 347     skip_bits_long(gb, comment_len);
 348     return 0;
 349 }
 350
 351 /**
 352  * Set up channel positions based on a default channel configuration
 353  * as specified in table 1.17.
 354  *
 355  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 356  *
 357  * @return  Returns error status. 0 - OK, !0 - error
 358  */
 359 static av_cold int set_default_channel_config(AACContext *ac,
 360                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 361                                       int channel_config)
 362 {
 363     if (channel_config < 1 || channel_config > 7) {
 364         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 365                channel_config);
 366         return -1;
 367     }
 368
 369     /* default channel configurations:
 370      *
 371      * 1ch : front center (mono)
 372      * 2ch : L + R (stereo)
 373      * 3ch : front center + L + R
 374      * 4ch : front center + L + R + back center
 375      * 5ch : front center + L + R + back stereo
 376      * 6ch : front center + L + R + back stereo + LFE
 377      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 378      */
 379
 380     if (channel_config != 2)
 381         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 382     if (channel_config > 1)
 383         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 384     if (channel_config == 4)
 385         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 386     if (channel_config > 4)
 387         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 388         = AAC_CHANNEL_BACK;  // back stereo
 389     if (channel_config > 5)
 390         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 391     if (channel_config == 7)
 392         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 393
 394     return 0;
 395 }
 396
 397 /**
 398  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 399  *
 400  * @return  Returns error status. 0 - OK, !0 - error
 401  */
 402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 403                                      int channel_config)
 404 {
 405     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 406     int extension_flag, ret;
 407
 408     if (get_bits1(gb)) { // frameLengthFlag
 409         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 410         return -1;
 411     }
 412
 413     if (get_bits1(gb))       // dependsOnCoreCoder
 414         skip_bits(gb, 14);   // coreCoderDelay
 415     extension_flag = get_bits1(gb);
 416
 417     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 418         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 419         skip_bits(gb, 3);     // layerNr
 420
 421     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 422     if (channel_config == 0) {
 423         skip_bits(gb, 4);  // element_instance_tag
 424         if ((ret = decode_pce(ac, new_che_pos, gb)))
 425             return ret;
 426     } else {
 427         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 428             return ret;
 429     }
 430     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 431         return ret;
 432
 433     if (extension_flag) {
 434         switch (ac->m4ac.object_type) {
 435         case AOT_ER_BSAC:
 436             skip_bits(gb, 5);    // numOfSubFrame
 437             skip_bits(gb, 11);   // layer_length
 438             break;
 439         case AOT_ER_AAC_LC:
 440         case AOT_ER_AAC_LTP:
 441         case AOT_ER_AAC_SCALABLE:
 442         case AOT_ER_AAC_LD:
 443             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 444                                     * aacScalefactorDataResilienceFlag
 445                                     * aacSpectralDataResilienceFlag
 446                                     */
 447             break;
 448         }
 449         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 450     }
 451     return 0;
 452 }
 453
 454 /**
 455  * Decode audio specific configuration; reference: table 1.13.
 456  *
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status. 0 - OK, !0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac, void *data,
 463                                         int data_size)
 464 {
 465     GetBitContext gb;
 466     int i;
 467
 468     init_get_bits(&gb, data, data_size * 8);
 469
 470     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 471         return -1;
 472     if (ac->m4ac.sampling_index > 12) {
 473         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 474         return -1;
 475     }
 476     if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
 477         ac->m4ac.ps = 1;
 478
 479     skip_bits_long(&gb, i);
 480
 481     switch (ac->m4ac.object_type) {
 482     case AOT_AAC_MAIN:
 483     case AOT_AAC_LC:
 484         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 485             return -1;
 486         break;
 487     default:
 488         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 489                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 490         return -1;
 491     }
 492     return 0;
 493 }
 494
 495 /**
 496  * linear congruential pseudorandom number generator
 497  *
 498  * @param   previous_val    pointer to the current state of the generator
 499  *
 500  * @return  Returns a 32-bit pseudorandom integer
 501  */
 502 static av_always_inline int lcg_random(int previous_val)
 503 {
 504     return previous_val * 1664525 + 1013904223;
 505 }
 506
 507 static av_always_inline void reset_predict_state(PredictorState *ps)
 508 {
 509     ps->r0   = 0.0f;
 510     ps->r1   = 0.0f;
 511     ps->cor0 = 0.0f;
 512     ps->cor1 = 0.0f;
 513     ps->var0 = 1.0f;
 514     ps->var1 = 1.0f;
 515 }
 516
 517 static void reset_all_predictors(PredictorState *ps)
 518 {
 519     int i;
 520     for (i = 0; i < MAX_PREDICTORS; i++)
 521         reset_predict_state(&ps[i]);
 522 }
 523
 524 static void reset_predictor_group(PredictorState *ps, int group_num)
 525 {
 526     int i;
 527     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 528         reset_predict_state(&ps[i]);
 529 }
 530
 531 #define AAC_INIT_VLC_STATIC(num, size) \
 532     INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
 533          ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
 534         ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
 535         size);
 536
 537 static av_cold int aac_decode_init(AVCodecContext *avctx)
 538 {
 539     AACContext *ac = avctx->priv_data;
 540
 541     ac->avctx = avctx;
 542     ac->m4ac.sample_rate = avctx->sample_rate;
 543
 544     if (avctx->extradata_size > 0) {
 545         if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
 546             return -1;
 547     }
 548
 549     avctx->sample_fmt = SAMPLE_FMT_S16;
 550
 551     AAC_INIT_VLC_STATIC( 0, 304);
 552     AAC_INIT_VLC_STATIC( 1, 270);
 553     AAC_INIT_VLC_STATIC( 2, 550);
 554     AAC_INIT_VLC_STATIC( 3, 300);
 555     AAC_INIT_VLC_STATIC( 4, 328);
 556     AAC_INIT_VLC_STATIC( 5, 294);
 557     AAC_INIT_VLC_STATIC( 6, 306);
 558     AAC_INIT_VLC_STATIC( 7, 268);
 559     AAC_INIT_VLC_STATIC( 8, 510);
 560     AAC_INIT_VLC_STATIC( 9, 366);
 561     AAC_INIT_VLC_STATIC(10, 462);
 562
 563     ff_aac_sbr_init();
 564
 565     dsputil_init(&ac->dsp, avctx);
 566
 567     ac->random_state = 0x1f2e3d4c;
 568
 569     // -1024 - Compensate wrong IMDCT method.
 570     // 32768 - Required to scale values to the correct range for the bias method
 571     //         for float to int16 conversion.
 572
 573     if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
 574         ac->add_bias  = 385.0f;
 575         ac->sf_scale  = 1. / (-1024. * 32768.);
 576         ac->sf_offset = 0;
 577     } else {
 578         ac->add_bias  = 0.0f;
 579         ac->sf_scale  = 1. / -1024.;
 580         ac->sf_offset = 60;
 581     }
 582
 583     ff_aac_tableinit();
 584
 585     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 586                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 587                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 588                     352);
 589
 590     ff_mdct_init(&ac->mdct, 11, 1, 1.0);
 591     ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
 592     // window initialization
 593     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 594     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 595     ff_init_ff_sine_windows(10);
 596     ff_init_ff_sine_windows( 7);
 597
 598     cbrt_tableinit();
 599
 600     return 0;
 601 }
 602
 603 /**
 604  * Skip data_stream_element; reference: table 4.10.
 605  */
 606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 607 {
 608     int byte_align = get_bits1(gb);
 609     int count = get_bits(gb, 8);
 610     if (count == 255)
 611         count += get_bits(gb, 8);
 612     if (byte_align)
 613         align_get_bits(gb);
 614
 615     if (get_bits_left(gb) < 8 * count) {
 616         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 617         return -1;
 618     }
 619     skip_bits_long(gb, 8 * count);
 620     return 0;
 621 }
 622
 623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 624                              GetBitContext *gb)
 625 {
 626     int sfb;
 627     if (get_bits1(gb)) {
 628         ics->predictor_reset_group = get_bits(gb, 5);
 629         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 630             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 631             return -1;
 632         }
 633     }
 634     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 635         ics->prediction_used[sfb] = get_bits1(gb);
 636     }
 637     return 0;
 638 }
 639
 640 /**
 641  * Decode Individual Channel Stream info; reference: table 4.6.
 642  *
 643  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 644  */
 645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 646                            GetBitContext *gb, int common_window)
 647 {
 648     if (get_bits1(gb)) {
 649         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 650         memset(ics, 0, sizeof(IndividualChannelStream));
 651         return -1;
 652     }
 653     ics->window_sequence[1] = ics->window_sequence[0];
 654     ics->window_sequence[0] = get_bits(gb, 2);
 655     ics->use_kb_window[1]   = ics->use_kb_window[0];
 656     ics->use_kb_window[0]   = get_bits1(gb);
 657     ics->num_window_groups  = 1;
 658     ics->group_len[0]       = 1;
 659     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 660         int i;
 661         ics->max_sfb = get_bits(gb, 4);
 662         for (i = 0; i < 7; i++) {
 663             if (get_bits1(gb)) {
 664                 ics->group_len[ics->num_window_groups - 1]++;
 665             } else {
 666                 ics->num_window_groups++;
 667                 ics->group_len[ics->num_window_groups - 1] = 1;
 668             }
 669         }
 670         ics->num_windows       = 8;
 671         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 672         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 673         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 674         ics->predictor_present = 0;
 675     } else {
 676         ics->max_sfb               = get_bits(gb, 6);
 677         ics->num_windows           = 1;
 678         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 679         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 680         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 681         ics->predictor_present     = get_bits1(gb);
 682         ics->predictor_reset_group = 0;
 683         if (ics->predictor_present) {
 684             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 685                 if (decode_prediction(ac, ics, gb)) {
 686                     memset(ics, 0, sizeof(IndividualChannelStream));
 687                     return -1;
 688                 }
 689             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 690                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 691                 memset(ics, 0, sizeof(IndividualChannelStream));
 692                 return -1;
 693             } else {
 694                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 695                 memset(ics, 0, sizeof(IndividualChannelStream));
 696                 return -1;
 697             }
 698         }
 699     }
 700
 701     if (ics->max_sfb > ics->num_swb) {
 702         av_log(ac->avctx, AV_LOG_ERROR,
 703                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 704                ics->max_sfb, ics->num_swb);
 705         memset(ics, 0, sizeof(IndividualChannelStream));
 706         return -1;
 707     }
 708
 709     return 0;
 710 }
 711
 712 /**
 713  * Decode band types (section_data payload); reference: table 4.46.
 714  *
 715  * @param   band_type           array of the used band type
 716  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 717  *
 718  * @return  Returns error status. 0 - OK, !0 - error
 719  */
 720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 721                              int band_type_run_end[120], GetBitContext *gb,
 722                              IndividualChannelStream *ics)
 723 {
 724     int g, idx = 0;
 725     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 726     for (g = 0; g < ics->num_window_groups; g++) {
 727         int k = 0;
 728         while (k < ics->max_sfb) {
 729             uint8_t sect_end = k;
 730             int sect_len_incr;
 731             int sect_band_type = get_bits(gb, 4);
 732             if (sect_band_type == 12) {
 733                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 734                 return -1;
 735             }
 736             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 737                 sect_end += sect_len_incr;
 738             sect_end += sect_len_incr;
 739             if (get_bits_left(gb) < 0) {
 740                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 741                 return -1;
 742             }
 743             if (sect_end > ics->max_sfb) {
 744                 av_log(ac->avctx, AV_LOG_ERROR,
 745                        "Number of bands (%d) exceeds limit (%d).\n",
 746                        sect_end, ics->max_sfb);
 747                 return -1;
 748             }
 749             for (; k < sect_end; k++) {
 750                 band_type        [idx]   = sect_band_type;
 751                 band_type_run_end[idx++] = sect_end;
 752             }
 753         }
 754     }
 755     return 0;
 756 }
 757
 758 /**
 759  * Decode scalefactors; reference: table 4.47.
 760  *
 761  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 762  * @param   band_type           array of the used band type
 763  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 764  * @param   sf                  array of scalefactors or intensity stereo positions
 765  *
 766  * @return  Returns error status. 0 - OK, !0 - error
 767  */
 768 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 769                                unsigned int global_gain,
 770                                IndividualChannelStream *ics,
 771                                enum BandType band_type[120],
 772                                int band_type_run_end[120])
 773 {
 774     const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
 775     int g, i, idx = 0;
 776     int offset[3] = { global_gain, global_gain - 90, 100 };
 777     int noise_flag = 1;
 778     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 779     for (g = 0; g < ics->num_window_groups; g++) {
 780         for (i = 0; i < ics->max_sfb;) {
 781             int run_end = band_type_run_end[idx];
 782             if (band_type[idx] == ZERO_BT) {
 783                 for (; i < run_end; i++, idx++)
 784                     sf[idx] = 0.;
 785             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 786                 for (; i < run_end; i++, idx++) {
 787                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 788                     if (offset[2] > 255U) {
 789                         av_log(ac->avctx, AV_LOG_ERROR,
 790                                "%s (%d) out of range.\n", sf_str[2], offset[2]);
 791                         return -1;
 792                     }
 793                     sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
 794                 }
 795             } else if (band_type[idx] == NOISE_BT) {
 796                 for (; i < run_end; i++, idx++) {
 797                     if (noise_flag-- > 0)
 798                         offset[1] += get_bits(gb, 9) - 256;
 799                     else
 800                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 801                     if (offset[1] > 255U) {
 802                         av_log(ac->avctx, AV_LOG_ERROR,
 803                                "%s (%d) out of range.\n", sf_str[1], offset[1]);
 804                         return -1;
 805                     }
 806                     sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
 807                 }
 808             } else {
 809                 for (; i < run_end; i++, idx++) {
 810                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 811                     if (offset[0] > 255U) {
 812                         av_log(ac->avctx, AV_LOG_ERROR,
 813                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 814                         return -1;
 815                     }
 816                     sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
 817                 }
 818             }
 819         }
 820     }
 821     return 0;
 822 }
 823
 824 /**
 825  * Decode pulse data; reference: table 4.7.
 826  */
 827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 828                          const uint16_t *swb_offset, int num_swb)
 829 {
 830     int i, pulse_swb;
 831     pulse->num_pulse = get_bits(gb, 2) + 1;
 832     pulse_swb        = get_bits(gb, 6);
 833     if (pulse_swb >= num_swb)
 834         return -1;
 835     pulse->pos[0]    = swb_offset[pulse_swb];
 836     pulse->pos[0]   += get_bits(gb, 5);
 837     if (pulse->pos[0] > 1023)
 838         return -1;
 839     pulse->amp[0]    = get_bits(gb, 4);
 840     for (i = 1; i < pulse->num_pulse; i++) {
 841         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 842         if (pulse->pos[i] > 1023)
 843             return -1;
 844         pulse->amp[i] = get_bits(gb, 4);
 845     }
 846     return 0;
 847 }
 848
 849 /**
 850  * Decode Temporal Noise Shaping data; reference: table 4.48.
 851  *
 852  * @return  Returns error status. 0 - OK, !0 - error
 853  */
 854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 855                       GetBitContext *gb, const IndividualChannelStream *ics)
 856 {
 857     int w, filt, i, coef_len, coef_res, coef_compress;
 858     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 859     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 860     for (w = 0; w < ics->num_windows; w++) {
 861         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 862             coef_res = get_bits1(gb);
 863
 864             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 865                 int tmp2_idx;
 866                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 867
 868                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 869                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 870                            tns->order[w][filt], tns_max_order);
 871                     tns->order[w][filt] = 0;
 872                     return -1;
 873                 }
 874                 if (tns->order[w][filt]) {
 875                     tns->direction[w][filt] = get_bits1(gb);
 876                     coef_compress = get_bits1(gb);
 877                     coef_len = coef_res + 3 - coef_compress;
 878                     tmp2_idx = 2 * coef_compress + coef_res;
 879
 880                     for (i = 0; i < tns->order[w][filt]; i++)
 881                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 882                 }
 883             }
 884         }
 885     }
 886     return 0;
 887 }
 888
 889 /**
 890  * Decode Mid/Side data; reference: table 4.54.
 891  *
 892  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 893  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 894  *                      [3] reserved for scalable AAC
 895  */
 896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 897                                    int ms_present)
 898 {
 899     int idx;
 900     if (ms_present == 1) {
 901         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 902             cpe->ms_mask[idx] = get_bits1(gb);
 903     } else if (ms_present == 2) {
 904         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 905     }
 906 }
 907
 908 #ifndef VMUL2
 909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 910                            const float *scale)
 911 {
 912     float s = *scale;
 913     *dst++ = v[idx    & 15] * s;
 914     *dst++ = v[idx>>4 & 15] * s;
 915     return dst;
 916 }
 917 #endif
 918
 919 #ifndef VMUL4
 920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 921                            const float *scale)
 922 {
 923     float s = *scale;
 924     *dst++ = v[idx    & 3] * s;
 925     *dst++ = v[idx>>2 & 3] * s;
 926     *dst++ = v[idx>>4 & 3] * s;
 927     *dst++ = v[idx>>6 & 3] * s;
 928     return dst;
 929 }
 930 #endif
 931
 932 #ifndef VMUL2S
 933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 934                             unsigned sign, const float *scale)
 935 {
 936     union float754 s0, s1;
 937
 938     s0.f = s1.f = *scale;
 939     s0.i ^= sign >> 1 << 31;
 940     s1.i ^= sign      << 31;
 941
 942     *dst++ = v[idx    & 15] * s0.f;
 943     *dst++ = v[idx>>4 & 15] * s1.f;
 944
 945     return dst;
 946 }
 947 #endif
 948
 949 #ifndef VMUL4S
 950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 951                             unsigned sign, const float *scale)
 952 {
 953     unsigned nz = idx >> 12;
 954     union float754 s = { .f = *scale };
 955     union float754 t;
 956
 957     t.i = s.i ^ (sign & 1<<31);
 958     *dst++ = v[idx    & 3] * t.f;
 959
 960     sign <<= nz & 1; nz >>= 1;
 961     t.i = s.i ^ (sign & 1<<31);
 962     *dst++ = v[idx>>2 & 3] * t.f;
 963
 964     sign <<= nz & 1; nz >>= 1;
 965     t.i = s.i ^ (sign & 1<<31);
 966     *dst++ = v[idx>>4 & 3] * t.f;
 967
 968     sign <<= nz & 1; nz >>= 1;
 969     t.i = s.i ^ (sign & 1<<31);
 970     *dst++ = v[idx>>6 & 3] * t.f;
 971
 972     return dst;
 973 }
 974 #endif
 975
 976 /**
 977  * Decode spectral data; reference: table 4.50.
 978  * Dequantize and scale spectral data; reference: 4.6.3.3.
 979  *
 980  * @param   coef            array of dequantized, scaled spectral data
 981  * @param   sf              array of scalefactors or intensity stereo positions
 982  * @param   pulse_present   set if pulses are present
 983  * @param   pulse           pointer to pulse data struct
 984  * @param   band_type       array of the used band type
 985  *
 986  * @return  Returns error status. 0 - OK, !0 - error
 987  */
 988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 989                                        GetBitContext *gb, const float sf[120],
 990                                        int pulse_present, const Pulse *pulse,
 991                                        const IndividualChannelStream *ics,
 992                                        enum BandType band_type[120])
 993 {
 994     int i, k, g, idx = 0;
 995     const int c = 1024 / ics->num_windows;
 996     const uint16_t *offsets = ics->swb_offset;
 997     float *coef_base = coef;
 998
 999     for (g = 0; g < ics->num_windows; g++)
1000         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1001
1002     for (g = 0; g < ics->num_window_groups; g++) {
1003         unsigned g_len = ics->group_len[g];
1004
1005         for (i = 0; i < ics->max_sfb; i++, idx++) {
1006             const unsigned cbt_m1 = band_type[idx] - 1;
1007             float *cfo = coef + offsets[i];
1008             int off_len = offsets[i + 1] - offsets[i];
1009             int group;
1010
1011             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1012                 for (group = 0; group < g_len; group++, cfo+=128) {
1013                     memset(cfo, 0, off_len * sizeof(float));
1014                 }
1015             } else if (cbt_m1 == NOISE_BT - 1) {
1016                 for (group = 0; group < g_len; group++, cfo+=128) {
1017                     float scale;
1018                     float band_energy;
1019
1020                     for (k = 0; k < off_len; k++) {
1021                         ac->random_state  = lcg_random(ac->random_state);
1022                         cfo[k] = ac->random_state;
1023                     }
1024
1025                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1026                     scale = sf[idx] / sqrtf(band_energy);
1027                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1028                 }
1029             } else {
1030                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1031                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1032                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1033                 OPEN_READER(re, gb);
1034
1035                 switch (cbt_m1 >> 1) {
1036                 case 0:
1037                     for (group = 0; group < g_len; group++, cfo+=128) {
1038                         float *cf = cfo;
1039                         int len = off_len;
1040
1041                         do {
1042                             int code;
1043                             unsigned cb_idx;
1044
1045                             UPDATE_CACHE(re, gb);
1046                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1047                             cb_idx = cb_vector_idx[code];
1048                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1049                         } while (len -= 4);
1050                     }
1051                     break;
1052
1053                 case 1:
1054                     for (group = 0; group < g_len; group++, cfo+=128) {
1055                         float *cf = cfo;
1056                         int len = off_len;
1057
1058                         do {
1059                             int code;
1060                             unsigned nnz;
1061                             unsigned cb_idx;
1062                             uint32_t bits;
1063
1064                             UPDATE_CACHE(re, gb);
1065                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1066 #if MIN_CACHE_BITS < 20
1067                             UPDATE_CACHE(re, gb);
1068 #endif
1069                             cb_idx = cb_vector_idx[code];
1070                             nnz = cb_idx >> 8 & 15;
1071                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1072                             LAST_SKIP_BITS(re, gb, nnz);
1073                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1074                         } while (len -= 4);
1075                     }
1076                     break;
1077
1078                 case 2:
1079                     for (group = 0; group < g_len; group++, cfo+=128) {
1080                         float *cf = cfo;
1081                         int len = off_len;
1082
1083                         do {
1084                             int code;
1085                             unsigned cb_idx;
1086
1087                             UPDATE_CACHE(re, gb);
1088                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1089                             cb_idx = cb_vector_idx[code];
1090                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1091                         } while (len -= 2);
1092                     }
1093                     break;
1094
1095                 case 3:
1096                 case 4:
1097                     for (group = 0; group < g_len; group++, cfo+=128) {
1098                         float *cf = cfo;
1099                         int len = off_len;
1100
1101                         do {
1102                             int code;
1103                             unsigned nnz;
1104                             unsigned cb_idx;
1105                             unsigned sign;
1106
1107                             UPDATE_CACHE(re, gb);
1108                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1109                             cb_idx = cb_vector_idx[code];
1110                             nnz = cb_idx >> 8 & 15;
1111                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1112                             LAST_SKIP_BITS(re, gb, nnz);
1113                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1114                         } while (len -= 2);
1115                     }
1116                     break;
1117
1118                 default:
1119                     for (group = 0; group < g_len; group++, cfo+=128) {
1120                         float *cf = cfo;
1121                         uint32_t *icf = (uint32_t *) cf;
1122                         int len = off_len;
1123
1124                         do {
1125                             int code;
1126                             unsigned nzt, nnz;
1127                             unsigned cb_idx;
1128                             uint32_t bits;
1129                             int j;
1130
1131                             UPDATE_CACHE(re, gb);
1132                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1133
1134                             if (!code) {
1135                                 *icf++ = 0;
1136                                 *icf++ = 0;
1137                                 continue;
1138                             }
1139
1140                             cb_idx = cb_vector_idx[code];
1141                             nnz = cb_idx >> 12;
1142                             nzt = cb_idx >> 8;
1143                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1144                             LAST_SKIP_BITS(re, gb, nnz);
1145
1146                             for (j = 0; j < 2; j++) {
1147                                 if (nzt & 1<<j) {
1148                                     uint32_t b;
1149                                     int n;
1150                                     /* The total length of escape_sequence must be < 22 bits according
1151                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1152                                     UPDATE_CACHE(re, gb);
1153                                     b = GET_CACHE(re, gb);
1154                                     b = 31 - av_log2(~b);
1155
1156                                     if (b > 8) {
1157                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1158                                         return -1;
1159                                     }
1160
1161 #if MIN_CACHE_BITS < 21
1162                                     LAST_SKIP_BITS(re, gb, b + 1);
1163                                     UPDATE_CACHE(re, gb);
1164 #else
1165                                     SKIP_BITS(re, gb, b + 1);
1166 #endif
1167                                     b += 4;
1168                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1169                                     LAST_SKIP_BITS(re, gb, b);
1170                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1171                                     bits <<= 1;
1172                                 } else {
1173                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1174                                     *icf++ = (bits & 1<<31) | v;
1175                                     bits <<= !!v;
1176                                 }
1177                                 cb_idx >>= 4;
1178                             }
1179                         } while (len -= 2);
1180
1181                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1182                     }
1183                 }
1184
1185                 CLOSE_READER(re, gb);
1186             }
1187         }
1188         coef += g_len << 7;
1189     }
1190
1191     if (pulse_present) {
1192         idx = 0;
1193         for (i = 0; i < pulse->num_pulse; i++) {
1194             float co = coef_base[ pulse->pos[i] ];
1195             while (offsets[idx + 1] <= pulse->pos[i])
1196                 idx++;
1197             if (band_type[idx] != NOISE_BT && sf[idx]) {
1198                 float ico = -pulse->amp[i];
1199                 if (co) {
1200                     co /= sf[idx];
1201                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1202                 }
1203                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1204             }
1205         }
1206     }
1207     return 0;
1208 }
1209
1210 static av_always_inline float flt16_round(float pf)
1211 {
1212     union float754 tmp;
1213     tmp.f = pf;
1214     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1215     return tmp.f;
1216 }
1217
1218 static av_always_inline float flt16_even(float pf)
1219 {
1220     union float754 tmp;
1221     tmp.f = pf;
1222     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1223     return tmp.f;
1224 }
1225
1226 static av_always_inline float flt16_trunc(float pf)
1227 {
1228     union float754 pun;
1229     pun.f = pf;
1230     pun.i &= 0xFFFF0000U;
1231     return pun.f;
1232 }
1233
1234 static av_always_inline void predict(PredictorState *ps, float *coef,
1235                                      float sf_scale, float inv_sf_scale,
1236                     int output_enable)
1237 {
1238     const float a     = 0.953125; // 61.0 / 64
1239     const float alpha = 0.90625;  // 29.0 / 32
1240     float e0, e1;
1241     float pv;
1242     float k1, k2;
1243     float   r0 = ps->r0,     r1 = ps->r1;
1244     float cor0 = ps->cor0, cor1 = ps->cor1;
1245     float var0 = ps->var0, var1 = ps->var1;
1246
1247     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1248     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1249
1250     pv = flt16_round(k1 * r0 + k2 * r1);
1251     if (output_enable)
1252         *coef += pv * sf_scale;
1253
1254     e0 = *coef * inv_sf_scale;
1255     e1 = e0 - k1 * r0;
1256
1257     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1258     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1259     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1260     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1261
1262     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1263     ps->r0 = flt16_trunc(a * e0);
1264 }
1265
1266 /**
1267  * Apply AAC-Main style frequency domain prediction.
1268  */
1269 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1270 {
1271     int sfb, k;
1272     float sf_scale = ac->sf_scale, inv_sf_scale = 1 / ac->sf_scale;
1273
1274     if (!sce->ics.predictor_initialized) {
1275         reset_all_predictors(sce->predictor_state);
1276         sce->ics.predictor_initialized = 1;
1277     }
1278
1279     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1280         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1281             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1282                 predict(&sce->predictor_state[k], &sce->coeffs[k],
1283                         sf_scale, inv_sf_scale,
1284                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1285             }
1286         }
1287         if (sce->ics.predictor_reset_group)
1288             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1289     } else
1290         reset_all_predictors(sce->predictor_state);
1291 }
1292
1293 /**
1294  * Decode an individual_channel_stream payload; reference: table 4.44.
1295  *
1296  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1297  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1298  *
1299  * @return  Returns error status. 0 - OK, !0 - error
1300  */
1301 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1302                       GetBitContext *gb, int common_window, int scale_flag)
1303 {
1304     Pulse pulse;
1305     TemporalNoiseShaping    *tns = &sce->tns;
1306     IndividualChannelStream *ics = &sce->ics;
1307     float *out = sce->coeffs;
1308     int global_gain, pulse_present = 0;
1309
1310     /* This assignment is to silence a GCC warning about the variable being used
1311      * uninitialized when in fact it always is.
1312      */
1313     pulse.num_pulse = 0;
1314
1315     global_gain = get_bits(gb, 8);
1316
1317     if (!common_window && !scale_flag) {
1318         if (decode_ics_info(ac, ics, gb, 0) < 0)
1319             return -1;
1320     }
1321
1322     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1323         return -1;
1324     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1325         return -1;
1326
1327     pulse_present = 0;
1328     if (!scale_flag) {
1329         if ((pulse_present = get_bits1(gb))) {
1330             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1331                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1332                 return -1;
1333             }
1334             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1335                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1336                 return -1;
1337             }
1338         }
1339         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1340             return -1;
1341         if (get_bits1(gb)) {
1342             av_log_missing_feature(ac->avctx, "SSR", 1);
1343             return -1;
1344         }
1345     }
1346
1347     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1348         return -1;
1349
1350     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1351         apply_prediction(ac, sce);
1352
1353     return 0;
1354 }
1355
1356 /**
1357  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1358  */
1359 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1360 {
1361     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1362     float *ch0 = cpe->ch[0].coeffs;
1363     float *ch1 = cpe->ch[1].coeffs;
1364     int g, i, group, idx = 0;
1365     const uint16_t *offsets = ics->swb_offset;
1366     for (g = 0; g < ics->num_window_groups; g++) {
1367         for (i = 0; i < ics->max_sfb; i++, idx++) {
1368             if (cpe->ms_mask[idx] &&
1369                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1370                 for (group = 0; group < ics->group_len[g]; group++) {
1371                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1372                                               ch1 + group * 128 + offsets[i],
1373                                               offsets[i+1] - offsets[i]);
1374                 }
1375             }
1376         }
1377         ch0 += ics->group_len[g] * 128;
1378         ch1 += ics->group_len[g] * 128;
1379     }
1380 }
1381
1382 /**
1383  * intensity stereo decoding; reference: 4.6.8.2.3
1384  *
1385  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1386  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1387  *                      [3] reserved for scalable AAC
1388  */
1389 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1390 {
1391     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1392     SingleChannelElement         *sce1 = &cpe->ch[1];
1393     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1394     const uint16_t *offsets = ics->swb_offset;
1395     int g, group, i, k, idx = 0;
1396     int c;
1397     float scale;
1398     for (g = 0; g < ics->num_window_groups; g++) {
1399         for (i = 0; i < ics->max_sfb;) {
1400             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1401                 const int bt_run_end = sce1->band_type_run_end[idx];
1402                 for (; i < bt_run_end; i++, idx++) {
1403                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1404                     if (ms_present)
1405                         c *= 1 - 2 * cpe->ms_mask[idx];
1406                     scale = c * sce1->sf[idx];
1407                     for (group = 0; group < ics->group_len[g]; group++)
1408                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1409                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1410                 }
1411             } else {
1412                 int bt_run_end = sce1->band_type_run_end[idx];
1413                 idx += bt_run_end - i;
1414                 i    = bt_run_end;
1415             }
1416         }
1417         coef0 += ics->group_len[g] * 128;
1418         coef1 += ics->group_len[g] * 128;
1419     }
1420 }
1421
1422 /**
1423  * Decode a channel_pair_element; reference: table 4.4.
1424  *
1425  * @return  Returns error status. 0 - OK, !0 - error
1426  */
1427 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1428 {
1429     int i, ret, common_window, ms_present = 0;
1430
1431     common_window = get_bits1(gb);
1432     if (common_window) {
1433         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1434             return -1;
1435         i = cpe->ch[1].ics.use_kb_window[0];
1436         cpe->ch[1].ics = cpe->ch[0].ics;
1437         cpe->ch[1].ics.use_kb_window[1] = i;
1438         ms_present = get_bits(gb, 2);
1439         if (ms_present == 3) {
1440             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1441             return -1;
1442         } else if (ms_present)
1443             decode_mid_side_stereo(cpe, gb, ms_present);
1444     }
1445     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1446         return ret;
1447     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1448         return ret;
1449
1450     if (common_window) {
1451         if (ms_present)
1452             apply_mid_side_stereo(ac, cpe);
1453         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1454             apply_prediction(ac, &cpe->ch[0]);
1455             apply_prediction(ac, &cpe->ch[1]);
1456         }
1457     }
1458
1459     apply_intensity_stereo(cpe, ms_present);
1460     return 0;
1461 }
1462
/**
 * Base of the coupling gain, selected by a 2-bit field of the coupling
 * channel element.
 */
static const float cce_scale[4] = {
    1.09050773266525765921, /* 2^(1/8) */
    1.18920711500272106672, /* 2^(1/4) */
    M_SQRT2,                /* 2^(1/2) */
    2,                      /* 2^1     */
};
1469
1470 /**
1471  * Decode coupling_channel_element; reference: table 4.8.
1472  *
1473  * @return  Returns error status. 0 - OK, !0 - error
1474  */
1475 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1476 {
1477     int num_gain = 0;
1478     int c, g, sfb, ret;
1479     int sign;
1480     float scale;
1481     SingleChannelElement *sce = &che->ch[0];
1482     ChannelCoupling     *coup = &che->coup;
1483
1484     coup->coupling_point = 2 * get_bits1(gb);
1485     coup->num_coupled = get_bits(gb, 3);
1486     for (c = 0; c <= coup->num_coupled; c++) {
1487         num_gain++;
1488         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1489         coup->id_select[c] = get_bits(gb, 4);
1490         if (coup->type[c] == TYPE_CPE) {
1491             coup->ch_select[c] = get_bits(gb, 2);
1492             if (coup->ch_select[c] == 3)
1493                 num_gain++;
1494         } else
1495             coup->ch_select[c] = 2;
1496     }
1497     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1498
1499     sign  = get_bits(gb, 1);
1500     scale = cce_scale[get_bits(gb, 2)];
1501
1502     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1503         return ret;
1504
1505     for (c = 0; c < num_gain; c++) {
1506         int idx  = 0;
1507         int cge  = 1;
1508         int gain = 0;
1509         float gain_cache = 1.;
1510         if (c) {
1511             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1512             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1513             gain_cache = powf(scale, -gain);
1514         }
1515         if (coup->coupling_point == AFTER_IMDCT) {
1516             coup->gain[c][0] = gain_cache;
1517         } else {
1518             for (g = 0; g < sce->ics.num_window_groups; g++) {
1519                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1520                     if (sce->band_type[idx] != ZERO_BT) {
1521                         if (!cge) {
1522                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1523                             if (t) {
1524                                 int s = 1;
1525                                 t = gain += t;
1526                                 if (sign) {
1527                                     s  -= 2 * (t & 0x1);
1528                                     t >>= 1;
1529                                 }
1530                                 gain_cache = powf(scale, -t) * s;
1531                             }
1532                         }
1533                         coup->gain[c][idx] = gain_cache;
1534                     }
1535                 }
1536             }
1537         }
1538     }
1539     return 0;
1540 }
1541
1542 /**
1543  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1544  *
1545  * @return  Returns number of bytes consumed.
1546  */
1547 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1548                                          GetBitContext *gb)
1549 {
1550     int i;
1551     int num_excl_chan = 0;
1552
1553     do {
1554         for (i = 0; i < 7; i++)
1555             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1556     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1557
1558     return num_excl_chan / 7;
1559 }
1560
1561 /**
1562  * Decode dynamic range information; reference: table 4.52.
1563  *
1564  * @param   cnt length of TYPE_FIL syntactic element in bytes
1565  *
1566  * @return  Returns number of bytes consumed.
1567  */
1568 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1569                                 GetBitContext *gb, int cnt)
1570 {
1571     int n             = 1;
1572     int drc_num_bands = 1;
1573     int i;
1574
1575     /* pce_tag_present? */
1576     if (get_bits1(gb)) {
1577         che_drc->pce_instance_tag  = get_bits(gb, 4);
1578         skip_bits(gb, 4); // tag_reserved_bits
1579         n++;
1580     }
1581
1582     /* excluded_chns_present? */
1583     if (get_bits1(gb)) {
1584         n += decode_drc_channel_exclusions(che_drc, gb);
1585     }
1586
1587     /* drc_bands_present? */
1588     if (get_bits1(gb)) {
1589         che_drc->band_incr            = get_bits(gb, 4);
1590         che_drc->interpolation_scheme = get_bits(gb, 4);
1591         n++;
1592         drc_num_bands += che_drc->band_incr;
1593         for (i = 0; i < drc_num_bands; i++) {
1594             che_drc->band_top[i] = get_bits(gb, 8);
1595             n++;
1596         }
1597     }
1598
1599     /* prog_ref_level_present? */
1600     if (get_bits1(gb)) {
1601         che_drc->prog_ref_level = get_bits(gb, 7);
1602         skip_bits1(gb); // prog_ref_level_reserved_bits
1603         n++;
1604     }
1605
1606     for (i = 0; i < drc_num_bands; i++) {
1607         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1608         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1609         n++;
1610     }
1611
1612     return n;
1613 }
1614
1615 /**
1616  * Decode extension data (incomplete); reference: table 4.51.
1617  *
1618  * @param   cnt length of TYPE_FIL syntactic element in bytes
1619  *
1620  * @return Returns number of bytes consumed
1621  */
1622 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1623                                     ChannelElement *che, enum RawDataBlockType elem_type)
1624 {
1625     int crc_flag = 0;
1626     int res = cnt;
1627     switch (get_bits(gb, 4)) { // extension type
1628     case EXT_SBR_DATA_CRC:
1629         crc_flag++;
1630     case EXT_SBR_DATA:
1631         if (!che) {
1632             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1633             return res;
1634         } else if (!ac->m4ac.sbr) {
1635             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1636             skip_bits_long(gb, 8 * cnt - 4);
1637             return res;
1638         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1639             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1640             skip_bits_long(gb, 8 * cnt - 4);
1641             return res;
1642         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1643             ac->m4ac.sbr = 1;
1644             ac->m4ac.ps = 1;
1645             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1646         } else {
1647             ac->m4ac.sbr = 1;
1648         }
1649         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1650         break;
1651     case EXT_DYNAMIC_RANGE:
1652         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1653         break;
1654     case EXT_FILL:
1655     case EXT_FILL_DATA:
1656     case EXT_DATA_ELEMENT:
1657     default:
1658         skip_bits_long(gb, 8 * cnt - 4);
1659         break;
1660     };
1661     return res;
1662 }
1663
1664 /**
1665  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1666  *
1667  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1668  * @param   coef    spectral coefficients
1669  */
1670 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1671                       IndividualChannelStream *ics, int decode)
1672 {
1673     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1674     int w, filt, m, i;
1675     int bottom, top, order, start, end, size, inc;
1676     float lpc[TNS_MAX_ORDER];
1677
1678     for (w = 0; w < ics->num_windows; w++) {
1679         bottom = ics->num_swb;
1680         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1681             top    = bottom;
1682             bottom = FFMAX(0, top - tns->length[w][filt]);
1683             order  = tns->order[w][filt];
1684             if (order == 0)
1685                 continue;
1686
1687             // tns_decode_coef
1688             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1689
1690             start = ics->swb_offset[FFMIN(bottom, mmm)];
1691             end   = ics->swb_offset[FFMIN(   top, mmm)];
1692             if ((size = end - start) <= 0)
1693                 continue;
1694             if (tns->direction[w][filt]) {
1695                 inc = -1;
1696                 start = end - 1;
1697             } else {
1698                 inc = 1;
1699             }
1700             start += w * 128;
1701
1702             // ar filter
1703             for (m = 0; m < size; m++, start += inc)
1704                 for (i = 1; i <= FFMIN(m, order); i++)
1705                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1706         }
1707     }
1708 }
1709
1710 /**
1711  * Conduct IMDCT and windowing.
1712  */
1713 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1714 {
1715     IndividualChannelStream *ics = &sce->ics;
1716     float *in    = sce->coeffs;
1717     float *out   = sce->ret;
1718     float *saved = sce->saved;
1719     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1720     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1721     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1722     float *buf  = ac->buf_mdct;
1723     float *temp = ac->temp;
1724     int i;
1725
1726     // imdct
1727     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1728         for (i = 0; i < 1024; i += 128)
1729             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1730     } else
1731         ff_imdct_half(&ac->mdct, buf, in);
1732
1733     /* window overlapping
1734      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1735      * and long to short transitions are considered to be short to short
1736      * transitions. This leaves just two cases (long to long and short to short)
1737      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1738      */
1739     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1740             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1741         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1742     } else {
1743         for (i = 0; i < 448; i++)
1744             out[i] = saved[i] + bias;
1745
1746         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1747             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1748             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1749             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1750             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1751             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1752             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1753         } else {
1754             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1755             for (i = 576; i < 1024; i++)
1756                 out[i] = buf[i-512] + bias;
1757         }
1758     }
1759
1760     // buffer update
1761     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1762         for (i = 0; i < 64; i++)
1763             saved[i] = temp[64 + i] - bias;
1764         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1765         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1766         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1767         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1768     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1769         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1770         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1771     } else { // LONG_STOP or ONLY_LONG
1772         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1773     }
1774 }
1775
1776 /**
1777  * Apply dependent channel coupling (applied before IMDCT).
1778  *
1779  * @param   index   index into coupling gain array
1780  */
1781 static void apply_dependent_coupling(AACContext *ac,
1782                                      SingleChannelElement *target,
1783                                      ChannelElement *cce, int index)
1784 {
1785     IndividualChannelStream *ics = &cce->ch[0].ics;
1786     const uint16_t *offsets = ics->swb_offset;
1787     float *dest = target->coeffs;
1788     const float *src = cce->ch[0].coeffs;
1789     int g, i, group, k, idx = 0;
1790     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1791         av_log(ac->avctx, AV_LOG_ERROR,
1792                "Dependent coupling is not supported together with LTP\n");
1793         return;
1794     }
1795     for (g = 0; g < ics->num_window_groups; g++) {
1796         for (i = 0; i < ics->max_sfb; i++, idx++) {
1797             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1798                 const float gain = cce->coup.gain[index][idx];
1799                 for (group = 0; group < ics->group_len[g]; group++) {
1800                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1801                         // XXX dsputil-ize
1802                         dest[group * 128 + k] += gain * src[group * 128 + k];
1803                     }
1804                 }
1805             }
1806         }
1807         dest += ics->group_len[g] * 128;
1808         src  += ics->group_len[g] * 128;
1809     }
1810 }
1811
1812 /**
1813  * Apply independent channel coupling (applied after IMDCT).
1814  *
1815  * @param   index   index into coupling gain array
1816  */
1817 static void apply_independent_coupling(AACContext *ac,
1818                                        SingleChannelElement *target,
1819                                        ChannelElement *cce, int index)
1820 {
1821     int i;
1822     const float gain = cce->coup.gain[index][0];
1823     const float bias = ac->add_bias;
1824     const float *src = cce->ch[0].ret;
1825     float *dest = target->ret;
1826     const int len = 1024 << (ac->m4ac.sbr == 1);
1827
1828     for (i = 0; i < len; i++)
1829         dest[i] += gain * (src[i] - bias);
1830 }
1831
1832 /**
1833  * channel coupling transformation interface
1834  *
1835  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1836  */
1837 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1838                                    enum RawDataBlockType type, int elem_id,
1839                                    enum CouplingPoint coupling_point,
1840                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1841 {
1842     int i, c;
1843
1844     for (i = 0; i < MAX_ELEM_ID; i++) {
1845         ChannelElement *cce = ac->che[TYPE_CCE][i];
1846         int index = 0;
1847
1848         if (cce && cce->coup.coupling_point == coupling_point) {
1849             ChannelCoupling *coup = &cce->coup;
1850
1851             for (c = 0; c <= coup->num_coupled; c++) {
1852                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1853                     if (coup->ch_select[c] != 1) {
1854                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1855                         if (coup->ch_select[c] != 0)
1856                             index++;
1857                     }
1858                     if (coup->ch_select[c] != 2)
1859                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1860                 } else
1861                     index += 1 + (coup->ch_select[c] == 3);
1862             }
1863         }
1864     }
1865 }
1866
1867 /**
1868  * Convert spectral data to float samples, applying all supported tools as appropriate.
1869  */
1870 static void spectral_to_sample(AACContext *ac)
1871 {
1872     int i, type;
1873     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1874     for (type = 3; type >= 0; type--) {
1875         for (i = 0; i < MAX_ELEM_ID; i++) {
1876             ChannelElement *che = ac->che[type][i];
1877             if (che) {
1878                 if (type <= TYPE_CPE)
1879                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1880                 if (che->ch[0].tns.present)
1881                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1882                 if (che->ch[1].tns.present)
1883                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1884                 if (type <= TYPE_CPE)
1885                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1886                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1887                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1888                     if (type == TYPE_CPE) {
1889                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1890                     }
1891                     if (ac->m4ac.sbr > 0) {
1892                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1893                     }
1894                 }
1895                 if (type <= TYPE_CCE)
1896                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1897             }
1898         }
1899     }
1900 }
1901
1902 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1903 {
1904     int size;
1905     AACADTSHeaderInfo hdr_info;
1906
1907     size = ff_aac_parse_header(gb, &hdr_info);
1908     if (size > 0) {
1909         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1910             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1911             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1912             ac->m4ac.chan_config = hdr_info.chan_config;
1913             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1914                 return -7;
1915             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1916                 return -7;
1917         } else if (ac->output_configured != OC_LOCKED) {
1918             ac->output_configured = OC_NONE;
1919         }
1920         if (ac->output_configured != OC_LOCKED) {
1921             ac->m4ac.sbr = -1;
1922             ac->m4ac.ps  = -1;
1923         }
1924         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1925         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1926         ac->m4ac.object_type     = hdr_info.object_type;
1927         if (!ac->avctx->sample_rate)
1928             ac->avctx->sample_rate = hdr_info.sample_rate;
1929         if (hdr_info.num_aac_frames == 1) {
1930             if (!hdr_info.crc_absent)
1931                 skip_bits(gb, 16);
1932         } else {
1933             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1934             return -1;
1935         }
1936     }
1937     return size;
1938 }
1939
1940 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1941                             int *data_size, AVPacket *avpkt)
1942 {
1943     const uint8_t *buf = avpkt->data;
1944     int buf_size = avpkt->size;
1945     AACContext *ac = avctx->priv_data;
1946     ChannelElement *che = NULL, *che_prev = NULL;
1947     GetBitContext gb;
1948     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1949     int err, elem_id, data_size_tmp;
1950     int buf_consumed;
1951     int samples = 0, multiplier;
1952     int buf_offset;
1953
1954     init_get_bits(&gb, buf, buf_size * 8);
1955
1956     if (show_bits(&gb, 12) == 0xfff) {
1957         if (parse_adts_frame_header(ac, &gb) < 0) {
1958             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1959             return -1;
1960         }
1961         if (ac->m4ac.sampling_index > 12) {
1962             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1963             return -1;
1964         }
1965     }
1966
1967     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
1968     // parse
1969     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1970         elem_id = get_bits(&gb, 4);
1971
1972         if (elem_type < TYPE_DSE) {
1973             if (!(che=get_che(ac, elem_type, elem_id))) {
1974                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
1975                        elem_type, elem_id);
1976                 return -1;
1977             }
1978             samples = 1024;
1979         }
1980
1981         switch (elem_type) {
1982
1983         case TYPE_SCE:
1984             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1985             break;
1986
1987         case TYPE_CPE:
1988             err = decode_cpe(ac, &gb, che);
1989             break;
1990
1991         case TYPE_CCE:
1992             err = decode_cce(ac, &gb, che);
1993             break;
1994
1995         case TYPE_LFE:
1996             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1997             break;
1998
1999         case TYPE_DSE:
2000             err = skip_data_stream_element(ac, &gb);
2001             break;
2002
2003         case TYPE_PCE: {
2004             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2005             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2006             if ((err = decode_pce(ac, new_che_pos, &gb)))
2007                 break;
2008             if (ac->output_configured > OC_TRIAL_PCE)
2009                 av_log(avctx, AV_LOG_ERROR,
2010                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2011             else
2012                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2013             break;
2014         }
2015
2016         case TYPE_FIL:
2017             if (elem_id == 15)
2018                 elem_id += get_bits(&gb, 8) - 1;
2019             if (get_bits_left(&gb) < 8 * elem_id) {
2020                     av_log(avctx, AV_LOG_ERROR, overread_err);
2021                     return -1;
2022             }
2023             while (elem_id > 0)
2024                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2025             err = 0; /* FIXME */
2026             break;
2027
2028         default:
2029             err = -1; /* should not happen, but keeps compiler happy */
2030             break;
2031         }
2032
2033         che_prev       = che;
2034         elem_type_prev = elem_type;
2035
2036         if (err)
2037             return err;
2038
2039         if (get_bits_left(&gb) < 3) {
2040             av_log(avctx, AV_LOG_ERROR, overread_err);
2041             return -1;
2042         }
2043     }
2044
2045     spectral_to_sample(ac);
2046
2047     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2048     samples <<= multiplier;
2049     if (ac->output_configured < OC_LOCKED) {
2050         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2051         avctx->frame_size = samples;
2052     }
2053
2054     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2055     if (*data_size < data_size_tmp) {
2056         av_log(avctx, AV_LOG_ERROR,
2057                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2058                *data_size, data_size_tmp);
2059         return -1;
2060     }
2061     *data_size = data_size_tmp;
2062
2063     if (samples)
2064         ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2065
2066     if (ac->output_configured)
2067         ac->output_configured = OC_LOCKED;
2068
2069     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2070     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2071         if (buf[buf_offset])
2072             break;
2073
2074     return buf_size > buf_offset ? buf_consumed : buf_size;
2075 }
2076
2077 static av_cold int aac_decode_close(AVCodecContext *avctx)
2078 {
2079     AACContext *ac = avctx->priv_data;
2080     int i, type;
2081
2082     for (i = 0; i < MAX_ELEM_ID; i++) {
2083         for (type = 0; type < 4; type++) {
2084             if (ac->che[type][i])
2085                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2086             av_freep(&ac->che[type][i]);
2087         }
2088     }
2089
2090     ff_mdct_end(&ac->mdct);
2091     ff_mdct_end(&ac->mdct_small);
2092     return 0;
2093 }
2094
2095 AVCodec aac_decoder = {
2096     "aac",
2097     AVMEDIA_TYPE_AUDIO,
2098     CODEC_ID_AAC,
2099     sizeof(AACContext),
2100     aac_decode_init,
2101     NULL,
2102     aac_decode_close,
2103     aac_decode_frame,
2104     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2105     .sample_fmts = (const enum SampleFormat[]) {
2106         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2107     },
2108     .channel_layouts = aac_channel_layout,
2109 };