git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * Y                    Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aacadtsdec.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
/** Overlay of a float and a 32-bit unsigned integer sharing the same storage. */
union float754 {
    float f;
    uint32_t i;
};
 108
/* VLC table read by decode_scalefactors(); filled in by aac_decode_init(). */
static VLC vlc_scalefactors;
/* Eleven spectral VLC tables (indices 0-10); filled in by aac_decode_init(). */
static VLC vlc_spectral[11];

/* Message logged whenever a parser detects it would read past the input. */
static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE ||
 204                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 205                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 206             }
 207         }
 208     } else {
 209         if (ac->che[type][id])
 210             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 211         av_freep(&ac->che[type][id]);
 212     }
 213     return 0;
 214 }
 215
 216 /**
 217  * Configure output channel order based on the current program configuration element.
 218  *
 219  * @param   che_pos current channel position configuration
 220  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 221  *
 222  * @return  Returns error status. 0 - OK, !0 - error
 223  */
 224 static av_cold int output_configure(AACContext *ac,
 225                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 226                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 227                             int channel_config, enum OCStatus oc_type)
 228 {
 229     AVCodecContext *avctx = ac->avctx;
 230     int i, type, channels = 0, ret;
 231
 232     if (new_che_pos != che_pos)
 233     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 234
 235     if (channel_config) {
 236         for (i = 0; i < tags_per_config[channel_config]; i++) {
 237             if ((ret = che_configure(ac, che_pos,
 238                                      aac_channel_layout_map[channel_config - 1][i][0],
 239                                      aac_channel_layout_map[channel_config - 1][i][1],
 240                                      &channels)))
 241                 return ret;
 242         }
 243
 244         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 245         ac->tags_mapped = 0;
 246
 247         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 248     } else {
 249         /* Allocate or free elements depending on if they are in the
 250          * current program configuration.
 251          *
 252          * Set up default 1:1 output mapping.
 253          *
 254          * For a 5.1 stream the output order will be:
 255          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 256          */
 257
 258         for (i = 0; i < MAX_ELEM_ID; i++) {
 259             for (type = 0; type < 4; type++) {
 260                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 261                     return ret;
 262             }
 263         }
 264
 265         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 266         ac->tags_mapped = 4 * MAX_ELEM_ID;
 267
 268         avctx->channel_layout = 0;
 269     }
 270
 271     avctx->channels = channels;
 272
 273     ac->output_configured = oc_type;
 274
 275     return 0;
 276 }
 277
 278 /**
 279  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 280  *
 281  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 282  * @param sce_map mono (Single Channel Element) map
 283  * @param type speaker type/position for these channels
 284  */
 285 static void decode_channel_map(enum ChannelPosition *cpe_map,
 286                                enum ChannelPosition *sce_map,
 287                                enum ChannelPosition type,
 288                                GetBitContext *gb, int n)
 289 {
 290     while (n--) {
 291         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 292         map[get_bits(gb, 4)] = type;
 293     }
 294 }
 295
 296 /**
 297  * Decode program configuration element; reference: table 4.2.
 298  *
 299  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 300  *
 301  * @return  Returns error status. 0 - OK, !0 - error
 302  */
 303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 304                       GetBitContext *gb)
 305 {
 306     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 307     int comment_len;
 308
 309     skip_bits(gb, 2);  // object_type
 310
 311     sampling_index = get_bits(gb, 4);
 312     if (ac->m4ac.sampling_index != sampling_index)
 313         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 314
 315     num_front       = get_bits(gb, 4);
 316     num_side        = get_bits(gb, 4);
 317     num_back        = get_bits(gb, 4);
 318     num_lfe         = get_bits(gb, 2);
 319     num_assoc_data  = get_bits(gb, 3);
 320     num_cc          = get_bits(gb, 4);
 321
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // mono_mixdown_tag
 324     if (get_bits1(gb))
 325         skip_bits(gb, 4); // stereo_mixdown_tag
 326
 327     if (get_bits1(gb))
 328         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 329
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 331     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 332     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 333     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 334
 335     skip_bits_long(gb, 4 * num_assoc_data);
 336
 337     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 338
 339     align_get_bits(gb);
 340
 341     /* comment field, first byte is length */
 342     comment_len = get_bits(gb, 8) * 8;
 343     if (get_bits_left(gb) < comment_len) {
 344         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 345         return -1;
 346     }
 347     skip_bits_long(gb, comment_len);
 348     return 0;
 349 }
 350
 351 /**
 352  * Set up channel positions based on a default channel configuration
 353  * as specified in table 1.17.
 354  *
 355  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 356  *
 357  * @return  Returns error status. 0 - OK, !0 - error
 358  */
 359 static av_cold int set_default_channel_config(AACContext *ac,
 360                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 361                                       int channel_config)
 362 {
 363     if (channel_config < 1 || channel_config > 7) {
 364         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 365                channel_config);
 366         return -1;
 367     }
 368
 369     /* default channel configurations:
 370      *
 371      * 1ch : front center (mono)
 372      * 2ch : L + R (stereo)
 373      * 3ch : front center + L + R
 374      * 4ch : front center + L + R + back center
 375      * 5ch : front center + L + R + back stereo
 376      * 6ch : front center + L + R + back stereo + LFE
 377      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 378      */
 379
 380     if (channel_config != 2)
 381         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 382     if (channel_config > 1)
 383         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 384     if (channel_config == 4)
 385         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 386     if (channel_config > 4)
 387         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 388         = AAC_CHANNEL_BACK;  // back stereo
 389     if (channel_config > 5)
 390         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 391     if (channel_config == 7)
 392         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 393
 394     return 0;
 395 }
 396
 397 /**
 398  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 399  *
 400  * @return  Returns error status. 0 - OK, !0 - error
 401  */
 402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 403                                      int channel_config)
 404 {
 405     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 406     int extension_flag, ret;
 407
 408     if (get_bits1(gb)) { // frameLengthFlag
 409         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 410         return -1;
 411     }
 412
 413     if (get_bits1(gb))       // dependsOnCoreCoder
 414         skip_bits(gb, 14);   // coreCoderDelay
 415     extension_flag = get_bits1(gb);
 416
 417     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 418         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 419         skip_bits(gb, 3);     // layerNr
 420
 421     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 422     if (channel_config == 0) {
 423         skip_bits(gb, 4);  // element_instance_tag
 424         if ((ret = decode_pce(ac, new_che_pos, gb)))
 425             return ret;
 426     } else {
 427         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 428             return ret;
 429     }
 430     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 431         return ret;
 432
 433     if (extension_flag) {
 434         switch (ac->m4ac.object_type) {
 435         case AOT_ER_BSAC:
 436             skip_bits(gb, 5);    // numOfSubFrame
 437             skip_bits(gb, 11);   // layer_length
 438             break;
 439         case AOT_ER_AAC_LC:
 440         case AOT_ER_AAC_LTP:
 441         case AOT_ER_AAC_SCALABLE:
 442         case AOT_ER_AAC_LD:
 443             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 444                                     * aacScalefactorDataResilienceFlag
 445                                     * aacSpectralDataResilienceFlag
 446                                     */
 447             break;
 448         }
 449         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 450     }
 451     return 0;
 452 }
 453
 454 /**
 455  * Decode audio specific configuration; reference: table 1.13.
 456  *
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status. 0 - OK, !0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac, void *data,
 463                                         int data_size)
 464 {
 465     GetBitContext gb;
 466     int i;
 467
 468     init_get_bits(&gb, data, data_size * 8);
 469
 470     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 471         return -1;
 472     if (ac->m4ac.sampling_index > 12) {
 473         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 474         return -1;
 475     }
 476     if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
 477         ac->m4ac.ps = 1;
 478
 479     skip_bits_long(&gb, i);
 480
 481     switch (ac->m4ac.object_type) {
 482     case AOT_AAC_MAIN:
 483     case AOT_AAC_LC:
 484         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 485             return -1;
 486         break;
 487     default:
 488         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 489                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 490         return -1;
 491     }
 492     return 0;
 493 }
 494
 495 /**
 496  * linear congruential pseudorandom number generator
 497  *
 498  * @param   previous_val    pointer to the current state of the generator
 499  *
 500  * @return  Returns a 32-bit pseudorandom integer
 501  */
 502 static av_always_inline int lcg_random(int previous_val)
 503 {
 504     return previous_val * 1664525 + 1013904223;
 505 }
 506
 507 static av_always_inline void reset_predict_state(PredictorState *ps)
 508 {
 509     ps->r0   = 0.0f;
 510     ps->r1   = 0.0f;
 511     ps->cor0 = 0.0f;
 512     ps->cor1 = 0.0f;
 513     ps->var0 = 1.0f;
 514     ps->var1 = 1.0f;
 515 }
 516
 517 static void reset_all_predictors(PredictorState *ps)
 518 {
 519     int i;
 520     for (i = 0; i < MAX_PREDICTORS; i++)
 521         reset_predict_state(&ps[i]);
 522 }
 523
 524 static void reset_predictor_group(PredictorState *ps, int group_num)
 525 {
 526     int i;
 527     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 528         reset_predict_state(&ps[i]);
 529 }
 530
/*
 * Initialise vlc_spectral[num] as an 8-bit static VLC from the
 * ff_aac_spectral_{sizes,bits,codes}[num] tables; size is forwarded to
 * INIT_VLC_STATIC as its last argument.
 * Note that the expansion already ends in a semicolon.
 */
#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);
 536
/**
 * Initialise the decoder: parse the extradata (if any), build the static
 * VLC tables, and set up the DSP, MDCT and window helpers.
 *
 * @param   avctx   codec context; avctx->priv_data is the AACContext
 *
 * @return  0 on success, -1 if the extradata cannot be decoded
 */
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;

    ac->avctx = avctx;
    ac->m4ac.sample_rate = avctx->sample_rate;

    /* Extradata, when present, carries the audio specific config. */
    if (avctx->extradata_size > 0) {
        if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
            return -1;
    }

    avctx->sample_fmt = SAMPLE_FMT_S16;

    /* Spectral VLC tables 0-10; the second argument is the static table size. */
    AAC_INIT_VLC_STATIC( 0, 304);
    AAC_INIT_VLC_STATIC( 1, 270);
    AAC_INIT_VLC_STATIC( 2, 550);
    AAC_INIT_VLC_STATIC( 3, 300);
    AAC_INIT_VLC_STATIC( 4, 328);
    AAC_INIT_VLC_STATIC( 5, 294);
    AAC_INIT_VLC_STATIC( 6, 306);
    AAC_INIT_VLC_STATIC( 7, 268);
    AAC_INIT_VLC_STATIC( 8, 510);
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);

    ff_aac_sbr_init();

    dsputil_init(&ac->dsp, avctx);

    /* Fixed initial state for the generator state kept in the context. */
    ac->random_state = 0x1f2e3d4c;

    // -1024 - Compensate wrong IMDCT method.
    // 32768 - Required to scale values to the correct range for the bias method
    //         for float to int16 conversion.

    /* The scaling constants depend on whether the C float-to-int16
     * conversion routine is the one selected by dsputil_init(). */
    if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
        ac->add_bias  = 385.0f;
        ac->sf_scale  = 1. / (-1024. * 32768.);
        ac->sf_offset = 0;
    } else {
        ac->add_bias  = 0.0f;
        ac->sf_scale  = 1. / -1024.;
        ac->sf_offset = 60;
    }

    ff_aac_tableinit();

    /* Scalefactor VLC table, read by decode_scalefactors(). */
    INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
                    ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                    ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                    352);

    /* NOTE(review): the return values of ff_mdct_init() are not checked here. */
    ff_mdct_init(&ac->mdct, 11, 1, 1.0);
    ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
    // window initialization
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows( 7);

    cbrt_tableinit();

    return 0;
}
 602
 603 /**
 604  * Skip data_stream_element; reference: table 4.10.
 605  */
 606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 607 {
 608     int byte_align = get_bits1(gb);
 609     int count = get_bits(gb, 8);
 610     if (count == 255)
 611         count += get_bits(gb, 8);
 612     if (byte_align)
 613         align_get_bits(gb);
 614
 615     if (get_bits_left(gb) < 8 * count) {
 616         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 617         return -1;
 618     }
 619     skip_bits_long(gb, 8 * count);
 620     return 0;
 621 }
 622
 623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 624                              GetBitContext *gb)
 625 {
 626     int sfb;
 627     if (get_bits1(gb)) {
 628         ics->predictor_reset_group = get_bits(gb, 5);
 629         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 630             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 631             return -1;
 632         }
 633     }
 634     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 635         ics->prediction_used[sfb] = get_bits1(gb);
 636     }
 637     return 0;
 638 }
 639
 640 /**
 641  * Decode Individual Channel Stream info; reference: table 4.6.
 642  *
 643  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 644  */
 645 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 646                            GetBitContext *gb, int common_window)
 647 {
 648     if (get_bits1(gb)) {
 649         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 650         memset(ics, 0, sizeof(IndividualChannelStream));
 651         return -1;
 652     }
 653     ics->window_sequence[1] = ics->window_sequence[0];
 654     ics->window_sequence[0] = get_bits(gb, 2);
 655     ics->use_kb_window[1]   = ics->use_kb_window[0];
 656     ics->use_kb_window[0]   = get_bits1(gb);
 657     ics->num_window_groups  = 1;
 658     ics->group_len[0]       = 1;
 659     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 660         int i;
 661         ics->max_sfb = get_bits(gb, 4);
 662         for (i = 0; i < 7; i++) {
 663             if (get_bits1(gb)) {
 664                 ics->group_len[ics->num_window_groups - 1]++;
 665             } else {
 666                 ics->num_window_groups++;
 667                 ics->group_len[ics->num_window_groups - 1] = 1;
 668             }
 669         }
 670         ics->num_windows       = 8;
 671         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 672         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 673         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 674         ics->predictor_present = 0;
 675     } else {
 676         ics->max_sfb               = get_bits(gb, 6);
 677         ics->num_windows           = 1;
 678         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 679         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 680         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 681         ics->predictor_present     = get_bits1(gb);
 682         ics->predictor_reset_group = 0;
 683         if (ics->predictor_present) {
 684             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 685                 if (decode_prediction(ac, ics, gb)) {
 686                     memset(ics, 0, sizeof(IndividualChannelStream));
 687                     return -1;
 688                 }
 689             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 690                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 691                 memset(ics, 0, sizeof(IndividualChannelStream));
 692                 return -1;
 693             } else {
 694                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 695                 memset(ics, 0, sizeof(IndividualChannelStream));
 696                 return -1;
 697             }
 698         }
 699     }
 700
 701     if (ics->max_sfb > ics->num_swb) {
 702         av_log(ac->avctx, AV_LOG_ERROR,
 703                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 704                ics->max_sfb, ics->num_swb);
 705         memset(ics, 0, sizeof(IndividualChannelStream));
 706         return -1;
 707     }
 708
 709     return 0;
 710 }
 711
 712 /**
 713  * Decode band types (section_data payload); reference: table 4.46.
 714  *
 715  * @param   band_type           array of the used band type
 716  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 717  *
 718  * @return  Returns error status. 0 - OK, !0 - error
 719  */
 720 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 721                              int band_type_run_end[120], GetBitContext *gb,
 722                              IndividualChannelStream *ics)
 723 {
 724     int g, idx = 0;
 725     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 726     for (g = 0; g < ics->num_window_groups; g++) {
 727         int k = 0;
 728         while (k < ics->max_sfb) {
 729             uint8_t sect_end = k;
 730             int sect_len_incr;
 731             int sect_band_type = get_bits(gb, 4);
 732             if (sect_band_type == 12) {
 733                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 734                 return -1;
 735             }
 736             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 737                 sect_end += sect_len_incr;
 738             sect_end += sect_len_incr;
 739             if (get_bits_left(gb) < 0) {
 740                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 741                 return -1;
 742             }
 743             if (sect_end > ics->max_sfb) {
 744                 av_log(ac->avctx, AV_LOG_ERROR,
 745                        "Number of bands (%d) exceeds limit (%d).\n",
 746                        sect_end, ics->max_sfb);
 747                 return -1;
 748             }
 749             for (; k < sect_end; k++) {
 750                 band_type        [idx]   = sect_band_type;
 751                 band_type_run_end[idx++] = sect_end;
 752             }
 753         }
 754     }
 755     return 0;
 756 }
 757
/**
 * Decode scalefactors; reference: table 4.47.
 *
 * Values are coded differentially; three independent running values are
 * kept, one each for regular scalefactors, noise gains and intensity stereo
 * positions.
 *
 * @param   ac                  decoder context (supplies sf_offset and logging)
 * @param   sf                  array of scalefactors or intensity stereo positions
 * @param   gb                  bit reader positioned at the scalefactor data
 * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 * @param   ics                 individual channel stream supplying the window
 *                              sequence, group count and max_sfb
 * @param   band_type           array of the used band type
 * @param   band_type_run_end   array of the last scalefactor band of a band type run
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                               unsigned int global_gain,
                               IndividualChannelStream *ics,
                               enum BandType band_type[120],
                               int band_type_run_end[120])
{
    /* Short windows use a table offset that is 12 larger. */
    const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
    int g, i, idx = 0;
    /* Running values: [0] scalefactor, [1] noise gain, [2] intensity position. */
    int offset[3] = { global_gain, global_gain - 90, 100 };
    /* Positive only until the first noise band value has been read. */
    int noise_flag = 1;
    static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
    for (g = 0; g < ics->num_window_groups; g++) {
        /* i is advanced by the inner loops, one whole band type run at a time. */
        for (i = 0; i < ics->max_sfb;) {
            int run_end = band_type_run_end[idx];
            if (band_type[idx] == ZERO_BT) {
                /* Zero bands carry no coded value. */
                for (; i < run_end; i++, idx++)
                    sf[idx] = 0.;
            } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
                for (; i < run_end; i++, idx++) {
                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                    /* Comparing against an unsigned constant also rejects
                     * negative values. */
                    if (offset[2] > 255U) {
                        av_log(ac->avctx, AV_LOG_ERROR,
                               "%s (%d) out of range.\n", sf_str[2], offset[2]);
                        return -1;
                    }
                    sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
                }
            } else if (band_type[idx] == NOISE_BT) {
                for (; i < run_end; i++, idx++) {
                    /* The first noise value is 9 raw bits biased by 256;
                     * all later ones are VLC-coded deltas. */
                    if (noise_flag-- > 0)
                        offset[1] += get_bits(gb, 9) - 256;
                    else
                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                    if (offset[1] > 255U) {
                        av_log(ac->avctx, AV_LOG_ERROR,
                               "%s (%d) out of range.\n", sf_str[1], offset[1]);
                        return -1;
                    }
                    sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
                }
            } else {
                /* Regular spectral band: VLC-coded delta on the scalefactor. */
                for (; i < run_end; i++, idx++) {
                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                    if (offset[0] > 255U) {
                        av_log(ac->avctx, AV_LOG_ERROR,
                               "%s (%d) out of range.\n", sf_str[0], offset[0]);
                        return -1;
                    }
                    sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
                }
            }
        }
    }
    return 0;
}
 823
 824 /**
 825  * Decode pulse data; reference: table 4.7.
 826  */
 827 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 828                          const uint16_t *swb_offset, int num_swb)
 829 {
 830     int i, pulse_swb;
 831     pulse->num_pulse = get_bits(gb, 2) + 1;
 832     pulse_swb        = get_bits(gb, 6);
 833     if (pulse_swb >= num_swb)
 834         return -1;
 835     pulse->pos[0]    = swb_offset[pulse_swb];
 836     pulse->pos[0]   += get_bits(gb, 5);
 837     if (pulse->pos[0] > 1023)
 838         return -1;
 839     pulse->amp[0]    = get_bits(gb, 4);
 840     for (i = 1; i < pulse->num_pulse; i++) {
 841         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 842         if (pulse->pos[i] > 1023)
 843             return -1;
 844         pulse->amp[i] = get_bits(gb, 4);
 845     }
 846     return 0;
 847 }
 848
 849 /**
 850  * Decode Temporal Noise Shaping data; reference: table 4.48.
 851  *
 852  * @return  Returns error status. 0 - OK, !0 - error
 853  */
 854 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 855                       GetBitContext *gb, const IndividualChannelStream *ics)
 856 {
 857     int w, filt, i, coef_len, coef_res, coef_compress;
 858     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 859     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 860     for (w = 0; w < ics->num_windows; w++) {
 861         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 862             coef_res = get_bits1(gb);
 863
 864             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 865                 int tmp2_idx;
 866                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 867
 868                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 869                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 870                            tns->order[w][filt], tns_max_order);
 871                     tns->order[w][filt] = 0;
 872                     return -1;
 873                 }
 874                 if (tns->order[w][filt]) {
 875                     tns->direction[w][filt] = get_bits1(gb);
 876                     coef_compress = get_bits1(gb);
 877                     coef_len = coef_res + 3 - coef_compress;
 878                     tmp2_idx = 2 * coef_compress + coef_res;
 879
 880                     for (i = 0; i < tns->order[w][filt]; i++)
 881                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 882                 }
 883             }
 884         }
 885     }
 886     return 0;
 887 }
 888
 889 /**
 890  * Decode Mid/Side data; reference: table 4.54.
 891  *
 892  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 893  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 894  *                      [3] reserved for scalable AAC
 895  */
 896 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 897                                    int ms_present)
 898 {
 899     int idx;
 900     if (ms_present == 1) {
 901         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 902             cpe->ms_mask[idx] = get_bits1(gb);
 903     } else if (ms_present == 2) {
 904         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 905     }
 906 }
 907
 908 #ifndef VMUL2
 909 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 910                            const float *scale)
 911 {
 912     float s = *scale;
 913     *dst++ = v[idx    & 15] * s;
 914     *dst++ = v[idx>>4 & 15] * s;
 915     return dst;
 916 }
 917 #endif
 918
 919 #ifndef VMUL4
 920 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 921                            const float *scale)
 922 {
 923     float s = *scale;
 924     *dst++ = v[idx    & 3] * s;
 925     *dst++ = v[idx>>2 & 3] * s;
 926     *dst++ = v[idx>>4 & 3] * s;
 927     *dst++ = v[idx>>6 & 3] * s;
 928     return dst;
 929 }
 930 #endif
 931
 932 #ifndef VMUL2S
 933 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 934                             unsigned sign, const float *scale)
 935 {
 936     union float754 s0, s1;
 937
 938     s0.f = s1.f = *scale;
 939     s0.i ^= sign >> 1 << 31;
 940     s1.i ^= sign      << 31;
 941
 942     *dst++ = v[idx    & 15] * s0.f;
 943     *dst++ = v[idx>>4 & 15] * s1.f;
 944
 945     return dst;
 946 }
 947 #endif
 948
 949 #ifndef VMUL4S
 950 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 951                             unsigned sign, const float *scale)
 952 {
 953     unsigned nz = idx >> 12;
 954     union float754 s = { .f = *scale };
 955     union float754 t;
 956
 957     t.i = s.i ^ (sign & 1<<31);
 958     *dst++ = v[idx    & 3] * t.f;
 959
 960     sign <<= nz & 1; nz >>= 1;
 961     t.i = s.i ^ (sign & 1<<31);
 962     *dst++ = v[idx>>2 & 3] * t.f;
 963
 964     sign <<= nz & 1; nz >>= 1;
 965     t.i = s.i ^ (sign & 1<<31);
 966     *dst++ = v[idx>>4 & 3] * t.f;
 967
 968     sign <<= nz & 1; nz >>= 1;
 969     t.i = s.i ^ (sign & 1<<31);
 970     *dst++ = v[idx>>6 & 3] * t.f;
 971
 972     return dst;
 973 }
 974 #endif
 975
 976 /**
 977  * Decode spectral data; reference: table 4.50.
 978  * Dequantize and scale spectral data; reference: 4.6.3.3.
 979  *
 980  * @param   coef            array of dequantized, scaled spectral data
 981  * @param   sf              array of scalefactors or intensity stereo positions
 982  * @param   pulse_present   set if pulses are present
 983  * @param   pulse           pointer to pulse data struct
 984  * @param   band_type       array of the used band type
 985  *
 986  * @return  Returns error status. 0 - OK, !0 - error
 987  */
 988 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 989                                        GetBitContext *gb, const float sf[120],
 990                                        int pulse_present, const Pulse *pulse,
 991                                        const IndividualChannelStream *ics,
 992                                        enum BandType band_type[120])
 993 {
 994     int i, k, g, idx = 0;
 995     const int c = 1024 / ics->num_windows;
 996     const uint16_t *offsets = ics->swb_offset;
 997     float *coef_base = coef;
 998
 999     for (g = 0; g < ics->num_windows; g++)
1000         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1001
1002     for (g = 0; g < ics->num_window_groups; g++) {
1003         unsigned g_len = ics->group_len[g];
1004
1005         for (i = 0; i < ics->max_sfb; i++, idx++) {
1006             const unsigned cbt_m1 = band_type[idx] - 1;
1007             float *cfo = coef + offsets[i];
1008             int off_len = offsets[i + 1] - offsets[i];
1009             int group;
1010
1011             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1012                 for (group = 0; group < g_len; group++, cfo+=128) {
1013                     memset(cfo, 0, off_len * sizeof(float));
1014                 }
1015             } else if (cbt_m1 == NOISE_BT - 1) {
1016                 for (group = 0; group < g_len; group++, cfo+=128) {
1017                     float scale;
1018                     float band_energy;
1019
1020                     for (k = 0; k < off_len; k++) {
1021                         ac->random_state  = lcg_random(ac->random_state);
1022                         cfo[k] = ac->random_state;
1023                     }
1024
1025                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1026                     scale = sf[idx] / sqrtf(band_energy);
1027                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1028                 }
1029             } else {
1030                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1031                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1032                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1033                 OPEN_READER(re, gb);
1034
1035                 switch (cbt_m1 >> 1) {
1036                 case 0:
1037                     for (group = 0; group < g_len; group++, cfo+=128) {
1038                         float *cf = cfo;
1039                         int len = off_len;
1040
1041                         do {
1042                             int code;
1043                             unsigned cb_idx;
1044
1045                             UPDATE_CACHE(re, gb);
1046                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1047                             cb_idx = cb_vector_idx[code];
1048                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1049                         } while (len -= 4);
1050                     }
1051                     break;
1052
1053                 case 1:
1054                     for (group = 0; group < g_len; group++, cfo+=128) {
1055                         float *cf = cfo;
1056                         int len = off_len;
1057
1058                         do {
1059                             int code;
1060                             unsigned nnz;
1061                             unsigned cb_idx;
1062                             uint32_t bits;
1063
1064                             UPDATE_CACHE(re, gb);
1065                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1066 #if MIN_CACHE_BITS < 20
1067                             UPDATE_CACHE(re, gb);
1068 #endif
1069                             cb_idx = cb_vector_idx[code];
1070                             nnz = cb_idx >> 8 & 15;
1071                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1072                             LAST_SKIP_BITS(re, gb, nnz);
1073                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1074                         } while (len -= 4);
1075                     }
1076                     break;
1077
1078                 case 2:
1079                     for (group = 0; group < g_len; group++, cfo+=128) {
1080                         float *cf = cfo;
1081                         int len = off_len;
1082
1083                         do {
1084                             int code;
1085                             unsigned cb_idx;
1086
1087                             UPDATE_CACHE(re, gb);
1088                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1089                             cb_idx = cb_vector_idx[code];
1090                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1091                         } while (len -= 2);
1092                     }
1093                     break;
1094
1095                 case 3:
1096                 case 4:
1097                     for (group = 0; group < g_len; group++, cfo+=128) {
1098                         float *cf = cfo;
1099                         int len = off_len;
1100
1101                         do {
1102                             int code;
1103                             unsigned nnz;
1104                             unsigned cb_idx;
1105                             unsigned sign;
1106
1107                             UPDATE_CACHE(re, gb);
1108                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1109                             cb_idx = cb_vector_idx[code];
1110                             nnz = cb_idx >> 8 & 15;
1111                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1112                             LAST_SKIP_BITS(re, gb, nnz);
1113                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1114                         } while (len -= 2);
1115                     }
1116                     break;
1117
1118                 default:
1119                     for (group = 0; group < g_len; group++, cfo+=128) {
1120                         float *cf = cfo;
1121                         uint32_t *icf = (uint32_t *) cf;
1122                         int len = off_len;
1123
1124                         do {
1125                             int code;
1126                             unsigned nzt, nnz;
1127                             unsigned cb_idx;
1128                             uint32_t bits;
1129                             int j;
1130
1131                             UPDATE_CACHE(re, gb);
1132                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1133
1134                             if (!code) {
1135                                 *icf++ = 0;
1136                                 *icf++ = 0;
1137                                 continue;
1138                             }
1139
1140                             cb_idx = cb_vector_idx[code];
1141                             nnz = cb_idx >> 12;
1142                             nzt = cb_idx >> 8;
1143                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1144                             LAST_SKIP_BITS(re, gb, nnz);
1145
1146                             for (j = 0; j < 2; j++) {
1147                                 if (nzt & 1<<j) {
1148                                     uint32_t b;
1149                                     int n;
1150                                     /* The total length of escape_sequence must be < 22 bits according
1151                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1152                                     UPDATE_CACHE(re, gb);
1153                                     b = GET_CACHE(re, gb);
1154                                     b = 31 - av_log2(~b);
1155
1156                                     if (b > 8) {
1157                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1158                                         return -1;
1159                                     }
1160
1161 #if MIN_CACHE_BITS < 21
1162                                     LAST_SKIP_BITS(re, gb, b + 1);
1163                                     UPDATE_CACHE(re, gb);
1164 #else
1165                                     SKIP_BITS(re, gb, b + 1);
1166 #endif
1167                                     b += 4;
1168                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1169                                     LAST_SKIP_BITS(re, gb, b);
1170                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1171                                     bits <<= 1;
1172                                 } else {
1173                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1174                                     *icf++ = (bits & 1<<31) | v;
1175                                     bits <<= !!v;
1176                                 }
1177                                 cb_idx >>= 4;
1178                             }
1179                         } while (len -= 2);
1180
1181                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1182                     }
1183                 }
1184
1185                 CLOSE_READER(re, gb);
1186             }
1187         }
1188         coef += g_len << 7;
1189     }
1190
1191     if (pulse_present) {
1192         idx = 0;
1193         for (i = 0; i < pulse->num_pulse; i++) {
1194             float co = coef_base[ pulse->pos[i] ];
1195             while (offsets[idx + 1] <= pulse->pos[i])
1196                 idx++;
1197             if (band_type[idx] != NOISE_BT && sf[idx]) {
1198                 float ico = -pulse->amp[i];
1199                 if (co) {
1200                     co /= sf[idx];
1201                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1202                 }
1203                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1204             }
1205         }
1206     }
1207     return 0;
1208 }
1209
1210 static av_always_inline float flt16_round(float pf)
1211 {
1212     union float754 tmp;
1213     tmp.f = pf;
1214     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1215     return tmp.f;
1216 }
1217
1218 static av_always_inline float flt16_even(float pf)
1219 {
1220     union float754 tmp;
1221     tmp.f = pf;
1222     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1223     return tmp.f;
1224 }
1225
1226 static av_always_inline float flt16_trunc(float pf)
1227 {
1228     union float754 pun;
1229     pun.f = pf;
1230     pun.i &= 0xFFFF0000U;
1231     return pun.f;
1232 }
1233
1234 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1235                     int output_enable)
1236 {
1237     const float a     = 0.953125; // 61.0 / 64
1238     const float alpha = 0.90625;  // 29.0 / 32
1239     float e0, e1;
1240     float pv;
1241     float k1, k2;
1242
1243     k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1244     k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1245
1246     pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1247     if (output_enable)
1248         *coef += pv * ac->sf_scale;
1249
1250     e0 = *coef / ac->sf_scale;
1251     e1 = e0 - k1 * ps->r0;
1252
1253     ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1254     ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5f * (ps->r1 * ps->r1 + e1 * e1));
1255     ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1256     ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5f * (ps->r0 * ps->r0 + e0 * e0));
1257
1258     ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1259     ps->r0 = flt16_trunc(a * e0);
1260 }
1261
1262 /**
1263  * Apply AAC-Main style frequency domain prediction.
1264  */
1265 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1266 {
1267     int sfb, k;
1268
1269     if (!sce->ics.predictor_initialized) {
1270         reset_all_predictors(sce->predictor_state);
1271         sce->ics.predictor_initialized = 1;
1272     }
1273
1274     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1275         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1276             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1277                 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1278                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1279             }
1280         }
1281         if (sce->ics.predictor_reset_group)
1282             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1283     } else
1284         reset_all_predictors(sce->predictor_state);
1285 }
1286
1287 /**
1288  * Decode an individual_channel_stream payload; reference: table 4.44.
1289  *
1290  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1291  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1292  *
1293  * @return  Returns error status. 0 - OK, !0 - error
1294  */
1295 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1296                       GetBitContext *gb, int common_window, int scale_flag)
1297 {
1298     Pulse pulse;
1299     TemporalNoiseShaping    *tns = &sce->tns;
1300     IndividualChannelStream *ics = &sce->ics;
1301     float *out = sce->coeffs;
1302     int global_gain, pulse_present = 0;
1303
1304     /* This assignment is to silence a GCC warning about the variable being used
1305      * uninitialized when in fact it always is.
1306      */
1307     pulse.num_pulse = 0;
1308
1309     global_gain = get_bits(gb, 8);
1310
1311     if (!common_window && !scale_flag) {
1312         if (decode_ics_info(ac, ics, gb, 0) < 0)
1313             return -1;
1314     }
1315
1316     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1317         return -1;
1318     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1319         return -1;
1320
1321     pulse_present = 0;
1322     if (!scale_flag) {
1323         if ((pulse_present = get_bits1(gb))) {
1324             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1325                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1326                 return -1;
1327             }
1328             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1329                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1330                 return -1;
1331             }
1332         }
1333         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1334             return -1;
1335         if (get_bits1(gb)) {
1336             av_log_missing_feature(ac->avctx, "SSR", 1);
1337             return -1;
1338         }
1339     }
1340
1341     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1342         return -1;
1343
1344     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1345         apply_prediction(ac, sce);
1346
1347     return 0;
1348 }
1349
1350 /**
1351  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1352  */
1353 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1354 {
1355     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1356     float *ch0 = cpe->ch[0].coeffs;
1357     float *ch1 = cpe->ch[1].coeffs;
1358     int g, i, group, idx = 0;
1359     const uint16_t *offsets = ics->swb_offset;
1360     for (g = 0; g < ics->num_window_groups; g++) {
1361         for (i = 0; i < ics->max_sfb; i++, idx++) {
1362             if (cpe->ms_mask[idx] &&
1363                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1364                 for (group = 0; group < ics->group_len[g]; group++) {
1365                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1366                                               ch1 + group * 128 + offsets[i],
1367                                               offsets[i+1] - offsets[i]);
1368                 }
1369             }
1370         }
1371         ch0 += ics->group_len[g] * 128;
1372         ch1 += ics->group_len[g] * 128;
1373     }
1374 }
1375
1376 /**
1377  * intensity stereo decoding; reference: 4.6.8.2.3
1378  *
1379  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1380  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1381  *                      [3] reserved for scalable AAC
1382  */
1383 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1384 {
1385     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1386     SingleChannelElement         *sce1 = &cpe->ch[1];
1387     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1388     const uint16_t *offsets = ics->swb_offset;
1389     int g, group, i, k, idx = 0;
1390     int c;
1391     float scale;
1392     for (g = 0; g < ics->num_window_groups; g++) {
1393         for (i = 0; i < ics->max_sfb;) {
1394             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1395                 const int bt_run_end = sce1->band_type_run_end[idx];
1396                 for (; i < bt_run_end; i++, idx++) {
1397                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1398                     if (ms_present)
1399                         c *= 1 - 2 * cpe->ms_mask[idx];
1400                     scale = c * sce1->sf[idx];
1401                     for (group = 0; group < ics->group_len[g]; group++)
1402                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1403                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1404                 }
1405             } else {
1406                 int bt_run_end = sce1->band_type_run_end[idx];
1407                 idx += bt_run_end - i;
1408                 i    = bt_run_end;
1409             }
1410         }
1411         coef0 += ics->group_len[g] * 128;
1412         coef1 += ics->group_len[g] * 128;
1413     }
1414 }
1415
1416 /**
1417  * Decode a channel_pair_element; reference: table 4.4.
1418  *
1419  * @return  Returns error status. 0 - OK, !0 - error
1420  */
1421 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1422 {
1423     int i, ret, common_window, ms_present = 0;
1424
1425     common_window = get_bits1(gb);
1426     if (common_window) {
1427         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1428             return -1;
1429         i = cpe->ch[1].ics.use_kb_window[0];
1430         cpe->ch[1].ics = cpe->ch[0].ics;
1431         cpe->ch[1].ics.use_kb_window[1] = i;
1432         ms_present = get_bits(gb, 2);
1433         if (ms_present == 3) {
1434             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1435             return -1;
1436         } else if (ms_present)
1437             decode_mid_side_stereo(cpe, gb, ms_present);
1438     }
1439     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1440         return ret;
1441     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1442         return ret;
1443
1444     if (common_window) {
1445         if (ms_present)
1446             apply_mid_side_stereo(ac, cpe);
1447         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1448             apply_prediction(ac, &cpe->ch[0]);
1449             apply_prediction(ac, &cpe->ch[1]);
1450         }
1451     }
1452
1453     apply_intensity_stereo(cpe, ms_present);
1454     return 0;
1455 }
1456
1457 static const float cce_scale[] = {
1458     1.09050773266525765921, //2^(1/8)
1459     1.18920711500272106672, //2^(1/4)
1460     M_SQRT2,
1461     2,
1462 };
1463
1464 /**
1465  * Decode coupling_channel_element; reference: table 4.8.
1466  *
1467  * @return  Returns error status. 0 - OK, !0 - error
1468  */
1469 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1470 {
1471     int num_gain = 0;
1472     int c, g, sfb, ret;
1473     int sign;
1474     float scale;
1475     SingleChannelElement *sce = &che->ch[0];
1476     ChannelCoupling     *coup = &che->coup;
1477
1478     coup->coupling_point = 2 * get_bits1(gb);
1479     coup->num_coupled = get_bits(gb, 3);
1480     for (c = 0; c <= coup->num_coupled; c++) {
1481         num_gain++;
1482         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1483         coup->id_select[c] = get_bits(gb, 4);
1484         if (coup->type[c] == TYPE_CPE) {
1485             coup->ch_select[c] = get_bits(gb, 2);
1486             if (coup->ch_select[c] == 3)
1487                 num_gain++;
1488         } else
1489             coup->ch_select[c] = 2;
1490     }
1491     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1492
1493     sign  = get_bits(gb, 1);
1494     scale = cce_scale[get_bits(gb, 2)];
1495
1496     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1497         return ret;
1498
1499     for (c = 0; c < num_gain; c++) {
1500         int idx  = 0;
1501         int cge  = 1;
1502         int gain = 0;
1503         float gain_cache = 1.;
1504         if (c) {
1505             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1506             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1507             gain_cache = powf(scale, -gain);
1508         }
1509         if (coup->coupling_point == AFTER_IMDCT) {
1510             coup->gain[c][0] = gain_cache;
1511         } else {
1512             for (g = 0; g < sce->ics.num_window_groups; g++) {
1513                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1514                     if (sce->band_type[idx] != ZERO_BT) {
1515                         if (!cge) {
1516                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1517                             if (t) {
1518                                 int s = 1;
1519                                 t = gain += t;
1520                                 if (sign) {
1521                                     s  -= 2 * (t & 0x1);
1522                                     t >>= 1;
1523                                 }
1524                                 gain_cache = powf(scale, -t) * s;
1525                             }
1526                         }
1527                         coup->gain[c][idx] = gain_cache;
1528                     }
1529                 }
1530             }
1531         }
1532     }
1533     return 0;
1534 }
1535
1536 /**
1537  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1538  *
1539  * @return  Returns number of bytes consumed.
1540  */
1541 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1542                                          GetBitContext *gb)
1543 {
1544     int i;
1545     int num_excl_chan = 0;
1546
1547     do {
1548         for (i = 0; i < 7; i++)
1549             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1550     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1551
1552     return num_excl_chan / 7;
1553 }
1554
1555 /**
1556  * Decode dynamic range information; reference: table 4.52.
1557  *
1558  * @param   cnt length of TYPE_FIL syntactic element in bytes
1559  *
1560  * @return  Returns number of bytes consumed.
1561  */
1562 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1563                                 GetBitContext *gb, int cnt)
1564 {
1565     int n             = 1;
1566     int drc_num_bands = 1;
1567     int i;
1568
1569     /* pce_tag_present? */
1570     if (get_bits1(gb)) {
1571         che_drc->pce_instance_tag  = get_bits(gb, 4);
1572         skip_bits(gb, 4); // tag_reserved_bits
1573         n++;
1574     }
1575
1576     /* excluded_chns_present? */
1577     if (get_bits1(gb)) {
1578         n += decode_drc_channel_exclusions(che_drc, gb);
1579     }
1580
1581     /* drc_bands_present? */
1582     if (get_bits1(gb)) {
1583         che_drc->band_incr            = get_bits(gb, 4);
1584         che_drc->interpolation_scheme = get_bits(gb, 4);
1585         n++;
1586         drc_num_bands += che_drc->band_incr;
1587         for (i = 0; i < drc_num_bands; i++) {
1588             che_drc->band_top[i] = get_bits(gb, 8);
1589             n++;
1590         }
1591     }
1592
1593     /* prog_ref_level_present? */
1594     if (get_bits1(gb)) {
1595         che_drc->prog_ref_level = get_bits(gb, 7);
1596         skip_bits1(gb); // prog_ref_level_reserved_bits
1597         n++;
1598     }
1599
1600     for (i = 0; i < drc_num_bands; i++) {
1601         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1602         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1603         n++;
1604     }
1605
1606     return n;
1607 }
1608
1609 /**
1610  * Decode extension data (incomplete); reference: table 4.51.
1611  *
1612  * @param   cnt length of TYPE_FIL syntactic element in bytes
1613  *
1614  * @return Returns number of bytes consumed
1615  */
1616 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1617                                     ChannelElement *che, enum RawDataBlockType elem_type)
1618 {
1619     int crc_flag = 0;
1620     int res = cnt;
1621     switch (get_bits(gb, 4)) { // extension type
1622     case EXT_SBR_DATA_CRC:
1623         crc_flag++;
1624     case EXT_SBR_DATA:
1625         if (!che) {
1626             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1627             return res;
1628         } else if (!ac->m4ac.sbr) {
1629             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1630             skip_bits_long(gb, 8 * cnt - 4);
1631             return res;
1632         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1633             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1634             skip_bits_long(gb, 8 * cnt - 4);
1635             return res;
1636         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1637             ac->m4ac.sbr = 1;
1638             ac->m4ac.ps = 1;
1639             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1640         } else {
1641             ac->m4ac.sbr = 1;
1642         }
1643         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1644         break;
1645     case EXT_DYNAMIC_RANGE:
1646         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1647         break;
1648     case EXT_FILL:
1649     case EXT_FILL_DATA:
1650     case EXT_DATA_ELEMENT:
1651     default:
1652         skip_bits_long(gb, 8 * cnt - 4);
1653         break;
1654     };
1655     return res;
1656 }
1657
1658 /**
1659  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1660  *
1661  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1662  * @param   coef    spectral coefficients
1663  */
1664 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1665                       IndividualChannelStream *ics, int decode)
1666 {
1667     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1668     int w, filt, m, i;
1669     int bottom, top, order, start, end, size, inc;
1670     float lpc[TNS_MAX_ORDER];
1671
1672     for (w = 0; w < ics->num_windows; w++) {
1673         bottom = ics->num_swb;
1674         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1675             top    = bottom;
1676             bottom = FFMAX(0, top - tns->length[w][filt]);
1677             order  = tns->order[w][filt];
1678             if (order == 0)
1679                 continue;
1680
1681             // tns_decode_coef
1682             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1683
1684             start = ics->swb_offset[FFMIN(bottom, mmm)];
1685             end   = ics->swb_offset[FFMIN(   top, mmm)];
1686             if ((size = end - start) <= 0)
1687                 continue;
1688             if (tns->direction[w][filt]) {
1689                 inc = -1;
1690                 start = end - 1;
1691             } else {
1692                 inc = 1;
1693             }
1694             start += w * 128;
1695
1696             // ar filter
1697             for (m = 0; m < size; m++, start += inc)
1698                 for (i = 1; i <= FFMIN(m, order); i++)
1699                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1700         }
1701     }
1702 }
1703
1704 /**
1705  * Conduct IMDCT and windowing.
1706  */
1707 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1708 {
1709     IndividualChannelStream *ics = &sce->ics;
1710     float *in    = sce->coeffs;
1711     float *out   = sce->ret;
1712     float *saved = sce->saved;
1713     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1714     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1715     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1716     float *buf  = ac->buf_mdct;
1717     float *temp = ac->temp;
1718     int i;
1719
1720     // imdct
1721     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1722         for (i = 0; i < 1024; i += 128)
1723             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1724     } else
1725         ff_imdct_half(&ac->mdct, buf, in);
1726
1727     /* window overlapping
1728      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1729      * and long to short transitions are considered to be short to short
1730      * transitions. This leaves just two cases (long to long and short to short)
1731      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1732      */
1733     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1734             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1735         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1736     } else {
1737         for (i = 0; i < 448; i++)
1738             out[i] = saved[i] + bias;
1739
1740         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1741             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1742             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1743             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1744             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1745             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1746             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1747         } else {
1748             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1749             for (i = 576; i < 1024; i++)
1750                 out[i] = buf[i-512] + bias;
1751         }
1752     }
1753
1754     // buffer update
1755     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1756         for (i = 0; i < 64; i++)
1757             saved[i] = temp[64 + i] - bias;
1758         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1759         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1760         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1761         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1762     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1763         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1764         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1765     } else { // LONG_STOP or ONLY_LONG
1766         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1767     }
1768 }
1769
1770 /**
1771  * Apply dependent channel coupling (applied before IMDCT).
1772  *
1773  * @param   index   index into coupling gain array
1774  */
1775 static void apply_dependent_coupling(AACContext *ac,
1776                                      SingleChannelElement *target,
1777                                      ChannelElement *cce, int index)
1778 {
1779     IndividualChannelStream *ics = &cce->ch[0].ics;
1780     const uint16_t *offsets = ics->swb_offset;
1781     float *dest = target->coeffs;
1782     const float *src = cce->ch[0].coeffs;
1783     int g, i, group, k, idx = 0;
1784     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1785         av_log(ac->avctx, AV_LOG_ERROR,
1786                "Dependent coupling is not supported together with LTP\n");
1787         return;
1788     }
1789     for (g = 0; g < ics->num_window_groups; g++) {
1790         for (i = 0; i < ics->max_sfb; i++, idx++) {
1791             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1792                 const float gain = cce->coup.gain[index][idx];
1793                 for (group = 0; group < ics->group_len[g]; group++) {
1794                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1795                         // XXX dsputil-ize
1796                         dest[group * 128 + k] += gain * src[group * 128 + k];
1797                     }
1798                 }
1799             }
1800         }
1801         dest += ics->group_len[g] * 128;
1802         src  += ics->group_len[g] * 128;
1803     }
1804 }
1805
1806 /**
1807  * Apply independent channel coupling (applied after IMDCT).
1808  *
1809  * @param   index   index into coupling gain array
1810  */
1811 static void apply_independent_coupling(AACContext *ac,
1812                                        SingleChannelElement *target,
1813                                        ChannelElement *cce, int index)
1814 {
1815     int i;
1816     const float gain = cce->coup.gain[index][0];
1817     const float bias = ac->add_bias;
1818     const float *src = cce->ch[0].ret;
1819     float *dest = target->ret;
1820     const int len = 1024 << (ac->m4ac.sbr == 1);
1821
1822     for (i = 0; i < len; i++)
1823         dest[i] += gain * (src[i] - bias);
1824 }
1825
1826 /**
1827  * channel coupling transformation interface
1828  *
1829  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1830  */
1831 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1832                                    enum RawDataBlockType type, int elem_id,
1833                                    enum CouplingPoint coupling_point,
1834                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1835 {
1836     int i, c;
1837
1838     for (i = 0; i < MAX_ELEM_ID; i++) {
1839         ChannelElement *cce = ac->che[TYPE_CCE][i];
1840         int index = 0;
1841
1842         if (cce && cce->coup.coupling_point == coupling_point) {
1843             ChannelCoupling *coup = &cce->coup;
1844
1845             for (c = 0; c <= coup->num_coupled; c++) {
1846                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1847                     if (coup->ch_select[c] != 1) {
1848                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1849                         if (coup->ch_select[c] != 0)
1850                             index++;
1851                     }
1852                     if (coup->ch_select[c] != 2)
1853                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1854                 } else
1855                     index += 1 + (coup->ch_select[c] == 3);
1856             }
1857         }
1858     }
1859 }
1860
1861 /**
1862  * Convert spectral data to float samples, applying all supported tools as appropriate.
1863  */
1864 static void spectral_to_sample(AACContext *ac)
1865 {
1866     int i, type;
1867     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1868     for (type = 3; type >= 0; type--) {
1869         for (i = 0; i < MAX_ELEM_ID; i++) {
1870             ChannelElement *che = ac->che[type][i];
1871             if (che) {
1872                 if (type <= TYPE_CPE)
1873                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1874                 if (che->ch[0].tns.present)
1875                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1876                 if (che->ch[1].tns.present)
1877                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1878                 if (type <= TYPE_CPE)
1879                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1880                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1881                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1882                     if (type == TYPE_CPE) {
1883                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1884                     }
1885                     if (ac->m4ac.sbr > 0) {
1886                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1887                     }
1888                 }
1889                 if (type <= TYPE_CCE)
1890                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1891             }
1892         }
1893     }
1894 }
1895
1896 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1897 {
1898     int size;
1899     AACADTSHeaderInfo hdr_info;
1900
1901     size = ff_aac_parse_header(gb, &hdr_info);
1902     if (size > 0) {
1903         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1904             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1905             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1906             ac->m4ac.chan_config = hdr_info.chan_config;
1907             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1908                 return -7;
1909             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1910                 return -7;
1911         } else if (ac->output_configured != OC_LOCKED) {
1912             ac->output_configured = OC_NONE;
1913         }
1914         if (ac->output_configured != OC_LOCKED) {
1915             ac->m4ac.sbr = -1;
1916             ac->m4ac.ps  = -1;
1917         }
1918         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1919         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1920         ac->m4ac.object_type     = hdr_info.object_type;
1921         if (!ac->avctx->sample_rate)
1922             ac->avctx->sample_rate = hdr_info.sample_rate;
1923         if (hdr_info.num_aac_frames == 1) {
1924             if (!hdr_info.crc_absent)
1925                 skip_bits(gb, 16);
1926         } else {
1927             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1928             return -1;
1929         }
1930     }
1931     return size;
1932 }
1933
1934 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1935                             int *data_size, AVPacket *avpkt)
1936 {
1937     const uint8_t *buf = avpkt->data;
1938     int buf_size = avpkt->size;
1939     AACContext *ac = avctx->priv_data;
1940     ChannelElement *che = NULL, *che_prev = NULL;
1941     GetBitContext gb;
1942     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1943     int err, elem_id, data_size_tmp;
1944     int buf_consumed;
1945     int samples = 0, multiplier;
1946     int buf_offset;
1947
1948     init_get_bits(&gb, buf, buf_size * 8);
1949
1950     if (show_bits(&gb, 12) == 0xfff) {
1951         if (parse_adts_frame_header(ac, &gb) < 0) {
1952             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1953             return -1;
1954         }
1955         if (ac->m4ac.sampling_index > 12) {
1956             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1957             return -1;
1958         }
1959     }
1960
1961     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
1962     // parse
1963     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1964         elem_id = get_bits(&gb, 4);
1965
1966         if (elem_type < TYPE_DSE) {
1967             if (!(che=get_che(ac, elem_type, elem_id))) {
1968                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
1969                        elem_type, elem_id);
1970                 return -1;
1971             }
1972             samples = 1024;
1973         }
1974
1975         switch (elem_type) {
1976
1977         case TYPE_SCE:
1978             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1979             break;
1980
1981         case TYPE_CPE:
1982             err = decode_cpe(ac, &gb, che);
1983             break;
1984
1985         case TYPE_CCE:
1986             err = decode_cce(ac, &gb, che);
1987             break;
1988
1989         case TYPE_LFE:
1990             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
1991             break;
1992
1993         case TYPE_DSE:
1994             err = skip_data_stream_element(ac, &gb);
1995             break;
1996
1997         case TYPE_PCE: {
1998             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1999             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2000             if ((err = decode_pce(ac, new_che_pos, &gb)))
2001                 break;
2002             if (ac->output_configured > OC_TRIAL_PCE)
2003                 av_log(avctx, AV_LOG_ERROR,
2004                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2005             else
2006                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2007             break;
2008         }
2009
2010         case TYPE_FIL:
2011             if (elem_id == 15)
2012                 elem_id += get_bits(&gb, 8) - 1;
2013             if (get_bits_left(&gb) < 8 * elem_id) {
2014                     av_log(avctx, AV_LOG_ERROR, overread_err);
2015                     return -1;
2016             }
2017             while (elem_id > 0)
2018                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2019             err = 0; /* FIXME */
2020             break;
2021
2022         default:
2023             err = -1; /* should not happen, but keeps compiler happy */
2024             break;
2025         }
2026
2027         che_prev       = che;
2028         elem_type_prev = elem_type;
2029
2030         if (err)
2031             return err;
2032
2033         if (get_bits_left(&gb) < 3) {
2034             av_log(avctx, AV_LOG_ERROR, overread_err);
2035             return -1;
2036         }
2037     }
2038
2039     spectral_to_sample(ac);
2040
2041     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2042     samples <<= multiplier;
2043     if (ac->output_configured < OC_LOCKED) {
2044         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2045         avctx->frame_size = samples;
2046     }
2047
2048     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2049     if (*data_size < data_size_tmp) {
2050         av_log(avctx, AV_LOG_ERROR,
2051                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2052                *data_size, data_size_tmp);
2053         return -1;
2054     }
2055     *data_size = data_size_tmp;
2056
2057     if (samples)
2058         ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2059
2060     if (ac->output_configured)
2061         ac->output_configured = OC_LOCKED;
2062
2063     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2064     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2065         if (buf[buf_offset])
2066             break;
2067
2068     return buf_size > buf_offset ? buf_consumed : buf_size;
2069 }
2070
2071 static av_cold int aac_decode_close(AVCodecContext *avctx)
2072 {
2073     AACContext *ac = avctx->priv_data;
2074     int i, type;
2075
2076     for (i = 0; i < MAX_ELEM_ID; i++) {
2077         for (type = 0; type < 4; type++) {
2078             if (ac->che[type][i])
2079                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2080             av_freep(&ac->che[type][i]);
2081         }
2082     }
2083
2084     ff_mdct_end(&ac->mdct);
2085     ff_mdct_end(&ac->mdct_small);
2086     return 0;
2087 }
2088
2089 AVCodec aac_decoder = {
2090     "aac",
2091     AVMEDIA_TYPE_AUDIO,
2092     CODEC_ID_AAC,
2093     sizeof(AACContext),
2094     aac_decode_init,
2095     NULL,
2096     aac_decode_close,
2097     aac_decode_frame,
2098     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2099     .sample_fmts = (const enum SampleFormat[]) {
2100         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2101     },
2102     .channel_layouts = aac_channel_layout,
2103 };