git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * Y                    Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  74  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  75            Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aac_parser.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
 104 union float754 {
 105     float f;
 106     uint32_t i;
 107 };
 108
 109 static VLC vlc_scalefactors;
 110 static VLC vlc_spectral[11];
 111
 112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE ||
 204                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 205                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 206             }
 207         }
 208     } else {
 209         if (ac->che[type][id])
 210             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 211         av_freep(&ac->che[type][id]);
 212     }
 213     return 0;
 214 }
 215
 216 /**
 217  * Configure output channel order based on the current program configuration element.
 218  *
 219  * @param   che_pos current channel position configuration
 220  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 221  *
 222  * @return  Returns error status. 0 - OK, !0 - error
 223  */
 224 static av_cold int output_configure(AACContext *ac,
 225                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 226                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 227                             int channel_config, enum OCStatus oc_type)
 228 {
 229     AVCodecContext *avctx = ac->avctx;
 230     int i, type, channels = 0, ret;
 231
 232     if (new_che_pos != che_pos)
 233     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 234
 235     if (channel_config) {
 236         for (i = 0; i < tags_per_config[channel_config]; i++) {
 237             if ((ret = che_configure(ac, che_pos,
 238                                      aac_channel_layout_map[channel_config - 1][i][0],
 239                                      aac_channel_layout_map[channel_config - 1][i][1],
 240                                      &channels)))
 241                 return ret;
 242         }
 243
 244         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 245         ac->tags_mapped = 0;
 246
 247         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 248     } else {
 249         /* Allocate or free elements depending on if they are in the
 250          * current program configuration.
 251          *
 252          * Set up default 1:1 output mapping.
 253          *
 254          * For a 5.1 stream the output order will be:
 255          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 256          */
 257
 258         for (i = 0; i < MAX_ELEM_ID; i++) {
 259             for (type = 0; type < 4; type++) {
 260                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 261                     return ret;
 262             }
 263         }
 264
 265         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 266         ac->tags_mapped = 4 * MAX_ELEM_ID;
 267
 268         avctx->channel_layout = 0;
 269     }
 270
 271     avctx->channels = channels;
 272
 273     ac->output_configured = oc_type;
 274
 275     return 0;
 276 }
 277
 278 /**
 279  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 280  *
 281  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 282  * @param sce_map mono (Single Channel Element) map
 283  * @param type speaker type/position for these channels
 284  */
 285 static void decode_channel_map(enum ChannelPosition *cpe_map,
 286                                enum ChannelPosition *sce_map,
 287                                enum ChannelPosition type,
 288                                GetBitContext *gb, int n)
 289 {
 290     while (n--) {
 291         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 292         map[get_bits(gb, 4)] = type;
 293     }
 294 }
 295
 296 /**
 297  * Decode program configuration element; reference: table 4.2.
 298  *
 299  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 300  *
 301  * @return  Returns error status. 0 - OK, !0 - error
 302  */
 303 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 304                       GetBitContext *gb)
 305 {
 306     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 307     int comment_len;
 308
 309     skip_bits(gb, 2);  // object_type
 310
 311     sampling_index = get_bits(gb, 4);
 312     if (ac->m4ac.sampling_index != sampling_index)
 313         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 314
 315     num_front       = get_bits(gb, 4);
 316     num_side        = get_bits(gb, 4);
 317     num_back        = get_bits(gb, 4);
 318     num_lfe         = get_bits(gb, 2);
 319     num_assoc_data  = get_bits(gb, 3);
 320     num_cc          = get_bits(gb, 4);
 321
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // mono_mixdown_tag
 324     if (get_bits1(gb))
 325         skip_bits(gb, 4); // stereo_mixdown_tag
 326
 327     if (get_bits1(gb))
 328         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 329
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 331     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 332     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 333     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 334
 335     skip_bits_long(gb, 4 * num_assoc_data);
 336
 337     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 338
 339     align_get_bits(gb);
 340
 341     /* comment field, first byte is length */
 342     comment_len = get_bits(gb, 8) * 8;
 343     if (get_bits_left(gb) < comment_len) {
 344         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 345         return -1;
 346     }
 347     skip_bits_long(gb, comment_len);
 348     return 0;
 349 }
 350
 351 /**
 352  * Set up channel positions based on a default channel configuration
 353  * as specified in table 1.17.
 354  *
 355  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 356  *
 357  * @return  Returns error status. 0 - OK, !0 - error
 358  */
 359 static av_cold int set_default_channel_config(AACContext *ac,
 360                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 361                                       int channel_config)
 362 {
 363     if (channel_config < 1 || channel_config > 7) {
 364         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 365                channel_config);
 366         return -1;
 367     }
 368
 369     /* default channel configurations:
 370      *
 371      * 1ch : front center (mono)
 372      * 2ch : L + R (stereo)
 373      * 3ch : front center + L + R
 374      * 4ch : front center + L + R + back center
 375      * 5ch : front center + L + R + back stereo
 376      * 6ch : front center + L + R + back stereo + LFE
 377      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 378      */
 379
 380     if (channel_config != 2)
 381         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 382     if (channel_config > 1)
 383         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 384     if (channel_config == 4)
 385         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 386     if (channel_config > 4)
 387         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 388         = AAC_CHANNEL_BACK;  // back stereo
 389     if (channel_config > 5)
 390         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 391     if (channel_config == 7)
 392         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 393
 394     return 0;
 395 }
 396
 397 /**
 398  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 399  *
 400  * @return  Returns error status. 0 - OK, !0 - error
 401  */
 402 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 403                                      int channel_config)
 404 {
 405     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 406     int extension_flag, ret;
 407
 408     if (get_bits1(gb)) { // frameLengthFlag
 409         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 410         return -1;
 411     }
 412
 413     if (get_bits1(gb))       // dependsOnCoreCoder
 414         skip_bits(gb, 14);   // coreCoderDelay
 415     extension_flag = get_bits1(gb);
 416
 417     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 418         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 419         skip_bits(gb, 3);     // layerNr
 420
 421     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 422     if (channel_config == 0) {
 423         skip_bits(gb, 4);  // element_instance_tag
 424         if ((ret = decode_pce(ac, new_che_pos, gb)))
 425             return ret;
 426     } else {
 427         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 428             return ret;
 429     }
 430     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 431         return ret;
 432
 433     if (extension_flag) {
 434         switch (ac->m4ac.object_type) {
 435         case AOT_ER_BSAC:
 436             skip_bits(gb, 5);    // numOfSubFrame
 437             skip_bits(gb, 11);   // layer_length
 438             break;
 439         case AOT_ER_AAC_LC:
 440         case AOT_ER_AAC_LTP:
 441         case AOT_ER_AAC_SCALABLE:
 442         case AOT_ER_AAC_LD:
 443             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 444                                     * aacScalefactorDataResilienceFlag
 445                                     * aacSpectralDataResilienceFlag
 446                                     */
 447             break;
 448         }
 449         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 450     }
 451     return 0;
 452 }
 453
 454 /**
 455  * Decode audio specific configuration; reference: table 1.13.
 456  *
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status. 0 - OK, !0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac, void *data,
 463                                         int data_size)
 464 {
 465     GetBitContext gb;
 466     int i;
 467
 468     init_get_bits(&gb, data, data_size * 8);
 469
 470     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 471         return -1;
 472     if (ac->m4ac.sampling_index > 12) {
 473         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 474         return -1;
 475     }
 476     if (ac->m4ac.sbr == 1 && ac->m4ac.ps == -1)
 477         ac->m4ac.ps = 1;
 478
 479     skip_bits_long(&gb, i);
 480
 481     switch (ac->m4ac.object_type) {
 482     case AOT_AAC_MAIN:
 483     case AOT_AAC_LC:
 484         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 485             return -1;
 486         break;
 487     default:
 488         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 489                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 490         return -1;
 491     }
 492     return 0;
 493 }
 494
 495 /**
 496  * linear congruential pseudorandom number generator
 497  *
 498  * @param   previous_val    pointer to the current state of the generator
 499  *
 500  * @return  Returns a 32-bit pseudorandom integer
 501  */
 502 static av_always_inline int lcg_random(int previous_val)
 503 {
 504     return previous_val * 1664525 + 1013904223;
 505 }
 506
 507 static av_always_inline void reset_predict_state(PredictorState *ps)
 508 {
 509     ps->r0   = 0.0f;
 510     ps->r1   = 0.0f;
 511     ps->cor0 = 0.0f;
 512     ps->cor1 = 0.0f;
 513     ps->var0 = 1.0f;
 514     ps->var1 = 1.0f;
 515 }
 516
 517 static void reset_all_predictors(PredictorState *ps)
 518 {
 519     int i;
 520     for (i = 0; i < MAX_PREDICTORS; i++)
 521         reset_predict_state(&ps[i]);
 522 }
 523
 524 static void reset_predictor_group(PredictorState *ps, int group_num)
 525 {
 526     int i;
 527     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 528         reset_predict_state(&ps[i]);
 529 }
 530
 531 #define AAC_INIT_VLC_STATIC(num, size) \
 532     INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
 533          ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
 534         ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
 535         size);
 536
 537 static av_cold int aac_decode_init(AVCodecContext *avctx)
 538 {
 539     AACContext *ac = avctx->priv_data;
 540     int i;
 541
 542     ac->avctx = avctx;
 543     ac->m4ac.sample_rate = avctx->sample_rate;
 544
 545     if (avctx->extradata_size > 0) {
 546         if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
 547             return -1;
 548     }
 549
 550     avctx->sample_fmt = SAMPLE_FMT_S16;
 551
 552     AAC_INIT_VLC_STATIC( 0, 304);
 553     AAC_INIT_VLC_STATIC( 1, 270);
 554     AAC_INIT_VLC_STATIC( 2, 550);
 555     AAC_INIT_VLC_STATIC( 3, 300);
 556     AAC_INIT_VLC_STATIC( 4, 328);
 557     AAC_INIT_VLC_STATIC( 5, 294);
 558     AAC_INIT_VLC_STATIC( 6, 306);
 559     AAC_INIT_VLC_STATIC( 7, 268);
 560     AAC_INIT_VLC_STATIC( 8, 510);
 561     AAC_INIT_VLC_STATIC( 9, 366);
 562     AAC_INIT_VLC_STATIC(10, 462);
 563
 564     ff_aac_sbr_init();
 565
 566     dsputil_init(&ac->dsp, avctx);
 567
 568     ac->random_state = 0x1f2e3d4c;
 569
 570     // -1024 - Compensate wrong IMDCT method.
 571     // 32768 - Required to scale values to the correct range for the bias method
 572     //         for float to int16 conversion.
 573
 574     if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
 575         ac->add_bias  = 385.0f;
 576         ac->sf_scale  = 1. / (-1024. * 32768.);
 577         ac->sf_offset = 0;
 578     } else {
 579         ac->add_bias  = 0.0f;
 580         ac->sf_scale  = 1. / -1024.;
 581         ac->sf_offset = 60;
 582     }
 583
 584 #if !CONFIG_HARDCODED_TABLES
 585     for (i = 0; i < 428; i++)
 586         ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
 587 #endif /* CONFIG_HARDCODED_TABLES */
 588
 589     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 590                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 591                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 592                     352);
 593
 594     ff_mdct_init(&ac->mdct, 11, 1, 1.0);
 595     ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
 596     // window initialization
 597     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 598     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 599     ff_init_ff_sine_windows(10);
 600     ff_init_ff_sine_windows( 7);
 601
 602     cbrt_tableinit();
 603
 604     return 0;
 605 }
 606
 607 /**
 608  * Skip data_stream_element; reference: table 4.10.
 609  */
 610 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 611 {
 612     int byte_align = get_bits1(gb);
 613     int count = get_bits(gb, 8);
 614     if (count == 255)
 615         count += get_bits(gb, 8);
 616     if (byte_align)
 617         align_get_bits(gb);
 618
 619     if (get_bits_left(gb) < 8 * count) {
 620         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 621         return -1;
 622     }
 623     skip_bits_long(gb, 8 * count);
 624     return 0;
 625 }
 626
 627 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 628                              GetBitContext *gb)
 629 {
 630     int sfb;
 631     if (get_bits1(gb)) {
 632         ics->predictor_reset_group = get_bits(gb, 5);
 633         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 634             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 635             return -1;
 636         }
 637     }
 638     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 639         ics->prediction_used[sfb] = get_bits1(gb);
 640     }
 641     return 0;
 642 }
 643
 644 /**
 645  * Decode Individual Channel Stream info; reference: table 4.6.
 646  *
 647  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 648  */
 649 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 650                            GetBitContext *gb, int common_window)
 651 {
 652     if (get_bits1(gb)) {
 653         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 654         memset(ics, 0, sizeof(IndividualChannelStream));
 655         return -1;
 656     }
 657     ics->window_sequence[1] = ics->window_sequence[0];
 658     ics->window_sequence[0] = get_bits(gb, 2);
 659     ics->use_kb_window[1]   = ics->use_kb_window[0];
 660     ics->use_kb_window[0]   = get_bits1(gb);
 661     ics->num_window_groups  = 1;
 662     ics->group_len[0]       = 1;
 663     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 664         int i;
 665         ics->max_sfb = get_bits(gb, 4);
 666         for (i = 0; i < 7; i++) {
 667             if (get_bits1(gb)) {
 668                 ics->group_len[ics->num_window_groups - 1]++;
 669             } else {
 670                 ics->num_window_groups++;
 671                 ics->group_len[ics->num_window_groups - 1] = 1;
 672             }
 673         }
 674         ics->num_windows       = 8;
 675         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 676         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 677         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 678         ics->predictor_present = 0;
 679     } else {
 680         ics->max_sfb               = get_bits(gb, 6);
 681         ics->num_windows           = 1;
 682         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 683         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 684         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 685         ics->predictor_present     = get_bits1(gb);
 686         ics->predictor_reset_group = 0;
 687         if (ics->predictor_present) {
 688             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 689                 if (decode_prediction(ac, ics, gb)) {
 690                     memset(ics, 0, sizeof(IndividualChannelStream));
 691                     return -1;
 692                 }
 693             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 694                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 695                 memset(ics, 0, sizeof(IndividualChannelStream));
 696                 return -1;
 697             } else {
 698                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 699                 memset(ics, 0, sizeof(IndividualChannelStream));
 700                 return -1;
 701             }
 702         }
 703     }
 704
 705     if (ics->max_sfb > ics->num_swb) {
 706         av_log(ac->avctx, AV_LOG_ERROR,
 707                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 708                ics->max_sfb, ics->num_swb);
 709         memset(ics, 0, sizeof(IndividualChannelStream));
 710         return -1;
 711     }
 712
 713     return 0;
 714 }
 715
 716 /**
 717  * Decode band types (section_data payload); reference: table 4.46.
 718  *
 719  * @param   band_type           array of the used band type
 720  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 721  *
 722  * @return  Returns error status. 0 - OK, !0 - error
 723  */
 724 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 725                              int band_type_run_end[120], GetBitContext *gb,
 726                              IndividualChannelStream *ics)
 727 {
 728     int g, idx = 0;
 729     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 730     for (g = 0; g < ics->num_window_groups; g++) {
 731         int k = 0;
 732         while (k < ics->max_sfb) {
 733             uint8_t sect_end = k;
 734             int sect_len_incr;
 735             int sect_band_type = get_bits(gb, 4);
 736             if (sect_band_type == 12) {
 737                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 738                 return -1;
 739             }
 740             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 741                 sect_end += sect_len_incr;
 742             sect_end += sect_len_incr;
 743             if (get_bits_left(gb) < 0) {
 744                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 745                 return -1;
 746             }
 747             if (sect_end > ics->max_sfb) {
 748                 av_log(ac->avctx, AV_LOG_ERROR,
 749                        "Number of bands (%d) exceeds limit (%d).\n",
 750                        sect_end, ics->max_sfb);
 751                 return -1;
 752             }
 753             for (; k < sect_end; k++) {
 754                 band_type        [idx]   = sect_band_type;
 755                 band_type_run_end[idx++] = sect_end;
 756             }
 757         }
 758     }
 759     return 0;
 760 }
 761
 762 /**
 763  * Decode scalefactors; reference: table 4.47.
 764  *
 765  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 766  * @param   band_type           array of the used band type
 767  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 768  * @param   sf                  array of scalefactors or intensity stereo positions
 769  *
 770  * @return  Returns error status. 0 - OK, !0 - error
 771  */
 772 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 773                                unsigned int global_gain,
 774                                IndividualChannelStream *ics,
 775                                enum BandType band_type[120],
 776                                int band_type_run_end[120])
 777 {
 778     const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
 779     int g, i, idx = 0;
 780     int offset[3] = { global_gain, global_gain - 90, 100 };
 781     int noise_flag = 1;
 782     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 783     for (g = 0; g < ics->num_window_groups; g++) {
 784         for (i = 0; i < ics->max_sfb;) {
 785             int run_end = band_type_run_end[idx];
 786             if (band_type[idx] == ZERO_BT) {
 787                 for (; i < run_end; i++, idx++)
 788                     sf[idx] = 0.;
 789             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 790                 for (; i < run_end; i++, idx++) {
 791                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 792                     if (offset[2] > 255U) {
 793                         av_log(ac->avctx, AV_LOG_ERROR,
 794                                "%s (%d) out of range.\n", sf_str[2], offset[2]);
 795                         return -1;
 796                     }
 797                     sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
 798                 }
 799             } else if (band_type[idx] == NOISE_BT) {
 800                 for (; i < run_end; i++, idx++) {
 801                     if (noise_flag-- > 0)
 802                         offset[1] += get_bits(gb, 9) - 256;
 803                     else
 804                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 805                     if (offset[1] > 255U) {
 806                         av_log(ac->avctx, AV_LOG_ERROR,
 807                                "%s (%d) out of range.\n", sf_str[1], offset[1]);
 808                         return -1;
 809                     }
 810                     sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
 811                 }
 812             } else {
 813                 for (; i < run_end; i++, idx++) {
 814                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 815                     if (offset[0] > 255U) {
 816                         av_log(ac->avctx, AV_LOG_ERROR,
 817                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 818                         return -1;
 819                     }
 820                     sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
 821                 }
 822             }
 823         }
 824     }
 825     return 0;
 826 }
 827
 828 /**
 829  * Decode pulse data; reference: table 4.7.
 830  */
 831 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 832                          const uint16_t *swb_offset, int num_swb)
 833 {
 834     int i, pulse_swb;
 835     pulse->num_pulse = get_bits(gb, 2) + 1;
 836     pulse_swb        = get_bits(gb, 6);
 837     if (pulse_swb >= num_swb)
 838         return -1;
 839     pulse->pos[0]    = swb_offset[pulse_swb];
 840     pulse->pos[0]   += get_bits(gb, 5);
 841     if (pulse->pos[0] > 1023)
 842         return -1;
 843     pulse->amp[0]    = get_bits(gb, 4);
 844     for (i = 1; i < pulse->num_pulse; i++) {
 845         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 846         if (pulse->pos[i] > 1023)
 847             return -1;
 848         pulse->amp[i] = get_bits(gb, 4);
 849     }
 850     return 0;
 851 }
 852
 853 /**
 854  * Decode Temporal Noise Shaping data; reference: table 4.48.
 855  *
 856  * @return  Returns error status. 0 - OK, !0 - error
 857  */
 858 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 859                       GetBitContext *gb, const IndividualChannelStream *ics)
 860 {
 861     int w, filt, i, coef_len, coef_res, coef_compress;
 862     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 863     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 864     for (w = 0; w < ics->num_windows; w++) {
 865         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 866             coef_res = get_bits1(gb);
 867
 868             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 869                 int tmp2_idx;
 870                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 871
 872                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 873                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 874                            tns->order[w][filt], tns_max_order);
 875                     tns->order[w][filt] = 0;
 876                     return -1;
 877                 }
 878                 if (tns->order[w][filt]) {
 879                     tns->direction[w][filt] = get_bits1(gb);
 880                     coef_compress = get_bits1(gb);
 881                     coef_len = coef_res + 3 - coef_compress;
 882                     tmp2_idx = 2 * coef_compress + coef_res;
 883
 884                     for (i = 0; i < tns->order[w][filt]; i++)
 885                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 886                 }
 887             }
 888         }
 889     }
 890     return 0;
 891 }
 892
 893 /**
 894  * Decode Mid/Side data; reference: table 4.54.
 895  *
 896  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 897  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 898  *                      [3] reserved for scalable AAC
 899  */
 900 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 901                                    int ms_present)
 902 {
 903     int idx;
 904     if (ms_present == 1) {
 905         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 906             cpe->ms_mask[idx] = get_bits1(gb);
 907     } else if (ms_present == 2) {
 908         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 909     }
 910 }
 911
 912 #ifndef VMUL2
 913 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 914                            const float *scale)
 915 {
 916     float s = *scale;
 917     *dst++ = v[idx    & 15] * s;
 918     *dst++ = v[idx>>4 & 15] * s;
 919     return dst;
 920 }
 921 #endif
 922
 923 #ifndef VMUL4
 924 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 925                            const float *scale)
 926 {
 927     float s = *scale;
 928     *dst++ = v[idx    & 3] * s;
 929     *dst++ = v[idx>>2 & 3] * s;
 930     *dst++ = v[idx>>4 & 3] * s;
 931     *dst++ = v[idx>>6 & 3] * s;
 932     return dst;
 933 }
 934 #endif
 935
 936 #ifndef VMUL2S
 937 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 938                             unsigned sign, const float *scale)
 939 {
 940     union float754 s0, s1;
 941
 942     s0.f = s1.f = *scale;
 943     s0.i ^= sign >> 1 << 31;
 944     s1.i ^= sign      << 31;
 945
 946     *dst++ = v[idx    & 15] * s0.f;
 947     *dst++ = v[idx>>4 & 15] * s1.f;
 948
 949     return dst;
 950 }
 951 #endif
 952
 953 #ifndef VMUL4S
 954 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 955                             unsigned sign, const float *scale)
 956 {
 957     unsigned nz = idx >> 12;
 958     union float754 s = { .f = *scale };
 959     union float754 t;
 960
 961     t.i = s.i ^ (sign & 1<<31);
 962     *dst++ = v[idx    & 3] * t.f;
 963
 964     sign <<= nz & 1; nz >>= 1;
 965     t.i = s.i ^ (sign & 1<<31);
 966     *dst++ = v[idx>>2 & 3] * t.f;
 967
 968     sign <<= nz & 1; nz >>= 1;
 969     t.i = s.i ^ (sign & 1<<31);
 970     *dst++ = v[idx>>4 & 3] * t.f;
 971
 972     sign <<= nz & 1; nz >>= 1;
 973     t.i = s.i ^ (sign & 1<<31);
 974     *dst++ = v[idx>>6 & 3] * t.f;
 975
 976     return dst;
 977 }
 978 #endif
 979
 980 /**
 981  * Decode spectral data; reference: table 4.50.
 982  * Dequantize and scale spectral data; reference: 4.6.3.3.
 983  *
 984  * @param   coef            array of dequantized, scaled spectral data
 985  * @param   sf              array of scalefactors or intensity stereo positions
 986  * @param   pulse_present   set if pulses are present
 987  * @param   pulse           pointer to pulse data struct
 988  * @param   band_type       array of the used band type
 989  *
 990  * @return  Returns error status. 0 - OK, !0 - error
 991  */
 992 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 993                                        GetBitContext *gb, const float sf[120],
 994                                        int pulse_present, const Pulse *pulse,
 995                                        const IndividualChannelStream *ics,
 996                                        enum BandType band_type[120])
 997 {
 998     int i, k, g, idx = 0;
 999     const int c = 1024 / ics->num_windows;
1000     const uint16_t *offsets = ics->swb_offset;
1001     float *coef_base = coef;
1002     int err_idx;
1003
1004     for (g = 0; g < ics->num_windows; g++)
1005         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1006
1007     for (g = 0; g < ics->num_window_groups; g++) {
1008         unsigned g_len = ics->group_len[g];
1009
1010         for (i = 0; i < ics->max_sfb; i++, idx++) {
1011             const unsigned cbt_m1 = band_type[idx] - 1;
1012             float *cfo = coef + offsets[i];
1013             int off_len = offsets[i + 1] - offsets[i];
1014             int group;
1015
1016             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1017                 for (group = 0; group < g_len; group++, cfo+=128) {
1018                     memset(cfo, 0, off_len * sizeof(float));
1019                 }
1020             } else if (cbt_m1 == NOISE_BT - 1) {
1021                 for (group = 0; group < g_len; group++, cfo+=128) {
1022                     float scale;
1023                     float band_energy;
1024
1025                     for (k = 0; k < off_len; k++) {
1026                         ac->random_state  = lcg_random(ac->random_state);
1027                         cfo[k] = ac->random_state;
1028                     }
1029
1030                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1031                     scale = sf[idx] / sqrtf(band_energy);
1032                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1033                 }
1034             } else {
1035                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1036                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1037                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1038                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1039                 OPEN_READER(re, gb);
1040
1041                 switch (cbt_m1 >> 1) {
1042                 case 0:
1043                     for (group = 0; group < g_len; group++, cfo+=128) {
1044                         float *cf = cfo;
1045                         int len = off_len;
1046
1047                         do {
1048                             int code;
1049                             unsigned cb_idx;
1050
1051                             UPDATE_CACHE(re, gb);
1052                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1053
1054                             if (code >= cb_size) {
1055                                 err_idx = code;
1056                                 goto err_cb_overflow;
1057                             }
1058
1059                             cb_idx = cb_vector_idx[code];
1060                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1061                         } while (len -= 4);
1062                     }
1063                     break;
1064
1065                 case 1:
1066                     for (group = 0; group < g_len; group++, cfo+=128) {
1067                         float *cf = cfo;
1068                         int len = off_len;
1069
1070                         do {
1071                             int code;
1072                             unsigned nnz;
1073                             unsigned cb_idx;
1074                             uint32_t bits;
1075
1076                             UPDATE_CACHE(re, gb);
1077                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1078
1079                             if (code >= cb_size) {
1080                                 err_idx = code;
1081                                 goto err_cb_overflow;
1082                             }
1083
1084 #if MIN_CACHE_BITS < 20
1085                             UPDATE_CACHE(re, gb);
1086 #endif
1087                             cb_idx = cb_vector_idx[code];
1088                             nnz = cb_idx >> 8 & 15;
1089                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1090                             LAST_SKIP_BITS(re, gb, nnz);
1091                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1092                         } while (len -= 4);
1093                     }
1094                     break;
1095
1096                 case 2:
1097                     for (group = 0; group < g_len; group++, cfo+=128) {
1098                         float *cf = cfo;
1099                         int len = off_len;
1100
1101                         do {
1102                             int code;
1103                             unsigned cb_idx;
1104
1105                             UPDATE_CACHE(re, gb);
1106                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1107
1108                             if (code >= cb_size) {
1109                                 err_idx = code;
1110                                 goto err_cb_overflow;
1111                             }
1112
1113                             cb_idx = cb_vector_idx[code];
1114                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1115                         } while (len -= 2);
1116                     }
1117                     break;
1118
1119                 case 3:
1120                 case 4:
1121                     for (group = 0; group < g_len; group++, cfo+=128) {
1122                         float *cf = cfo;
1123                         int len = off_len;
1124
1125                         do {
1126                             int code;
1127                             unsigned nnz;
1128                             unsigned cb_idx;
1129                             unsigned sign;
1130
1131                             UPDATE_CACHE(re, gb);
1132                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1133
1134                             if (code >= cb_size) {
1135                                 err_idx = code;
1136                                 goto err_cb_overflow;
1137                             }
1138
1139                             cb_idx = cb_vector_idx[code];
1140                             nnz = cb_idx >> 8 & 15;
1141                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1142                             LAST_SKIP_BITS(re, gb, nnz);
1143                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1144                         } while (len -= 2);
1145                     }
1146                     break;
1147
1148                 default:
1149                     for (group = 0; group < g_len; group++, cfo+=128) {
1150                         float *cf = cfo;
1151                         uint32_t *icf = (uint32_t *) cf;
1152                         int len = off_len;
1153
1154                         do {
1155                             int code;
1156                             unsigned nzt, nnz;
1157                             unsigned cb_idx;
1158                             uint32_t bits;
1159                             int j;
1160
1161                             UPDATE_CACHE(re, gb);
1162                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1163
1164                             if (!code) {
1165                                 *icf++ = 0;
1166                                 *icf++ = 0;
1167                                 continue;
1168                             }
1169
1170                             if (code >= cb_size) {
1171                                 err_idx = code;
1172                                 goto err_cb_overflow;
1173                             }
1174
1175                             cb_idx = cb_vector_idx[code];
1176                             nnz = cb_idx >> 12;
1177                             nzt = cb_idx >> 8;
1178                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1179                             LAST_SKIP_BITS(re, gb, nnz);
1180
1181                             for (j = 0; j < 2; j++) {
1182                                 if (nzt & 1<<j) {
1183                                     uint32_t b;
1184                                     int n;
1185                                     /* The total length of escape_sequence must be < 22 bits according
1186                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1187                                     UPDATE_CACHE(re, gb);
1188                                     b = GET_CACHE(re, gb);
1189                                     b = 31 - av_log2(~b);
1190
1191                                     if (b > 8) {
1192                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1193                                         return -1;
1194                                     }
1195
1196 #if MIN_CACHE_BITS < 21
1197                                     LAST_SKIP_BITS(re, gb, b + 1);
1198                                     UPDATE_CACHE(re, gb);
1199 #else
1200                                     SKIP_BITS(re, gb, b + 1);
1201 #endif
1202                                     b += 4;
1203                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1204                                     LAST_SKIP_BITS(re, gb, b);
1205                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1206                                     bits <<= 1;
1207                                 } else {
1208                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1209                                     *icf++ = (bits & 1<<31) | v;
1210                                     bits <<= !!v;
1211                                 }
1212                                 cb_idx >>= 4;
1213                             }
1214                         } while (len -= 2);
1215
1216                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1217                     }
1218                 }
1219
1220                 CLOSE_READER(re, gb);
1221             }
1222         }
1223         coef += g_len << 7;
1224     }
1225
1226     if (pulse_present) {
1227         idx = 0;
1228         for (i = 0; i < pulse->num_pulse; i++) {
1229             float co = coef_base[ pulse->pos[i] ];
1230             while (offsets[idx + 1] <= pulse->pos[i])
1231                 idx++;
1232             if (band_type[idx] != NOISE_BT && sf[idx]) {
1233                 float ico = -pulse->amp[i];
1234                 if (co) {
1235                     co /= sf[idx];
1236                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1237                 }
1238                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1239             }
1240         }
1241     }
1242     return 0;
1243
1244 err_cb_overflow:
1245     av_log(ac->avctx, AV_LOG_ERROR,
1246            "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1247            band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1248     return -1;
1249 }
1250
1251 static av_always_inline float flt16_round(float pf)
1252 {
1253     union float754 tmp;
1254     tmp.f = pf;
1255     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1256     return tmp.f;
1257 }
1258
1259 static av_always_inline float flt16_even(float pf)
1260 {
1261     union float754 tmp;
1262     tmp.f = pf;
1263     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1264     return tmp.f;
1265 }
1266
1267 static av_always_inline float flt16_trunc(float pf)
1268 {
1269     union float754 pun;
1270     pun.f = pf;
1271     pun.i &= 0xFFFF0000U;
1272     return pun.f;
1273 }
1274
1275 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1276                     int output_enable)
1277 {
1278     const float a     = 0.953125; // 61.0 / 64
1279     const float alpha = 0.90625;  // 29.0 / 32
1280     float e0, e1;
1281     float pv;
1282     float k1, k2;
1283
1284     k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1285     k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1286
1287     pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1288     if (output_enable)
1289         *coef += pv * ac->sf_scale;
1290
1291     e0 = *coef / ac->sf_scale;
1292     e1 = e0 - k1 * ps->r0;
1293
1294     ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1295     ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1296     ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1297     ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
1298
1299     ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1300     ps->r0 = flt16_trunc(a * e0);
1301 }
1302
1303 /**
1304  * Apply AAC-Main style frequency domain prediction.
1305  */
1306 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1307 {
1308     int sfb, k;
1309
1310     if (!sce->ics.predictor_initialized) {
1311         reset_all_predictors(sce->predictor_state);
1312         sce->ics.predictor_initialized = 1;
1313     }
1314
1315     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1316         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1317             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1318                 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1319                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1320             }
1321         }
1322         if (sce->ics.predictor_reset_group)
1323             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1324     } else
1325         reset_all_predictors(sce->predictor_state);
1326 }
1327
1328 /**
1329  * Decode an individual_channel_stream payload; reference: table 4.44.
1330  *
1331  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1332  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1333  *
1334  * @return  Returns error status. 0 - OK, !0 - error
1335  */
1336 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1337                       GetBitContext *gb, int common_window, int scale_flag)
1338 {
1339     Pulse pulse;
1340     TemporalNoiseShaping    *tns = &sce->tns;
1341     IndividualChannelStream *ics = &sce->ics;
1342     float *out = sce->coeffs;
1343     int global_gain, pulse_present = 0;
1344
1345     /* This assignment is to silence a GCC warning about the variable being used
1346      * uninitialized when in fact it always is.
1347      */
1348     pulse.num_pulse = 0;
1349
1350     global_gain = get_bits(gb, 8);
1351
1352     if (!common_window && !scale_flag) {
1353         if (decode_ics_info(ac, ics, gb, 0) < 0)
1354             return -1;
1355     }
1356
1357     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1358         return -1;
1359     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1360         return -1;
1361
1362     pulse_present = 0;
1363     if (!scale_flag) {
1364         if ((pulse_present = get_bits1(gb))) {
1365             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1366                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1367                 return -1;
1368             }
1369             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1370                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1371                 return -1;
1372             }
1373         }
1374         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1375             return -1;
1376         if (get_bits1(gb)) {
1377             av_log_missing_feature(ac->avctx, "SSR", 1);
1378             return -1;
1379         }
1380     }
1381
1382     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1383         return -1;
1384
1385     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1386         apply_prediction(ac, sce);
1387
1388     return 0;
1389 }
1390
1391 /**
1392  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1393  */
1394 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1395 {
1396     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1397     float *ch0 = cpe->ch[0].coeffs;
1398     float *ch1 = cpe->ch[1].coeffs;
1399     int g, i, group, idx = 0;
1400     const uint16_t *offsets = ics->swb_offset;
1401     for (g = 0; g < ics->num_window_groups; g++) {
1402         for (i = 0; i < ics->max_sfb; i++, idx++) {
1403             if (cpe->ms_mask[idx] &&
1404                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1405                 for (group = 0; group < ics->group_len[g]; group++) {
1406                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1407                                               ch1 + group * 128 + offsets[i],
1408                                               offsets[i+1] - offsets[i]);
1409                 }
1410             }
1411         }
1412         ch0 += ics->group_len[g] * 128;
1413         ch1 += ics->group_len[g] * 128;
1414     }
1415 }
1416
1417 /**
1418  * intensity stereo decoding; reference: 4.6.8.2.3
1419  *
1420  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1421  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1422  *                      [3] reserved for scalable AAC
1423  */
1424 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1425 {
1426     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1427     SingleChannelElement         *sce1 = &cpe->ch[1];
1428     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1429     const uint16_t *offsets = ics->swb_offset;
1430     int g, group, i, k, idx = 0;
1431     int c;
1432     float scale;
1433     for (g = 0; g < ics->num_window_groups; g++) {
1434         for (i = 0; i < ics->max_sfb;) {
1435             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1436                 const int bt_run_end = sce1->band_type_run_end[idx];
1437                 for (; i < bt_run_end; i++, idx++) {
1438                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1439                     if (ms_present)
1440                         c *= 1 - 2 * cpe->ms_mask[idx];
1441                     scale = c * sce1->sf[idx];
1442                     for (group = 0; group < ics->group_len[g]; group++)
1443                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1444                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1445                 }
1446             } else {
1447                 int bt_run_end = sce1->band_type_run_end[idx];
1448                 idx += bt_run_end - i;
1449                 i    = bt_run_end;
1450             }
1451         }
1452         coef0 += ics->group_len[g] * 128;
1453         coef1 += ics->group_len[g] * 128;
1454     }
1455 }
1456
1457 /**
1458  * Decode a channel_pair_element; reference: table 4.4.
1459  *
1460  * @param   elem_id Identifies the instance of a syntax element.
1461  *
1462  * @return  Returns error status. 0 - OK, !0 - error
1463  */
1464 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1465 {
1466     int i, ret, common_window, ms_present = 0;
1467
1468     common_window = get_bits1(gb);
1469     if (common_window) {
1470         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1471             return -1;
1472         i = cpe->ch[1].ics.use_kb_window[0];
1473         cpe->ch[1].ics = cpe->ch[0].ics;
1474         cpe->ch[1].ics.use_kb_window[1] = i;
1475         ms_present = get_bits(gb, 2);
1476         if (ms_present == 3) {
1477             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1478             return -1;
1479         } else if (ms_present)
1480             decode_mid_side_stereo(cpe, gb, ms_present);
1481     }
1482     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1483         return ret;
1484     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1485         return ret;
1486
1487     if (common_window) {
1488         if (ms_present)
1489             apply_mid_side_stereo(ac, cpe);
1490         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1491             apply_prediction(ac, &cpe->ch[0]);
1492             apply_prediction(ac, &cpe->ch[1]);
1493         }
1494     }
1495
1496     apply_intensity_stereo(cpe, ms_present);
1497     return 0;
1498 }
1499
1500 /**
1501  * Decode coupling_channel_element; reference: table 4.8.
1502  *
1503  * @param   elem_id Identifies the instance of a syntax element.
1504  *
1505  * @return  Returns error status. 0 - OK, !0 - error
1506  */
1507 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1508 {
1509     int num_gain = 0;
1510     int c, g, sfb, ret;
1511     int sign;
1512     float scale;
1513     SingleChannelElement *sce = &che->ch[0];
1514     ChannelCoupling     *coup = &che->coup;
1515
1516     coup->coupling_point = 2 * get_bits1(gb);
1517     coup->num_coupled = get_bits(gb, 3);
1518     for (c = 0; c <= coup->num_coupled; c++) {
1519         num_gain++;
1520         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1521         coup->id_select[c] = get_bits(gb, 4);
1522         if (coup->type[c] == TYPE_CPE) {
1523             coup->ch_select[c] = get_bits(gb, 2);
1524             if (coup->ch_select[c] == 3)
1525                 num_gain++;
1526         } else
1527             coup->ch_select[c] = 2;
1528     }
1529     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1530
1531     sign  = get_bits(gb, 1);
1532     scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1533
1534     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1535         return ret;
1536
1537     for (c = 0; c < num_gain; c++) {
1538         int idx  = 0;
1539         int cge  = 1;
1540         int gain = 0;
1541         float gain_cache = 1.;
1542         if (c) {
1543             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1544             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1545             gain_cache = pow(scale, -gain);
1546         }
1547         if (coup->coupling_point == AFTER_IMDCT) {
1548             coup->gain[c][0] = gain_cache;
1549         } else {
1550             for (g = 0; g < sce->ics.num_window_groups; g++) {
1551                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1552                     if (sce->band_type[idx] != ZERO_BT) {
1553                         if (!cge) {
1554                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1555                             if (t) {
1556                                 int s = 1;
1557                                 t = gain += t;
1558                                 if (sign) {
1559                                     s  -= 2 * (t & 0x1);
1560                                     t >>= 1;
1561                                 }
1562                                 gain_cache = pow(scale, -t) * s;
1563                             }
1564                         }
1565                         coup->gain[c][idx] = gain_cache;
1566                     }
1567                 }
1568             }
1569         }
1570     }
1571     return 0;
1572 }
1573
1574 /**
1575  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1576  *
1577  * @return  Returns number of bytes consumed.
1578  */
1579 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1580                                          GetBitContext *gb)
1581 {
1582     int i;
1583     int num_excl_chan = 0;
1584
1585     do {
1586         for (i = 0; i < 7; i++)
1587             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1588     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1589
1590     return num_excl_chan / 7;
1591 }
1592
1593 /**
1594  * Decode dynamic range information; reference: table 4.52.
1595  *
1596  * @param   cnt length of TYPE_FIL syntactic element in bytes
1597  *
1598  * @return  Returns number of bytes consumed.
1599  */
1600 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1601                                 GetBitContext *gb, int cnt)
1602 {
1603     int n             = 1;
1604     int drc_num_bands = 1;
1605     int i;
1606
1607     /* pce_tag_present? */
1608     if (get_bits1(gb)) {
1609         che_drc->pce_instance_tag  = get_bits(gb, 4);
1610         skip_bits(gb, 4); // tag_reserved_bits
1611         n++;
1612     }
1613
1614     /* excluded_chns_present? */
1615     if (get_bits1(gb)) {
1616         n += decode_drc_channel_exclusions(che_drc, gb);
1617     }
1618
1619     /* drc_bands_present? */
1620     if (get_bits1(gb)) {
1621         che_drc->band_incr            = get_bits(gb, 4);
1622         che_drc->interpolation_scheme = get_bits(gb, 4);
1623         n++;
1624         drc_num_bands += che_drc->band_incr;
1625         for (i = 0; i < drc_num_bands; i++) {
1626             che_drc->band_top[i] = get_bits(gb, 8);
1627             n++;
1628         }
1629     }
1630
1631     /* prog_ref_level_present? */
1632     if (get_bits1(gb)) {
1633         che_drc->prog_ref_level = get_bits(gb, 7);
1634         skip_bits1(gb); // prog_ref_level_reserved_bits
1635         n++;
1636     }
1637
1638     for (i = 0; i < drc_num_bands; i++) {
1639         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1640         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1641         n++;
1642     }
1643
1644     return n;
1645 }
1646
1647 /**
1648  * Decode extension data (incomplete); reference: table 4.51.
1649  *
1650  * @param   cnt length of TYPE_FIL syntactic element in bytes
1651  *
1652  * @return Returns number of bytes consumed
1653  */
1654 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1655                                     ChannelElement *che, enum RawDataBlockType elem_type)
1656 {
1657     int crc_flag = 0;
1658     int res = cnt;
1659     switch (get_bits(gb, 4)) { // extension type
1660     case EXT_SBR_DATA_CRC:
1661         crc_flag++;
1662     case EXT_SBR_DATA:
1663         if (!che) {
1664             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1665             return res;
1666         } else if (!ac->m4ac.sbr) {
1667             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1668             skip_bits_long(gb, 8 * cnt - 4);
1669             return res;
1670         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1671             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1672             skip_bits_long(gb, 8 * cnt - 4);
1673             return res;
1674         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1675             ac->m4ac.sbr = 1;
1676             ac->m4ac.ps = 1;
1677             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1678         } else {
1679             ac->m4ac.sbr = 1;
1680         }
1681         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1682         break;
1683     case EXT_DYNAMIC_RANGE:
1684         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1685         break;
1686     case EXT_FILL:
1687     case EXT_FILL_DATA:
1688     case EXT_DATA_ELEMENT:
1689     default:
1690         skip_bits_long(gb, 8 * cnt - 4);
1691         break;
1692     };
1693     return res;
1694 }
1695
1696 /**
1697  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1698  *
1699  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1700  * @param   coef    spectral coefficients
1701  */
1702 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1703                       IndividualChannelStream *ics, int decode)
1704 {
1705     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1706     int w, filt, m, i;
1707     int bottom, top, order, start, end, size, inc;
1708     float lpc[TNS_MAX_ORDER];
1709
1710     for (w = 0; w < ics->num_windows; w++) {
1711         bottom = ics->num_swb;
1712         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1713             top    = bottom;
1714             bottom = FFMAX(0, top - tns->length[w][filt]);
1715             order  = tns->order[w][filt];
1716             if (order == 0)
1717                 continue;
1718
1719             // tns_decode_coef
1720             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1721
1722             start = ics->swb_offset[FFMIN(bottom, mmm)];
1723             end   = ics->swb_offset[FFMIN(   top, mmm)];
1724             if ((size = end - start) <= 0)
1725                 continue;
1726             if (tns->direction[w][filt]) {
1727                 inc = -1;
1728                 start = end - 1;
1729             } else {
1730                 inc = 1;
1731             }
1732             start += w * 128;
1733
1734             // ar filter
1735             for (m = 0; m < size; m++, start += inc)
1736                 for (i = 1; i <= FFMIN(m, order); i++)
1737                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1738         }
1739     }
1740 }
1741
1742 /**
1743  * Conduct IMDCT and windowing.
1744  */
1745 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1746 {
1747     IndividualChannelStream *ics = &sce->ics;
1748     float *in    = sce->coeffs;
1749     float *out   = sce->ret;
1750     float *saved = sce->saved;
1751     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1752     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1753     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1754     float *buf  = ac->buf_mdct;
1755     float *temp = ac->temp;
1756     int i;
1757
1758     // imdct
1759     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1760         if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1761             av_log(ac->avctx, AV_LOG_WARNING,
1762                    "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1763                    "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1764         for (i = 0; i < 1024; i += 128)
1765             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1766     } else
1767         ff_imdct_half(&ac->mdct, buf, in);
1768
1769     /* window overlapping
1770      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1771      * and long to short transitions are considered to be short to short
1772      * transitions. This leaves just two cases (long to long and short to short)
1773      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1774      */
1775     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1776             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1777         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1778     } else {
1779         for (i = 0; i < 448; i++)
1780             out[i] = saved[i] + bias;
1781
1782         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1783             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1784             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1785             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1786             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1787             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1788             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1789         } else {
1790             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1791             for (i = 576; i < 1024; i++)
1792                 out[i] = buf[i-512] + bias;
1793         }
1794     }
1795
1796     // buffer update
1797     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1798         for (i = 0; i < 64; i++)
1799             saved[i] = temp[64 + i] - bias;
1800         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1801         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1802         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1803         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1804     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1805         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1806         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1807     } else { // LONG_STOP or ONLY_LONG
1808         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1809     }
1810 }
1811
1812 /**
1813  * Apply dependent channel coupling (applied before IMDCT).
1814  *
1815  * @param   index   index into coupling gain array
1816  */
1817 static void apply_dependent_coupling(AACContext *ac,
1818                                      SingleChannelElement *target,
1819                                      ChannelElement *cce, int index)
1820 {
1821     IndividualChannelStream *ics = &cce->ch[0].ics;
1822     const uint16_t *offsets = ics->swb_offset;
1823     float *dest = target->coeffs;
1824     const float *src = cce->ch[0].coeffs;
1825     int g, i, group, k, idx = 0;
1826     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1827         av_log(ac->avctx, AV_LOG_ERROR,
1828                "Dependent coupling is not supported together with LTP\n");
1829         return;
1830     }
1831     for (g = 0; g < ics->num_window_groups; g++) {
1832         for (i = 0; i < ics->max_sfb; i++, idx++) {
1833             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1834                 const float gain = cce->coup.gain[index][idx];
1835                 for (group = 0; group < ics->group_len[g]; group++) {
1836                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1837                         // XXX dsputil-ize
1838                         dest[group * 128 + k] += gain * src[group * 128 + k];
1839                     }
1840                 }
1841             }
1842         }
1843         dest += ics->group_len[g] * 128;
1844         src  += ics->group_len[g] * 128;
1845     }
1846 }
1847
1848 /**
1849  * Apply independent channel coupling (applied after IMDCT).
1850  *
1851  * @param   index   index into coupling gain array
1852  */
1853 static void apply_independent_coupling(AACContext *ac,
1854                                        SingleChannelElement *target,
1855                                        ChannelElement *cce, int index)
1856 {
1857     int i;
1858     const float gain = cce->coup.gain[index][0];
1859     const float bias = ac->add_bias;
1860     const float *src = cce->ch[0].ret;
1861     float *dest = target->ret;
1862     const int len = 1024 << (ac->m4ac.sbr == 1);
1863
1864     for (i = 0; i < len; i++)
1865         dest[i] += gain * (src[i] - bias);
1866 }
1867
1868 /**
1869  * channel coupling transformation interface
1870  *
1871  * @param   index   index into coupling gain array
1872  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1873  */
1874 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1875                                    enum RawDataBlockType type, int elem_id,
1876                                    enum CouplingPoint coupling_point,
1877                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1878 {
1879     int i, c;
1880
1881     for (i = 0; i < MAX_ELEM_ID; i++) {
1882         ChannelElement *cce = ac->che[TYPE_CCE][i];
1883         int index = 0;
1884
1885         if (cce && cce->coup.coupling_point == coupling_point) {
1886             ChannelCoupling *coup = &cce->coup;
1887
1888             for (c = 0; c <= coup->num_coupled; c++) {
1889                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1890                     if (coup->ch_select[c] != 1) {
1891                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1892                         if (coup->ch_select[c] != 0)
1893                             index++;
1894                     }
1895                     if (coup->ch_select[c] != 2)
1896                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1897                 } else
1898                     index += 1 + (coup->ch_select[c] == 3);
1899             }
1900         }
1901     }
1902 }
1903
1904 /**
1905  * Convert spectral data to float samples, applying all supported tools as appropriate.
1906  */
1907 static void spectral_to_sample(AACContext *ac)
1908 {
1909     int i, type;
1910     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1911     for (type = 3; type >= 0; type--) {
1912         for (i = 0; i < MAX_ELEM_ID; i++) {
1913             ChannelElement *che = ac->che[type][i];
1914             if (che) {
1915                 if (type <= TYPE_CPE)
1916                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1917                 if (che->ch[0].tns.present)
1918                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1919                 if (che->ch[1].tns.present)
1920                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1921                 if (type <= TYPE_CPE)
1922                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1923                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1924                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1925                     if (type == TYPE_CPE) {
1926                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1927                     }
1928                     if (ac->m4ac.sbr > 0) {
1929                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1930                     }
1931                 }
1932                 if (type <= TYPE_CCE)
1933                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1934             }
1935         }
1936     }
1937 }
1938
1939 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1940 {
1941     int size;
1942     AACADTSHeaderInfo hdr_info;
1943
1944     size = ff_aac_parse_header(gb, &hdr_info);
1945     if (size > 0) {
1946         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1947             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1948             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1949             ac->m4ac.chan_config = hdr_info.chan_config;
1950             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1951                 return -7;
1952             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1953                 return -7;
1954         } else if (ac->output_configured != OC_LOCKED) {
1955             ac->output_configured = OC_NONE;
1956         }
1957         if (ac->output_configured != OC_LOCKED) {
1958             ac->m4ac.sbr = -1;
1959             ac->m4ac.ps  = -1;
1960         }
1961         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1962         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1963         ac->m4ac.object_type     = hdr_info.object_type;
1964         if (!ac->avctx->sample_rate)
1965             ac->avctx->sample_rate = hdr_info.sample_rate;
1966         if (hdr_info.num_aac_frames == 1) {
1967             if (!hdr_info.crc_absent)
1968                 skip_bits(gb, 16);
1969         } else {
1970             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1971             return -1;
1972         }
1973     }
1974     return size;
1975 }
1976
1977 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1978                             int *data_size, AVPacket *avpkt)
1979 {
1980     const uint8_t *buf = avpkt->data;
1981     int buf_size = avpkt->size;
1982     AACContext *ac = avctx->priv_data;
1983     ChannelElement *che = NULL, *che_prev = NULL;
1984     GetBitContext gb;
1985     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1986     int err, elem_id, data_size_tmp;
1987     int buf_consumed;
1988     int samples = 0, multiplier;
1989     int buf_offset;
1990
1991     init_get_bits(&gb, buf, buf_size * 8);
1992
1993     if (show_bits(&gb, 12) == 0xfff) {
1994         if (parse_adts_frame_header(ac, &gb) < 0) {
1995             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1996             return -1;
1997         }
1998         if (ac->m4ac.sampling_index > 12) {
1999             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2000             return -1;
2001         }
2002     }
2003
2004     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
2005     // parse
2006     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
2007         elem_id = get_bits(&gb, 4);
2008
2009         if (elem_type < TYPE_DSE) {
2010         if (!(che=get_che(ac, elem_type, elem_id))) {
2011             av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
2012             return -1;
2013         }
2014             samples = 1024;
2015         }
2016
2017         switch (elem_type) {
2018
2019         case TYPE_SCE:
2020             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2021             break;
2022
2023         case TYPE_CPE:
2024             err = decode_cpe(ac, &gb, che);
2025             break;
2026
2027         case TYPE_CCE:
2028             err = decode_cce(ac, &gb, che);
2029             break;
2030
2031         case TYPE_LFE:
2032             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2033             break;
2034
2035         case TYPE_DSE:
2036             err = skip_data_stream_element(ac, &gb);
2037             break;
2038
2039         case TYPE_PCE: {
2040             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2041             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2042             if ((err = decode_pce(ac, new_che_pos, &gb)))
2043                 break;
2044             if (ac->output_configured > OC_TRIAL_PCE)
2045                 av_log(avctx, AV_LOG_ERROR,
2046                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2047             else
2048                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2049             break;
2050         }
2051
2052         case TYPE_FIL:
2053             if (elem_id == 15)
2054                 elem_id += get_bits(&gb, 8) - 1;
2055             if (get_bits_left(&gb) < 8 * elem_id) {
2056                     av_log(avctx, AV_LOG_ERROR, overread_err);
2057                     return -1;
2058             }
2059             while (elem_id > 0)
2060                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2061             err = 0; /* FIXME */
2062             break;
2063
2064         default:
2065             err = -1; /* should not happen, but keeps compiler happy */
2066             break;
2067         }
2068
2069         che_prev       = che;
2070         elem_type_prev = elem_type;
2071
2072         if (err)
2073             return err;
2074
2075         if (get_bits_left(&gb) < 3) {
2076             av_log(avctx, AV_LOG_ERROR, overread_err);
2077             return -1;
2078         }
2079     }
2080
2081     spectral_to_sample(ac);
2082
2083     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2084     samples <<= multiplier;
2085     if (ac->output_configured < OC_LOCKED) {
2086         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2087         avctx->frame_size = samples;
2088     }
2089
2090     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2091     if (*data_size < data_size_tmp) {
2092         av_log(avctx, AV_LOG_ERROR,
2093                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2094                *data_size, data_size_tmp);
2095         return -1;
2096     }
2097     *data_size = data_size_tmp;
2098
2099     if (samples)
2100     ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2101
2102     if (ac->output_configured)
2103         ac->output_configured = OC_LOCKED;
2104
2105     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2106     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2107         if (buf[buf_offset])
2108             break;
2109
2110     return buf_size > buf_offset ? buf_consumed : buf_size;
2111 }
2112
2113 static av_cold int aac_decode_close(AVCodecContext *avctx)
2114 {
2115     AACContext *ac = avctx->priv_data;
2116     int i, type;
2117
2118     for (i = 0; i < MAX_ELEM_ID; i++) {
2119         for (type = 0; type < 4; type++) {
2120             if (ac->che[type][i])
2121                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2122             av_freep(&ac->che[type][i]);
2123         }
2124     }
2125
2126     ff_mdct_end(&ac->mdct);
2127     ff_mdct_end(&ac->mdct_small);
2128     return 0;
2129 }
2130
2131 AVCodec aac_decoder = {
2132     "aac",
2133     AVMEDIA_TYPE_AUDIO,
2134     CODEC_ID_AAC,
2135     sizeof(AACContext),
2136     aac_decode_init,
2137     NULL,
2138     aac_decode_close,
2139     aac_decode_frame,
2140     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2141     .sample_fmts = (const enum SampleFormat[]) {
2142         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2143     },
2144     .channel_layouts = aac_channel_layout,
2145 };