git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * AAC decoder
  26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  28  */
  29
  30 /*
  31  * supported tools
  32  *
  33  * Support?             Name
  34  * N (code in SoC repo) gain control
  35  * Y                    block switching
  36  * Y                    window shapes - standard
  37  * N                    window shapes - Low Delay
  38  * Y                    filterbank - standard
  39  * N (code in SoC repo) filterbank - Scalable Sample Rate
  40  * Y                    Temporal Noise Shaping
  41  * N (code in SoC repo) Long Term Prediction
  42  * Y                    intensity stereo
  43  * Y                    channel coupling
  44  * Y                    frequency domain prediction
  45  * Y                    Perceptual Noise Substitution
  46  * Y                    Mid/Side stereo
  47  * N                    Scalable Inverse AAC Quantization
  48  * N                    Frequency Selective Switch
  49  * N                    upsampling filter
  50  * Y                    quantization & coding - AAC
  51  * N                    quantization & coding - TwinVQ
  52  * N                    quantization & coding - BSAC
  53  * N                    AAC Error Resilience tools
  54  * N                    Error Resilience payload syntax
  55  * N                    Error Protection tool
  56  * N                    CELP
  57  * N                    Silence Compression
  58  * N                    HVXC
  59  * N                    HVXC 4kbits/s VR
  60  * N                    Structured Audio tools
  61  * N                    Structured Audio Sample Bank Format
  62  * N                    MIDI
  63  * N                    Harmonic and Individual Lines plus Noise
  64  * N                    Text-To-Speech Interface
  65  * Y                    Spectral Band Replication
  66  * Y (not in this code) Layer-1
  67  * Y (not in this code) Layer-2
  68  * Y (not in this code) Layer-3
  69  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  70  * N (planned)          Parametric Stereo
  71  * N                    Direct Stream Transfer
  72  *
  73  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  74  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  75            Parametric Stereo.
  76  */
  77
  78
  79 #include "avcodec.h"
  80 #include "internal.h"
  81 #include "get_bits.h"
  82 #include "dsputil.h"
  83 #include "fft.h"
  84 #include "lpc.h"
  85
  86 #include "aac.h"
  87 #include "aactab.h"
  88 #include "aacdectab.h"
  89 #include "cbrt_tablegen.h"
  90 #include "sbr.h"
  91 #include "aacsbr.h"
  92 #include "mpeg4audio.h"
  93 #include "aac_parser.h"
  94
  95 #include <assert.h>
  96 #include <errno.h>
  97 #include <math.h>
  98 #include <string.h>
  99
 100 #if ARCH_ARM
 101 #   include "arm/aac.h"
 102 #endif
 103
 104 union float754 {
 105     float f;
 106     uint32_t i;
 107 };
 108
 109 static VLC vlc_scalefactors;
 110 static VLC vlc_spectral[11];
 111
 112 static const char overread_err[] = "Input buffer exhausted before END element found\n";
 113
 114 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 115 {
 116     /* Some buggy encoders appear to set all elem_ids to zero and rely on
 117     channels always occurring in the same order. This is expressly forbidden
 118     by the spec but we will try to work around it.
 119     */
 120     int err_printed = 0;
 121     while (ac->tags_seen_this_frame[type][elem_id] && elem_id < MAX_ELEM_ID) {
 122         if (ac->output_configured < OC_LOCKED && !err_printed) {
 123             av_log(ac->avctx, AV_LOG_WARNING, "Duplicate channel tag found, attempting to remap.\n");
 124             err_printed = 1;
 125         }
 126         elem_id++;
 127     }
 128     if (elem_id == MAX_ELEM_ID)
 129         return NULL;
 130     ac->tags_seen_this_frame[type][elem_id] = 1;
 131
 132     if (ac->tag_che_map[type][elem_id]) {
 133         return ac->tag_che_map[type][elem_id];
 134     }
 135     if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
 136         return NULL;
 137     }
 138     switch (ac->m4ac.chan_config) {
 139     case 7:
 140         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 141             ac->tags_mapped++;
 142             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 143         }
 144     case 6:
 145         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 146            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 147            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 148         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 151         }
 152     case 5:
 153         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 154             ac->tags_mapped++;
 155             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 156         }
 157     case 4:
 158         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 159             ac->tags_mapped++;
 160             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 161         }
 162     case 3:
 163     case 2:
 164         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 165             ac->tags_mapped++;
 166             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 167         } else if (ac->m4ac.chan_config == 2) {
 168             return NULL;
 169         }
 170     case 1:
 171         if (!ac->tags_mapped && type == TYPE_SCE) {
 172             ac->tags_mapped++;
 173             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 174         }
 175     default:
 176         return NULL;
 177     }
 178 }
 179
 180 /**
 181  * Check for the channel element in the current channel position configuration.
 182  * If it exists, make sure the appropriate element is allocated and map the
 183  * channel order to match the internal FFmpeg channel layout.
 184  *
 185  * @param   che_pos current channel position configuration
 186  * @param   type channel element type
 187  * @param   id channel element id
 188  * @param   channels count of the number of channels in the configuration
 189  *
 190  * @return  Returns error status. 0 - OK, !0 - error
 191  */
 192 static av_cold int che_configure(AACContext *ac,
 193                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 194                          int type, int id,
 195                          int *channels)
 196 {
 197     if (che_pos[type][id]) {
 198         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 199             return AVERROR(ENOMEM);
 200         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 201         if (type != TYPE_CCE) {
 202             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 203             if (type == TYPE_CPE) {
 204                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 205             }
 206         }
 207     } else {
 208         if (ac->che[type][id])
 209             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 210         av_freep(&ac->che[type][id]);
 211     }
 212     return 0;
 213 }
 214
 215 /**
 216  * Configure output channel order based on the current program configuration element.
 217  *
 218  * @param   che_pos current channel position configuration
 219  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 220  *
 221  * @return  Returns error status. 0 - OK, !0 - error
 222  */
 223 static av_cold int output_configure(AACContext *ac,
 224                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 225                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 226                             int channel_config, enum OCStatus oc_type)
 227 {
 228     AVCodecContext *avctx = ac->avctx;
 229     int i, type, channels = 0, ret;
 230
 231     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 232
 233     if (channel_config) {
 234         for (i = 0; i < tags_per_config[channel_config]; i++) {
 235             if ((ret = che_configure(ac, che_pos,
 236                                      aac_channel_layout_map[channel_config - 1][i][0],
 237                                      aac_channel_layout_map[channel_config - 1][i][1],
 238                                      &channels)))
 239                 return ret;
 240         }
 241
 242         memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 243         ac->tags_mapped = 0;
 244
 245         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 246     } else {
 247         /* Allocate or free elements depending on if they are in the
 248          * current program configuration.
 249          *
 250          * Set up default 1:1 output mapping.
 251          *
 252          * For a 5.1 stream the output order will be:
 253          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 254          */
 255
 256         for (i = 0; i < MAX_ELEM_ID; i++) {
 257             for (type = 0; type < 4; type++) {
 258                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 259                     return ret;
 260             }
 261         }
 262
 263         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 264         ac->tags_mapped = 4 * MAX_ELEM_ID;
 265
 266         avctx->channel_layout = 0;
 267     }
 268
 269     avctx->channels = channels;
 270
 271     ac->output_configured = oc_type;
 272
 273     return 0;
 274 }
 275
 276 /**
 277  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 278  *
 279  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 280  * @param sce_map mono (Single Channel Element) map
 281  * @param type speaker type/position for these channels
 282  */
 283 static void decode_channel_map(enum ChannelPosition *cpe_map,
 284                                enum ChannelPosition *sce_map,
 285                                enum ChannelPosition type,
 286                                GetBitContext *gb, int n)
 287 {
 288     while (n--) {
 289         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 290         map[get_bits(gb, 4)] = type;
 291     }
 292 }
 293
 294 /**
 295  * Decode program configuration element; reference: table 4.2.
 296  *
 297  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 298  *
 299  * @return  Returns error status. 0 - OK, !0 - error
 300  */
 301 static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 302                       GetBitContext *gb)
 303 {
 304     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 305     int comment_len;
 306
 307     skip_bits(gb, 2);  // object_type
 308
 309     sampling_index = get_bits(gb, 4);
 310     if (ac->m4ac.sampling_index != sampling_index)
 311         av_log(ac->avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 312
 313     num_front       = get_bits(gb, 4);
 314     num_side        = get_bits(gb, 4);
 315     num_back        = get_bits(gb, 4);
 316     num_lfe         = get_bits(gb, 2);
 317     num_assoc_data  = get_bits(gb, 3);
 318     num_cc          = get_bits(gb, 4);
 319
 320     if (get_bits1(gb))
 321         skip_bits(gb, 4); // mono_mixdown_tag
 322     if (get_bits1(gb))
 323         skip_bits(gb, 4); // stereo_mixdown_tag
 324
 325     if (get_bits1(gb))
 326         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 327
 328     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 329     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 330     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 331     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 332
 333     skip_bits_long(gb, 4 * num_assoc_data);
 334
 335     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 336
 337     align_get_bits(gb);
 338
 339     /* comment field, first byte is length */
 340     comment_len = get_bits(gb, 8) * 8;
 341     if (get_bits_left(gb) < comment_len) {
 342         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 343         return -1;
 344     }
 345     skip_bits_long(gb, comment_len);
 346     return 0;
 347 }
 348
 349 /**
 350  * Set up channel positions based on a default channel configuration
 351  * as specified in table 1.17.
 352  *
 353  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 354  *
 355  * @return  Returns error status. 0 - OK, !0 - error
 356  */
 357 static av_cold int set_default_channel_config(AACContext *ac,
 358                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 359                                       int channel_config)
 360 {
 361     if (channel_config < 1 || channel_config > 7) {
 362         av_log(ac->avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 363                channel_config);
 364         return -1;
 365     }
 366
 367     /* default channel configurations:
 368      *
 369      * 1ch : front center (mono)
 370      * 2ch : L + R (stereo)
 371      * 3ch : front center + L + R
 372      * 4ch : front center + L + R + back center
 373      * 5ch : front center + L + R + back stereo
 374      * 6ch : front center + L + R + back stereo + LFE
 375      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 376      */
 377
 378     if (channel_config != 2)
 379         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 380     if (channel_config > 1)
 381         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 382     if (channel_config == 4)
 383         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 384     if (channel_config > 4)
 385         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 386         = AAC_CHANNEL_BACK;  // back stereo
 387     if (channel_config > 5)
 388         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 389     if (channel_config == 7)
 390         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 391
 392     return 0;
 393 }
 394
 395 /**
 396  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 397  *
 398  * @return  Returns error status. 0 - OK, !0 - error
 399  */
 400 static int decode_ga_specific_config(AACContext *ac, GetBitContext *gb,
 401                                      int channel_config)
 402 {
 403     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 404     int extension_flag, ret;
 405
 406     if (get_bits1(gb)) { // frameLengthFlag
 407         av_log_missing_feature(ac->avctx, "960/120 MDCT window is", 1);
 408         return -1;
 409     }
 410
 411     if (get_bits1(gb))       // dependsOnCoreCoder
 412         skip_bits(gb, 14);   // coreCoderDelay
 413     extension_flag = get_bits1(gb);
 414
 415     if (ac->m4ac.object_type == AOT_AAC_SCALABLE ||
 416         ac->m4ac.object_type == AOT_ER_AAC_SCALABLE)
 417         skip_bits(gb, 3);     // layerNr
 418
 419     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 420     if (channel_config == 0) {
 421         skip_bits(gb, 4);  // element_instance_tag
 422         if ((ret = decode_pce(ac, new_che_pos, gb)))
 423             return ret;
 424     } else {
 425         if ((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
 426             return ret;
 427     }
 428     if ((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 429         return ret;
 430
 431     if (extension_flag) {
 432         switch (ac->m4ac.object_type) {
 433         case AOT_ER_BSAC:
 434             skip_bits(gb, 5);    // numOfSubFrame
 435             skip_bits(gb, 11);   // layer_length
 436             break;
 437         case AOT_ER_AAC_LC:
 438         case AOT_ER_AAC_LTP:
 439         case AOT_ER_AAC_SCALABLE:
 440         case AOT_ER_AAC_LD:
 441             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 442                                     * aacScalefactorDataResilienceFlag
 443                                     * aacSpectralDataResilienceFlag
 444                                     */
 445             break;
 446         }
 447         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 448     }
 449     return 0;
 450 }
 451
 452 /**
 453  * Decode audio specific configuration; reference: table 1.13.
 454  *
 455  * @param   data        pointer to AVCodecContext extradata
 456  * @param   data_size   size of AVCCodecContext extradata
 457  *
 458  * @return  Returns error status. 0 - OK, !0 - error
 459  */
 460 static int decode_audio_specific_config(AACContext *ac, void *data,
 461                                         int data_size)
 462 {
 463     GetBitContext gb;
 464     int i;
 465
 466     init_get_bits(&gb, data, data_size * 8);
 467
 468     if ((i = ff_mpeg4audio_get_config(&ac->m4ac, data, data_size)) < 0)
 469         return -1;
 470     if (ac->m4ac.sampling_index > 12) {
 471         av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
 472         return -1;
 473     }
 474
 475     skip_bits_long(&gb, i);
 476
 477     switch (ac->m4ac.object_type) {
 478     case AOT_AAC_MAIN:
 479     case AOT_AAC_LC:
 480         if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
 481             return -1;
 482         break;
 483     default:
 484         av_log(ac->avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 485                ac->m4ac.sbr == 1? "SBR+" : "", ac->m4ac.object_type);
 486         return -1;
 487     }
 488     return 0;
 489 }
 490
 491 /**
 492  * linear congruential pseudorandom number generator
 493  *
 494  * @param   previous_val    pointer to the current state of the generator
 495  *
 496  * @return  Returns a 32-bit pseudorandom integer
 497  */
 498 static av_always_inline int lcg_random(int previous_val)
 499 {
 500     return previous_val * 1664525 + 1013904223;
 501 }
 502
 503 static av_always_inline void reset_predict_state(PredictorState *ps)
 504 {
 505     ps->r0   = 0.0f;
 506     ps->r1   = 0.0f;
 507     ps->cor0 = 0.0f;
 508     ps->cor1 = 0.0f;
 509     ps->var0 = 1.0f;
 510     ps->var1 = 1.0f;
 511 }
 512
 513 static void reset_all_predictors(PredictorState *ps)
 514 {
 515     int i;
 516     for (i = 0; i < MAX_PREDICTORS; i++)
 517         reset_predict_state(&ps[i]);
 518 }
 519
 520 static void reset_predictor_group(PredictorState *ps, int group_num)
 521 {
 522     int i;
 523     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 524         reset_predict_state(&ps[i]);
 525 }
 526
 527 static av_cold int aac_decode_init(AVCodecContext *avctx)
 528 {
 529     AACContext *ac = avctx->priv_data;
 530     int i;
 531
 532     ac->avctx = avctx;
 533     ac->m4ac.sample_rate = avctx->sample_rate;
 534
 535     if (avctx->extradata_size > 0) {
 536         if (decode_audio_specific_config(ac, avctx->extradata, avctx->extradata_size))
 537             return -1;
 538     }
 539
 540     avctx->sample_fmt = SAMPLE_FMT_S16;
 541
 542     AAC_INIT_VLC_STATIC( 0, 304);
 543     AAC_INIT_VLC_STATIC( 1, 270);
 544     AAC_INIT_VLC_STATIC( 2, 550);
 545     AAC_INIT_VLC_STATIC( 3, 300);
 546     AAC_INIT_VLC_STATIC( 4, 328);
 547     AAC_INIT_VLC_STATIC( 5, 294);
 548     AAC_INIT_VLC_STATIC( 6, 306);
 549     AAC_INIT_VLC_STATIC( 7, 268);
 550     AAC_INIT_VLC_STATIC( 8, 510);
 551     AAC_INIT_VLC_STATIC( 9, 366);
 552     AAC_INIT_VLC_STATIC(10, 462);
 553
 554     ff_aac_sbr_init();
 555
 556     dsputil_init(&ac->dsp, avctx);
 557
 558     ac->random_state = 0x1f2e3d4c;
 559
 560     // -1024 - Compensate wrong IMDCT method.
 561     // 32768 - Required to scale values to the correct range for the bias method
 562     //         for float to int16 conversion.
 563
 564     if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
 565         ac->add_bias  = 385.0f;
 566         ac->sf_scale  = 1. / (-1024. * 32768.);
 567         ac->sf_offset = 0;
 568     } else {
 569         ac->add_bias  = 0.0f;
 570         ac->sf_scale  = 1. / -1024.;
 571         ac->sf_offset = 60;
 572     }
 573
 574 #if !CONFIG_HARDCODED_TABLES
 575     for (i = 0; i < 428; i++)
 576         ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
 577 #endif /* CONFIG_HARDCODED_TABLES */
 578
 579     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 580                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 581                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 582                     352);
 583
 584     ff_mdct_init(&ac->mdct, 11, 1, 1.0);
 585     ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
 586     // window initialization
 587     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 588     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 589     ff_init_ff_sine_windows(10);
 590     ff_init_ff_sine_windows( 7);
 591
 592     cbrt_tableinit();
 593
 594     return 0;
 595 }
 596
 597 /**
 598  * Skip data_stream_element; reference: table 4.10.
 599  */
 600 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 601 {
 602     int byte_align = get_bits1(gb);
 603     int count = get_bits(gb, 8);
 604     if (count == 255)
 605         count += get_bits(gb, 8);
 606     if (byte_align)
 607         align_get_bits(gb);
 608
 609     if (get_bits_left(gb) < 8 * count) {
 610         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 611         return -1;
 612     }
 613     skip_bits_long(gb, 8 * count);
 614     return 0;
 615 }
 616
 617 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 618                              GetBitContext *gb)
 619 {
 620     int sfb;
 621     if (get_bits1(gb)) {
 622         ics->predictor_reset_group = get_bits(gb, 5);
 623         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 624             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 625             return -1;
 626         }
 627     }
 628     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 629         ics->prediction_used[sfb] = get_bits1(gb);
 630     }
 631     return 0;
 632 }
 633
 634 /**
 635  * Decode Individual Channel Stream info; reference: table 4.6.
 636  *
 637  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 638  */
 639 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 640                            GetBitContext *gb, int common_window)
 641 {
 642     if (get_bits1(gb)) {
 643         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 644         memset(ics, 0, sizeof(IndividualChannelStream));
 645         return -1;
 646     }
 647     ics->window_sequence[1] = ics->window_sequence[0];
 648     ics->window_sequence[0] = get_bits(gb, 2);
 649     ics->use_kb_window[1]   = ics->use_kb_window[0];
 650     ics->use_kb_window[0]   = get_bits1(gb);
 651     ics->num_window_groups  = 1;
 652     ics->group_len[0]       = 1;
 653     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 654         int i;
 655         ics->max_sfb = get_bits(gb, 4);
 656         for (i = 0; i < 7; i++) {
 657             if (get_bits1(gb)) {
 658                 ics->group_len[ics->num_window_groups - 1]++;
 659             } else {
 660                 ics->num_window_groups++;
 661                 ics->group_len[ics->num_window_groups - 1] = 1;
 662             }
 663         }
 664         ics->num_windows       = 8;
 665         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 666         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 667         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 668         ics->predictor_present = 0;
 669     } else {
 670         ics->max_sfb               = get_bits(gb, 6);
 671         ics->num_windows           = 1;
 672         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 673         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 674         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 675         ics->predictor_present     = get_bits1(gb);
 676         ics->predictor_reset_group = 0;
 677         if (ics->predictor_present) {
 678             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 679                 if (decode_prediction(ac, ics, gb)) {
 680                     memset(ics, 0, sizeof(IndividualChannelStream));
 681                     return -1;
 682                 }
 683             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 684                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 685                 memset(ics, 0, sizeof(IndividualChannelStream));
 686                 return -1;
 687             } else {
 688                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
 689                 memset(ics, 0, sizeof(IndividualChannelStream));
 690                 return -1;
 691             }
 692         }
 693     }
 694
 695     if (ics->max_sfb > ics->num_swb) {
 696         av_log(ac->avctx, AV_LOG_ERROR,
 697                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 698                ics->max_sfb, ics->num_swb);
 699         memset(ics, 0, sizeof(IndividualChannelStream));
 700         return -1;
 701     }
 702
 703     return 0;
 704 }
 705
 706 /**
 707  * Decode band types (section_data payload); reference: table 4.46.
 708  *
 709  * @param   band_type           array of the used band type
 710  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 711  *
 712  * @return  Returns error status. 0 - OK, !0 - error
 713  */
 714 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 715                              int band_type_run_end[120], GetBitContext *gb,
 716                              IndividualChannelStream *ics)
 717 {
 718     int g, idx = 0;
 719     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 720     for (g = 0; g < ics->num_window_groups; g++) {
 721         int k = 0;
 722         while (k < ics->max_sfb) {
 723             uint8_t sect_end = k;
 724             int sect_len_incr;
 725             int sect_band_type = get_bits(gb, 4);
 726             if (sect_band_type == 12) {
 727                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 728                 return -1;
 729             }
 730             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 731                 sect_end += sect_len_incr;
 732             sect_end += sect_len_incr;
 733             if (get_bits_left(gb) < 0) {
 734                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 735                 return -1;
 736             }
 737             if (sect_end > ics->max_sfb) {
 738                 av_log(ac->avctx, AV_LOG_ERROR,
 739                        "Number of bands (%d) exceeds limit (%d).\n",
 740                        sect_end, ics->max_sfb);
 741                 return -1;
 742             }
 743             for (; k < sect_end; k++) {
 744                 band_type        [idx]   = sect_band_type;
 745                 band_type_run_end[idx++] = sect_end;
 746             }
 747         }
 748     }
 749     return 0;
 750 }
 751
 752 /**
 753  * Decode scalefactors; reference: table 4.47.
 754  *
 755  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 756  * @param   band_type           array of the used band type
 757  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 758  * @param   sf                  array of scalefactors or intensity stereo positions
 759  *
 760  * @return  Returns error status. 0 - OK, !0 - error
 761  */
 762 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 763                                unsigned int global_gain,
 764                                IndividualChannelStream *ics,
 765                                enum BandType band_type[120],
 766                                int band_type_run_end[120])
 767 {
 768     const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
 769     int g, i, idx = 0;
 770     int offset[3] = { global_gain, global_gain - 90, 100 };
 771     int noise_flag = 1;
 772     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 773     for (g = 0; g < ics->num_window_groups; g++) {
 774         for (i = 0; i < ics->max_sfb;) {
 775             int run_end = band_type_run_end[idx];
 776             if (band_type[idx] == ZERO_BT) {
 777                 for (; i < run_end; i++, idx++)
 778                     sf[idx] = 0.;
 779             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 780                 for (; i < run_end; i++, idx++) {
 781                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 782                     if (offset[2] > 255U) {
 783                         av_log(ac->avctx, AV_LOG_ERROR,
 784                                "%s (%d) out of range.\n", sf_str[2], offset[2]);
 785                         return -1;
 786                     }
 787                     sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
 788                 }
 789             } else if (band_type[idx] == NOISE_BT) {
 790                 for (; i < run_end; i++, idx++) {
 791                     if (noise_flag-- > 0)
 792                         offset[1] += get_bits(gb, 9) - 256;
 793                     else
 794                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 795                     if (offset[1] > 255U) {
 796                         av_log(ac->avctx, AV_LOG_ERROR,
 797                                "%s (%d) out of range.\n", sf_str[1], offset[1]);
 798                         return -1;
 799                     }
 800                     sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
 801                 }
 802             } else {
 803                 for (; i < run_end; i++, idx++) {
 804                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 805                     if (offset[0] > 255U) {
 806                         av_log(ac->avctx, AV_LOG_ERROR,
 807                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 808                         return -1;
 809                     }
 810                     sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
 811                 }
 812             }
 813         }
 814     }
 815     return 0;
 816 }
 817
 818 /**
 819  * Decode pulse data; reference: table 4.7.
 820  */
 821 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 822                          const uint16_t *swb_offset, int num_swb)
 823 {
 824     int i, pulse_swb;
 825     pulse->num_pulse = get_bits(gb, 2) + 1;
 826     pulse_swb        = get_bits(gb, 6);
 827     if (pulse_swb >= num_swb)
 828         return -1;
 829     pulse->pos[0]    = swb_offset[pulse_swb];
 830     pulse->pos[0]   += get_bits(gb, 5);
 831     if (pulse->pos[0] > 1023)
 832         return -1;
 833     pulse->amp[0]    = get_bits(gb, 4);
 834     for (i = 1; i < pulse->num_pulse; i++) {
 835         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 836         if (pulse->pos[i] > 1023)
 837             return -1;
 838         pulse->amp[i] = get_bits(gb, 4);
 839     }
 840     return 0;
 841 }
 842
 843 /**
 844  * Decode Temporal Noise Shaping data; reference: table 4.48.
 845  *
 846  * @return  Returns error status. 0 - OK, !0 - error
 847  */
 848 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 849                       GetBitContext *gb, const IndividualChannelStream *ics)
 850 {
 851     int w, filt, i, coef_len, coef_res, coef_compress;
 852     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 853     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 854     for (w = 0; w < ics->num_windows; w++) {
 855         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 856             coef_res = get_bits1(gb);
 857
 858             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 859                 int tmp2_idx;
 860                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 861
 862                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 863                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 864                            tns->order[w][filt], tns_max_order);
 865                     tns->order[w][filt] = 0;
 866                     return -1;
 867                 }
 868                 if (tns->order[w][filt]) {
 869                     tns->direction[w][filt] = get_bits1(gb);
 870                     coef_compress = get_bits1(gb);
 871                     coef_len = coef_res + 3 - coef_compress;
 872                     tmp2_idx = 2 * coef_compress + coef_res;
 873
 874                     for (i = 0; i < tns->order[w][filt]; i++)
 875                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 876                 }
 877             }
 878         }
 879     }
 880     return 0;
 881 }
 882
 883 /**
 884  * Decode Mid/Side data; reference: table 4.54.
 885  *
 886  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 887  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 888  *                      [3] reserved for scalable AAC
 889  */
 890 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 891                                    int ms_present)
 892 {
 893     int idx;
 894     if (ms_present == 1) {
 895         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 896             cpe->ms_mask[idx] = get_bits1(gb);
 897     } else if (ms_present == 2) {
 898         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 899     }
 900 }
 901
 902 #ifndef VMUL2
 903 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 904                            const float *scale)
 905 {
 906     float s = *scale;
 907     *dst++ = v[idx    & 15] * s;
 908     *dst++ = v[idx>>4 & 15] * s;
 909     return dst;
 910 }
 911 #endif
 912
 913 #ifndef VMUL4
 914 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 915                            const float *scale)
 916 {
 917     float s = *scale;
 918     *dst++ = v[idx    & 3] * s;
 919     *dst++ = v[idx>>2 & 3] * s;
 920     *dst++ = v[idx>>4 & 3] * s;
 921     *dst++ = v[idx>>6 & 3] * s;
 922     return dst;
 923 }
 924 #endif
 925
 926 #ifndef VMUL2S
 927 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 928                             unsigned sign, const float *scale)
 929 {
 930     union float754 s0, s1;
 931
 932     s0.f = s1.f = *scale;
 933     s0.i ^= sign >> 1 << 31;
 934     s1.i ^= sign      << 31;
 935
 936     *dst++ = v[idx    & 15] * s0.f;
 937     *dst++ = v[idx>>4 & 15] * s1.f;
 938
 939     return dst;
 940 }
 941 #endif
 942
 943 #ifndef VMUL4S
 944 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 945                             unsigned sign, const float *scale)
 946 {
 947     unsigned nz = idx >> 12;
 948     union float754 s = { .f = *scale };
 949     union float754 t;
 950
 951     t.i = s.i ^ (sign & 1<<31);
 952     *dst++ = v[idx    & 3] * t.f;
 953
 954     sign <<= nz & 1; nz >>= 1;
 955     t.i = s.i ^ (sign & 1<<31);
 956     *dst++ = v[idx>>2 & 3] * t.f;
 957
 958     sign <<= nz & 1; nz >>= 1;
 959     t.i = s.i ^ (sign & 1<<31);
 960     *dst++ = v[idx>>4 & 3] * t.f;
 961
 962     sign <<= nz & 1; nz >>= 1;
 963     t.i = s.i ^ (sign & 1<<31);
 964     *dst++ = v[idx>>6 & 3] * t.f;
 965
 966     return dst;
 967 }
 968 #endif
 969
 970 /**
 971  * Decode spectral data; reference: table 4.50.
 972  * Dequantize and scale spectral data; reference: 4.6.3.3.
 973  *
 974  * @param   coef            array of dequantized, scaled spectral data
 975  * @param   sf              array of scalefactors or intensity stereo positions
 976  * @param   pulse_present   set if pulses are present
 977  * @param   pulse           pointer to pulse data struct
 978  * @param   band_type       array of the used band type
 979  *
 980  * @return  Returns error status. 0 - OK, !0 - error
 981  */
 982 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 983                                        GetBitContext *gb, const float sf[120],
 984                                        int pulse_present, const Pulse *pulse,
 985                                        const IndividualChannelStream *ics,
 986                                        enum BandType band_type[120])
 987 {
 988     int i, k, g, idx = 0;
 989     const int c = 1024 / ics->num_windows;
 990     const uint16_t *offsets = ics->swb_offset;
 991     float *coef_base = coef;
 992     int err_idx;
 993
 994     for (g = 0; g < ics->num_windows; g++)
 995         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
 996
 997     for (g = 0; g < ics->num_window_groups; g++) {
 998         unsigned g_len = ics->group_len[g];
 999
1000         for (i = 0; i < ics->max_sfb; i++, idx++) {
1001             const unsigned cbt_m1 = band_type[idx] - 1;
1002             float *cfo = coef + offsets[i];
1003             int off_len = offsets[i + 1] - offsets[i];
1004             int group;
1005
1006             if (cbt_m1 >= INTENSITY_BT2 - 1) {
1007                 for (group = 0; group < g_len; group++, cfo+=128) {
1008                     memset(cfo, 0, off_len * sizeof(float));
1009                 }
1010             } else if (cbt_m1 == NOISE_BT - 1) {
1011                 for (group = 0; group < g_len; group++, cfo+=128) {
1012                     float scale;
1013                     float band_energy;
1014
1015                     for (k = 0; k < off_len; k++) {
1016                         ac->random_state  = lcg_random(ac->random_state);
1017                         cfo[k] = ac->random_state;
1018                     }
1019
1020                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1021                     scale = sf[idx] / sqrtf(band_energy);
1022                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1023                 }
1024             } else {
1025                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1026                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1027                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1028                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
1029                 OPEN_READER(re, gb);
1030
1031                 switch (cbt_m1 >> 1) {
1032                 case 0:
1033                     for (group = 0; group < g_len; group++, cfo+=128) {
1034                         float *cf = cfo;
1035                         int len = off_len;
1036
1037                         do {
1038                             int code;
1039                             unsigned cb_idx;
1040
1041                             UPDATE_CACHE(re, gb);
1042                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1043
1044                             if (code >= cb_size) {
1045                                 err_idx = code;
1046                                 goto err_cb_overflow;
1047                             }
1048
1049                             cb_idx = cb_vector_idx[code];
1050                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1051                         } while (len -= 4);
1052                     }
1053                     break;
1054
1055                 case 1:
1056                     for (group = 0; group < g_len; group++, cfo+=128) {
1057                         float *cf = cfo;
1058                         int len = off_len;
1059
1060                         do {
1061                             int code;
1062                             unsigned nnz;
1063                             unsigned cb_idx;
1064                             uint32_t bits;
1065
1066                             UPDATE_CACHE(re, gb);
1067                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1068
1069                             if (code >= cb_size) {
1070                                 err_idx = code;
1071                                 goto err_cb_overflow;
1072                             }
1073
1074 #if MIN_CACHE_BITS < 20
1075                             UPDATE_CACHE(re, gb);
1076 #endif
1077                             cb_idx = cb_vector_idx[code];
1078                             nnz = cb_idx >> 8 & 15;
1079                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1080                             LAST_SKIP_BITS(re, gb, nnz);
1081                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1082                         } while (len -= 4);
1083                     }
1084                     break;
1085
1086                 case 2:
1087                     for (group = 0; group < g_len; group++, cfo+=128) {
1088                         float *cf = cfo;
1089                         int len = off_len;
1090
1091                         do {
1092                             int code;
1093                             unsigned cb_idx;
1094
1095                             UPDATE_CACHE(re, gb);
1096                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1097
1098                             if (code >= cb_size) {
1099                                 err_idx = code;
1100                                 goto err_cb_overflow;
1101                             }
1102
1103                             cb_idx = cb_vector_idx[code];
1104                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1105                         } while (len -= 2);
1106                     }
1107                     break;
1108
1109                 case 3:
1110                 case 4:
1111                     for (group = 0; group < g_len; group++, cfo+=128) {
1112                         float *cf = cfo;
1113                         int len = off_len;
1114
1115                         do {
1116                             int code;
1117                             unsigned nnz;
1118                             unsigned cb_idx;
1119                             unsigned sign;
1120
1121                             UPDATE_CACHE(re, gb);
1122                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1123
1124                             if (code >= cb_size) {
1125                                 err_idx = code;
1126                                 goto err_cb_overflow;
1127                             }
1128
1129                             cb_idx = cb_vector_idx[code];
1130                             nnz = cb_idx >> 8 & 15;
1131                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1132                             LAST_SKIP_BITS(re, gb, nnz);
1133                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1134                         } while (len -= 2);
1135                     }
1136                     break;
1137
1138                 default:
1139                     for (group = 0; group < g_len; group++, cfo+=128) {
1140                         float *cf = cfo;
1141                         uint32_t *icf = (uint32_t *) cf;
1142                         int len = off_len;
1143
1144                         do {
1145                             int code;
1146                             unsigned nzt, nnz;
1147                             unsigned cb_idx;
1148                             uint32_t bits;
1149                             int j;
1150
1151                             UPDATE_CACHE(re, gb);
1152                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1153
1154                             if (!code) {
1155                                 *icf++ = 0;
1156                                 *icf++ = 0;
1157                                 continue;
1158                             }
1159
1160                             if (code >= cb_size) {
1161                                 err_idx = code;
1162                                 goto err_cb_overflow;
1163                             }
1164
1165                             cb_idx = cb_vector_idx[code];
1166                             nnz = cb_idx >> 12;
1167                             nzt = cb_idx >> 8;
1168                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1169                             LAST_SKIP_BITS(re, gb, nnz);
1170
1171                             for (j = 0; j < 2; j++) {
1172                                 if (nzt & 1<<j) {
1173                                     uint32_t b;
1174                                     int n;
1175                                     /* The total length of escape_sequence must be < 22 bits according
1176                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1177                                     UPDATE_CACHE(re, gb);
1178                                     b = GET_CACHE(re, gb);
1179                                     b = 31 - av_log2(~b);
1180
1181                                     if (b > 8) {
1182                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1183                                         return -1;
1184                                     }
1185
1186 #if MIN_CACHE_BITS < 21
1187                                     LAST_SKIP_BITS(re, gb, b + 1);
1188                                     UPDATE_CACHE(re, gb);
1189 #else
1190                                     SKIP_BITS(re, gb, b + 1);
1191 #endif
1192                                     b += 4;
1193                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1194                                     LAST_SKIP_BITS(re, gb, b);
1195                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
1196                                     bits <<= 1;
1197                                 } else {
1198                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1199                                     *icf++ = (bits & 1<<31) | v;
1200                                     bits <<= !!v;
1201                                 }
1202                                 cb_idx >>= 4;
1203                             }
1204                         } while (len -= 2);
1205
1206                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1207                     }
1208                 }
1209
1210                 CLOSE_READER(re, gb);
1211             }
1212         }
1213         coef += g_len << 7;
1214     }
1215
1216     if (pulse_present) {
1217         idx = 0;
1218         for (i = 0; i < pulse->num_pulse; i++) {
1219             float co = coef_base[ pulse->pos[i] ];
1220             while (offsets[idx + 1] <= pulse->pos[i])
1221                 idx++;
1222             if (band_type[idx] != NOISE_BT && sf[idx]) {
1223                 float ico = -pulse->amp[i];
1224                 if (co) {
1225                     co /= sf[idx];
1226                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1227                 }
1228                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1229             }
1230         }
1231     }
1232     return 0;
1233
1234 err_cb_overflow:
1235     av_log(ac->avctx, AV_LOG_ERROR,
1236            "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
1237            band_type[idx], err_idx, ff_aac_spectral_sizes[band_type[idx]]);
1238     return -1;
1239 }
1240
1241 static av_always_inline float flt16_round(float pf)
1242 {
1243     union float754 tmp;
1244     tmp.f = pf;
1245     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1246     return tmp.f;
1247 }
1248
1249 static av_always_inline float flt16_even(float pf)
1250 {
1251     union float754 tmp;
1252     tmp.f = pf;
1253     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1254     return tmp.f;
1255 }
1256
1257 static av_always_inline float flt16_trunc(float pf)
1258 {
1259     union float754 pun;
1260     pun.f = pf;
1261     pun.i &= 0xFFFF0000U;
1262     return pun.f;
1263 }
1264
1265 static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
1266                     int output_enable)
1267 {
1268     const float a     = 0.953125; // 61.0 / 64
1269     const float alpha = 0.90625;  // 29.0 / 32
1270     float e0, e1;
1271     float pv;
1272     float k1, k2;
1273
1274     k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
1275     k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
1276
1277     pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
1278     if (output_enable)
1279         *coef += pv * ac->sf_scale;
1280
1281     e0 = *coef / ac->sf_scale;
1282     e1 = e0 - k1 * ps->r0;
1283
1284     ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
1285     ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
1286     ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
1287     ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
1288
1289     ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
1290     ps->r0 = flt16_trunc(a * e0);
1291 }
1292
1293 /**
1294  * Apply AAC-Main style frequency domain prediction.
1295  */
1296 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1297 {
1298     int sfb, k;
1299
1300     if (!sce->ics.predictor_initialized) {
1301         reset_all_predictors(sce->predictor_state);
1302         sce->ics.predictor_initialized = 1;
1303     }
1304
1305     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1306         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1307             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1308                 predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
1309                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1310             }
1311         }
1312         if (sce->ics.predictor_reset_group)
1313             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1314     } else
1315         reset_all_predictors(sce->predictor_state);
1316 }
1317
1318 /**
1319  * Decode an individual_channel_stream payload; reference: table 4.44.
1320  *
1321  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1322  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1323  *
1324  * @return  Returns error status. 0 - OK, !0 - error
1325  */
1326 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1327                       GetBitContext *gb, int common_window, int scale_flag)
1328 {
1329     Pulse pulse;
1330     TemporalNoiseShaping    *tns = &sce->tns;
1331     IndividualChannelStream *ics = &sce->ics;
1332     float *out = sce->coeffs;
1333     int global_gain, pulse_present = 0;
1334
1335     /* This assignment is to silence a GCC warning about the variable being used
1336      * uninitialized when in fact it always is.
1337      */
1338     pulse.num_pulse = 0;
1339
1340     global_gain = get_bits(gb, 8);
1341
1342     if (!common_window && !scale_flag) {
1343         if (decode_ics_info(ac, ics, gb, 0) < 0)
1344             return -1;
1345     }
1346
1347     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1348         return -1;
1349     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1350         return -1;
1351
1352     pulse_present = 0;
1353     if (!scale_flag) {
1354         if ((pulse_present = get_bits1(gb))) {
1355             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1356                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1357                 return -1;
1358             }
1359             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1360                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1361                 return -1;
1362             }
1363         }
1364         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1365             return -1;
1366         if (get_bits1(gb)) {
1367             av_log_missing_feature(ac->avctx, "SSR", 1);
1368             return -1;
1369         }
1370     }
1371
1372     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1373         return -1;
1374
1375     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1376         apply_prediction(ac, sce);
1377
1378     return 0;
1379 }
1380
1381 /**
1382  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1383  */
1384 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1385 {
1386     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1387     float *ch0 = cpe->ch[0].coeffs;
1388     float *ch1 = cpe->ch[1].coeffs;
1389     int g, i, group, idx = 0;
1390     const uint16_t *offsets = ics->swb_offset;
1391     for (g = 0; g < ics->num_window_groups; g++) {
1392         for (i = 0; i < ics->max_sfb; i++, idx++) {
1393             if (cpe->ms_mask[idx] &&
1394                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1395                 for (group = 0; group < ics->group_len[g]; group++) {
1396                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1397                                               ch1 + group * 128 + offsets[i],
1398                                               offsets[i+1] - offsets[i]);
1399                 }
1400             }
1401         }
1402         ch0 += ics->group_len[g] * 128;
1403         ch1 += ics->group_len[g] * 128;
1404     }
1405 }
1406
1407 /**
1408  * intensity stereo decoding; reference: 4.6.8.2.3
1409  *
1410  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1411  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1412  *                      [3] reserved for scalable AAC
1413  */
1414 static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
1415 {
1416     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1417     SingleChannelElement         *sce1 = &cpe->ch[1];
1418     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1419     const uint16_t *offsets = ics->swb_offset;
1420     int g, group, i, k, idx = 0;
1421     int c;
1422     float scale;
1423     for (g = 0; g < ics->num_window_groups; g++) {
1424         for (i = 0; i < ics->max_sfb;) {
1425             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1426                 const int bt_run_end = sce1->band_type_run_end[idx];
1427                 for (; i < bt_run_end; i++, idx++) {
1428                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1429                     if (ms_present)
1430                         c *= 1 - 2 * cpe->ms_mask[idx];
1431                     scale = c * sce1->sf[idx];
1432                     for (group = 0; group < ics->group_len[g]; group++)
1433                         for (k = offsets[i]; k < offsets[i + 1]; k++)
1434                             coef1[group * 128 + k] = scale * coef0[group * 128 + k];
1435                 }
1436             } else {
1437                 int bt_run_end = sce1->band_type_run_end[idx];
1438                 idx += bt_run_end - i;
1439                 i    = bt_run_end;
1440             }
1441         }
1442         coef0 += ics->group_len[g] * 128;
1443         coef1 += ics->group_len[g] * 128;
1444     }
1445 }
1446
1447 /**
1448  * Decode a channel_pair_element; reference: table 4.4.
1449  *
1450  * @param   elem_id Identifies the instance of a syntax element.
1451  *
1452  * @return  Returns error status. 0 - OK, !0 - error
1453  */
1454 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1455 {
1456     int i, ret, common_window, ms_present = 0;
1457
1458     common_window = get_bits1(gb);
1459     if (common_window) {
1460         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1461             return -1;
1462         i = cpe->ch[1].ics.use_kb_window[0];
1463         cpe->ch[1].ics = cpe->ch[0].ics;
1464         cpe->ch[1].ics.use_kb_window[1] = i;
1465         ms_present = get_bits(gb, 2);
1466         if (ms_present == 3) {
1467             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1468             return -1;
1469         } else if (ms_present)
1470             decode_mid_side_stereo(cpe, gb, ms_present);
1471     }
1472     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1473         return ret;
1474     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1475         return ret;
1476
1477     if (common_window) {
1478         if (ms_present)
1479             apply_mid_side_stereo(ac, cpe);
1480         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1481             apply_prediction(ac, &cpe->ch[0]);
1482             apply_prediction(ac, &cpe->ch[1]);
1483         }
1484     }
1485
1486     apply_intensity_stereo(cpe, ms_present);
1487     return 0;
1488 }
1489
1490 /**
1491  * Decode coupling_channel_element; reference: table 4.8.
1492  *
1493  * @param   elem_id Identifies the instance of a syntax element.
1494  *
1495  * @return  Returns error status. 0 - OK, !0 - error
1496  */
1497 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1498 {
1499     int num_gain = 0;
1500     int c, g, sfb, ret;
1501     int sign;
1502     float scale;
1503     SingleChannelElement *sce = &che->ch[0];
1504     ChannelCoupling     *coup = &che->coup;
1505
1506     coup->coupling_point = 2 * get_bits1(gb);
1507     coup->num_coupled = get_bits(gb, 3);
1508     for (c = 0; c <= coup->num_coupled; c++) {
1509         num_gain++;
1510         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1511         coup->id_select[c] = get_bits(gb, 4);
1512         if (coup->type[c] == TYPE_CPE) {
1513             coup->ch_select[c] = get_bits(gb, 2);
1514             if (coup->ch_select[c] == 3)
1515                 num_gain++;
1516         } else
1517             coup->ch_select[c] = 2;
1518     }
1519     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1520
1521     sign  = get_bits(gb, 1);
1522     scale = pow(2., pow(2., (int)get_bits(gb, 2) - 3));
1523
1524     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1525         return ret;
1526
1527     for (c = 0; c < num_gain; c++) {
1528         int idx  = 0;
1529         int cge  = 1;
1530         int gain = 0;
1531         float gain_cache = 1.;
1532         if (c) {
1533             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1534             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1535             gain_cache = pow(scale, -gain);
1536         }
1537         if (coup->coupling_point == AFTER_IMDCT) {
1538             coup->gain[c][0] = gain_cache;
1539         } else {
1540             for (g = 0; g < sce->ics.num_window_groups; g++) {
1541                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1542                     if (sce->band_type[idx] != ZERO_BT) {
1543                         if (!cge) {
1544                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1545                             if (t) {
1546                                 int s = 1;
1547                                 t = gain += t;
1548                                 if (sign) {
1549                                     s  -= 2 * (t & 0x1);
1550                                     t >>= 1;
1551                                 }
1552                                 gain_cache = pow(scale, -t) * s;
1553                             }
1554                         }
1555                         coup->gain[c][idx] = gain_cache;
1556                     }
1557                 }
1558             }
1559         }
1560     }
1561     return 0;
1562 }
1563
1564 /**
1565  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1566  *
1567  * @return  Returns number of bytes consumed.
1568  */
1569 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1570                                          GetBitContext *gb)
1571 {
1572     int i;
1573     int num_excl_chan = 0;
1574
1575     do {
1576         for (i = 0; i < 7; i++)
1577             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1578     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1579
1580     return num_excl_chan / 7;
1581 }
1582
1583 /**
1584  * Decode dynamic range information; reference: table 4.52.
1585  *
1586  * @param   cnt length of TYPE_FIL syntactic element in bytes
1587  *
1588  * @return  Returns number of bytes consumed.
1589  */
1590 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1591                                 GetBitContext *gb, int cnt)
1592 {
1593     int n             = 1;
1594     int drc_num_bands = 1;
1595     int i;
1596
1597     /* pce_tag_present? */
1598     if (get_bits1(gb)) {
1599         che_drc->pce_instance_tag  = get_bits(gb, 4);
1600         skip_bits(gb, 4); // tag_reserved_bits
1601         n++;
1602     }
1603
1604     /* excluded_chns_present? */
1605     if (get_bits1(gb)) {
1606         n += decode_drc_channel_exclusions(che_drc, gb);
1607     }
1608
1609     /* drc_bands_present? */
1610     if (get_bits1(gb)) {
1611         che_drc->band_incr            = get_bits(gb, 4);
1612         che_drc->interpolation_scheme = get_bits(gb, 4);
1613         n++;
1614         drc_num_bands += che_drc->band_incr;
1615         for (i = 0; i < drc_num_bands; i++) {
1616             che_drc->band_top[i] = get_bits(gb, 8);
1617             n++;
1618         }
1619     }
1620
1621     /* prog_ref_level_present? */
1622     if (get_bits1(gb)) {
1623         che_drc->prog_ref_level = get_bits(gb, 7);
1624         skip_bits1(gb); // prog_ref_level_reserved_bits
1625         n++;
1626     }
1627
1628     for (i = 0; i < drc_num_bands; i++) {
1629         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1630         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1631         n++;
1632     }
1633
1634     return n;
1635 }
1636
1637 /**
1638  * Decode extension data (incomplete); reference: table 4.51.
1639  *
1640  * @param   cnt length of TYPE_FIL syntactic element in bytes
1641  *
1642  * @return Returns number of bytes consumed
1643  */
1644 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1645                                     ChannelElement *che, enum RawDataBlockType elem_type)
1646 {
1647     int crc_flag = 0;
1648     int res = cnt;
1649     switch (get_bits(gb, 4)) { // extension type
1650     case EXT_SBR_DATA_CRC:
1651         crc_flag++;
1652     case EXT_SBR_DATA:
1653         if (!che) {
1654             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1655             return res;
1656         } else if (!ac->m4ac.sbr) {
1657             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1658             skip_bits_long(gb, 8 * cnt - 4);
1659             return res;
1660         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1661             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1662             skip_bits_long(gb, 8 * cnt - 4);
1663             return res;
1664         } else {
1665             ac->m4ac.sbr = 1;
1666         }
1667         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1668         break;
1669     case EXT_DYNAMIC_RANGE:
1670         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1671         break;
1672     case EXT_FILL:
1673     case EXT_FILL_DATA:
1674     case EXT_DATA_ELEMENT:
1675     default:
1676         skip_bits_long(gb, 8 * cnt - 4);
1677         break;
1678     };
1679     return res;
1680 }
1681
1682 /**
1683  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1684  *
1685  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1686  * @param   coef    spectral coefficients
1687  */
1688 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1689                       IndividualChannelStream *ics, int decode)
1690 {
1691     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1692     int w, filt, m, i;
1693     int bottom, top, order, start, end, size, inc;
1694     float lpc[TNS_MAX_ORDER];
1695
1696     for (w = 0; w < ics->num_windows; w++) {
1697         bottom = ics->num_swb;
1698         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1699             top    = bottom;
1700             bottom = FFMAX(0, top - tns->length[w][filt]);
1701             order  = tns->order[w][filt];
1702             if (order == 0)
1703                 continue;
1704
1705             // tns_decode_coef
1706             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1707
1708             start = ics->swb_offset[FFMIN(bottom, mmm)];
1709             end   = ics->swb_offset[FFMIN(   top, mmm)];
1710             if ((size = end - start) <= 0)
1711                 continue;
1712             if (tns->direction[w][filt]) {
1713                 inc = -1;
1714                 start = end - 1;
1715             } else {
1716                 inc = 1;
1717             }
1718             start += w * 128;
1719
1720             // ar filter
1721             for (m = 0; m < size; m++, start += inc)
1722                 for (i = 1; i <= FFMIN(m, order); i++)
1723                     coef[start] -= coef[start - i * inc] * lpc[i - 1];
1724         }
1725     }
1726 }
1727
1728 /**
1729  * Conduct IMDCT and windowing.
1730  */
1731 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
1732 {
1733     IndividualChannelStream *ics = &sce->ics;
1734     float *in    = sce->coeffs;
1735     float *out   = sce->ret;
1736     float *saved = sce->saved;
1737     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1738     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1739     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1740     float *buf  = ac->buf_mdct;
1741     float *temp = ac->temp;
1742     int i;
1743
1744     // imdct
1745     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1746         if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1747             av_log(ac->avctx, AV_LOG_WARNING,
1748                    "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1749                    "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1750         for (i = 0; i < 1024; i += 128)
1751             ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1752     } else
1753         ff_imdct_half(&ac->mdct, buf, in);
1754
1755     /* window overlapping
1756      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1757      * and long to short transitions are considered to be short to short
1758      * transitions. This leaves just two cases (long to long and short to short)
1759      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1760      */
1761     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1762             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1763         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
1764     } else {
1765         for (i = 0; i < 448; i++)
1766             out[i] = saved[i] + bias;
1767
1768         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1769             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
1770             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
1771             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
1772             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
1773             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
1774             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1775         } else {
1776             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
1777             for (i = 576; i < 1024; i++)
1778                 out[i] = buf[i-512] + bias;
1779         }
1780     }
1781
1782     // buffer update
1783     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1784         for (i = 0; i < 64; i++)
1785             saved[i] = temp[64 + i] - bias;
1786         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1787         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1788         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1789         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1790     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1791         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1792         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1793     } else { // LONG_STOP or ONLY_LONG
1794         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1795     }
1796 }
1797
1798 /**
1799  * Apply dependent channel coupling (applied before IMDCT).
1800  *
1801  * @param   index   index into coupling gain array
1802  */
1803 static void apply_dependent_coupling(AACContext *ac,
1804                                      SingleChannelElement *target,
1805                                      ChannelElement *cce, int index)
1806 {
1807     IndividualChannelStream *ics = &cce->ch[0].ics;
1808     const uint16_t *offsets = ics->swb_offset;
1809     float *dest = target->coeffs;
1810     const float *src = cce->ch[0].coeffs;
1811     int g, i, group, k, idx = 0;
1812     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1813         av_log(ac->avctx, AV_LOG_ERROR,
1814                "Dependent coupling is not supported together with LTP\n");
1815         return;
1816     }
1817     for (g = 0; g < ics->num_window_groups; g++) {
1818         for (i = 0; i < ics->max_sfb; i++, idx++) {
1819             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1820                 const float gain = cce->coup.gain[index][idx];
1821                 for (group = 0; group < ics->group_len[g]; group++) {
1822                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1823                         // XXX dsputil-ize
1824                         dest[group * 128 + k] += gain * src[group * 128 + k];
1825                     }
1826                 }
1827             }
1828         }
1829         dest += ics->group_len[g] * 128;
1830         src  += ics->group_len[g] * 128;
1831     }
1832 }
1833
1834 /**
1835  * Apply independent channel coupling (applied after IMDCT).
1836  *
1837  * @param   index   index into coupling gain array
1838  */
1839 static void apply_independent_coupling(AACContext *ac,
1840                                        SingleChannelElement *target,
1841                                        ChannelElement *cce, int index)
1842 {
1843     int i;
1844     const float gain = cce->coup.gain[index][0];
1845     const float bias = ac->add_bias;
1846     const float *src = cce->ch[0].ret;
1847     float *dest = target->ret;
1848     const int len = 1024 << (ac->m4ac.sbr == 1);
1849
1850     for (i = 0; i < len; i++)
1851         dest[i] += gain * (src[i] - bias);
1852 }
1853
1854 /**
1855  * channel coupling transformation interface
1856  *
1857  * @param   index   index into coupling gain array
1858  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1859  */
1860 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1861                                    enum RawDataBlockType type, int elem_id,
1862                                    enum CouplingPoint coupling_point,
1863                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1864 {
1865     int i, c;
1866
1867     for (i = 0; i < MAX_ELEM_ID; i++) {
1868         ChannelElement *cce = ac->che[TYPE_CCE][i];
1869         int index = 0;
1870
1871         if (cce && cce->coup.coupling_point == coupling_point) {
1872             ChannelCoupling *coup = &cce->coup;
1873
1874             for (c = 0; c <= coup->num_coupled; c++) {
1875                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1876                     if (coup->ch_select[c] != 1) {
1877                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1878                         if (coup->ch_select[c] != 0)
1879                             index++;
1880                     }
1881                     if (coup->ch_select[c] != 2)
1882                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1883                 } else
1884                     index += 1 + (coup->ch_select[c] == 3);
1885             }
1886         }
1887     }
1888 }
1889
1890 /**
1891  * Convert spectral data to float samples, applying all supported tools as appropriate.
1892  */
1893 static void spectral_to_sample(AACContext *ac)
1894 {
1895     int i, type;
1896     float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
1897     for (type = 3; type >= 0; type--) {
1898         for (i = 0; i < MAX_ELEM_ID; i++) {
1899             ChannelElement *che = ac->che[type][i];
1900             if (che) {
1901                 if (type <= TYPE_CPE)
1902                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1903                 if (che->ch[0].tns.present)
1904                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1905                 if (che->ch[1].tns.present)
1906                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1907                 if (type <= TYPE_CPE)
1908                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
1909                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
1910                     imdct_and_windowing(ac, &che->ch[0], imdct_bias);
1911                     if (type == TYPE_CPE) {
1912                         imdct_and_windowing(ac, &che->ch[1], imdct_bias);
1913                     }
1914                     if (ac->m4ac.sbr > 0) {
1915                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
1916                     }
1917                 }
1918                 if (type <= TYPE_CCE)
1919                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
1920             }
1921         }
1922     }
1923 }
1924
1925 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
1926 {
1927     int size;
1928     AACADTSHeaderInfo hdr_info;
1929
1930     size = ff_aac_parse_header(gb, &hdr_info);
1931     if (size > 0) {
1932         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
1933             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
1934             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
1935             ac->m4ac.chan_config = hdr_info.chan_config;
1936             if (set_default_channel_config(ac, new_che_pos, hdr_info.chan_config))
1937                 return -7;
1938             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
1939                 return -7;
1940         } else if (ac->output_configured != OC_LOCKED) {
1941             ac->output_configured = OC_NONE;
1942         }
1943         if (ac->output_configured != OC_LOCKED)
1944             ac->m4ac.sbr = -1;
1945         ac->m4ac.sample_rate     = hdr_info.sample_rate;
1946         ac->m4ac.sampling_index  = hdr_info.sampling_index;
1947         ac->m4ac.object_type     = hdr_info.object_type;
1948         if (!ac->avctx->sample_rate)
1949             ac->avctx->sample_rate = hdr_info.sample_rate;
1950         if (hdr_info.num_aac_frames == 1) {
1951             if (!hdr_info.crc_absent)
1952                 skip_bits(gb, 16);
1953         } else {
1954             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
1955             return -1;
1956         }
1957     }
1958     return size;
1959 }
1960
1961 static int aac_decode_frame(AVCodecContext *avctx, void *data,
1962                             int *data_size, AVPacket *avpkt)
1963 {
1964     const uint8_t *buf = avpkt->data;
1965     int buf_size = avpkt->size;
1966     AACContext *ac = avctx->priv_data;
1967     ChannelElement *che = NULL, *che_prev = NULL;
1968     GetBitContext gb;
1969     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
1970     int err, elem_id, data_size_tmp;
1971     int buf_consumed;
1972     int samples = 1024, multiplier;
1973     int buf_offset;
1974
1975     init_get_bits(&gb, buf, buf_size * 8);
1976
1977     if (show_bits(&gb, 12) == 0xfff) {
1978         if (parse_adts_frame_header(ac, &gb) < 0) {
1979             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
1980             return -1;
1981         }
1982         if (ac->m4ac.sampling_index > 12) {
1983             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
1984             return -1;
1985         }
1986     }
1987
1988     memset(ac->tags_seen_this_frame, 0, sizeof(ac->tags_seen_this_frame));
1989     // parse
1990     while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
1991         elem_id = get_bits(&gb, 4);
1992
1993         if (elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) {
1994             av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
1995             return -1;
1996         }
1997
1998         switch (elem_type) {
1999
2000         case TYPE_SCE:
2001             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2002             break;
2003
2004         case TYPE_CPE:
2005             err = decode_cpe(ac, &gb, che);
2006             break;
2007
2008         case TYPE_CCE:
2009             err = decode_cce(ac, &gb, che);
2010             break;
2011
2012         case TYPE_LFE:
2013             err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
2014             break;
2015
2016         case TYPE_DSE:
2017             err = skip_data_stream_element(ac, &gb);
2018             break;
2019
2020         case TYPE_PCE: {
2021             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2022             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2023             if ((err = decode_pce(ac, new_che_pos, &gb)))
2024                 break;
2025             if (ac->output_configured > OC_TRIAL_PCE)
2026                 av_log(avctx, AV_LOG_ERROR,
2027                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2028             else
2029                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2030             break;
2031         }
2032
2033         case TYPE_FIL:
2034             if (elem_id == 15)
2035                 elem_id += get_bits(&gb, 8) - 1;
2036             if (get_bits_left(&gb) < 8 * elem_id) {
2037                     av_log(avctx, AV_LOG_ERROR, overread_err);
2038                     return -1;
2039             }
2040             while (elem_id > 0)
2041                 elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
2042             err = 0; /* FIXME */
2043             break;
2044
2045         default:
2046             err = -1; /* should not happen, but keeps compiler happy */
2047             break;
2048         }
2049
2050         che_prev       = che;
2051         elem_type_prev = elem_type;
2052
2053         if (err)
2054             return err;
2055
2056         if (get_bits_left(&gb) < 3) {
2057             av_log(avctx, AV_LOG_ERROR, overread_err);
2058             return -1;
2059         }
2060     }
2061
2062     spectral_to_sample(ac);
2063
2064     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2065     samples <<= multiplier;
2066     if (ac->output_configured < OC_LOCKED) {
2067         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2068         avctx->frame_size = samples;
2069     }
2070
2071     data_size_tmp = samples * avctx->channels * sizeof(int16_t);
2072     if (*data_size < data_size_tmp) {
2073         av_log(avctx, AV_LOG_ERROR,
2074                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2075                *data_size, data_size_tmp);
2076         return -1;
2077     }
2078     *data_size = data_size_tmp;
2079
2080     ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2081
2082     if (ac->output_configured)
2083         ac->output_configured = OC_LOCKED;
2084
2085     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2086     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2087         if (buf[buf_offset])
2088             break;
2089
2090     return buf_size > buf_offset ? buf_consumed : buf_size;
2091 }
2092
2093 static av_cold int aac_decode_close(AVCodecContext *avctx)
2094 {
2095     AACContext *ac = avctx->priv_data;
2096     int i, type;
2097
2098     for (i = 0; i < MAX_ELEM_ID; i++) {
2099         for (type = 0; type < 4; type++) {
2100             if (ac->che[type][i])
2101                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2102             av_freep(&ac->che[type][i]);
2103         }
2104     }
2105
2106     ff_mdct_end(&ac->mdct);
2107     ff_mdct_end(&ac->mdct_small);
2108     return 0;
2109 }
2110
2111 AVCodec aac_decoder = {
2112     "aac",
2113     AVMEDIA_TYPE_AUDIO,
2114     CODEC_ID_AAC,
2115     sizeof(AACContext),
2116     aac_decode_init,
2117     NULL,
2118     aac_decode_close,
2119     aac_decode_frame,
2120     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2121     .sample_fmts = (const enum SampleFormat[]) {
2122         SAMPLE_FMT_S16,SAMPLE_FMT_NONE
2123     },
2124     .channel_layouts = aac_channel_layout,
2125 };