git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * AAC LATM decoder
   7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   8  * Copyright (c) 2010      Janne Grunau <janne-ffmpeg@jannau.net>
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 /**
  28  * @file
  29  * AAC decoder
  30  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32  */
  33
  34 /*
  35  * supported tools
  36  *
  37  * Support?             Name
  38  * N (code in SoC repo) gain control
  39  * Y                    block switching
  40  * Y                    window shapes - standard
  41  * N                    window shapes - Low Delay
  42  * Y                    filterbank - standard
  43  * N (code in SoC repo) filterbank - Scalable Sample Rate
  44  * Y                    Temporal Noise Shaping
  45  * Y                    Long Term Prediction
  46  * Y                    intensity stereo
  47  * Y                    channel coupling
  48  * Y                    frequency domain prediction
  49  * Y                    Perceptual Noise Substitution
  50  * Y                    Mid/Side stereo
  51  * N                    Scalable Inverse AAC Quantization
  52  * N                    Frequency Selective Switch
  53  * N                    upsampling filter
  54  * Y                    quantization & coding - AAC
  55  * N                    quantization & coding - TwinVQ
  56  * N                    quantization & coding - BSAC
  57  * N                    AAC Error Resilience tools
  58  * N                    Error Resilience payload syntax
  59  * N                    Error Protection tool
  60  * N                    CELP
  61  * N                    Silence Compression
  62  * N                    HVXC
  63  * N                    HVXC 4kbits/s VR
  64  * N                    Structured Audio tools
  65  * N                    Structured Audio Sample Bank Format
  66  * N                    MIDI
  67  * N                    Harmonic and Individual Lines plus Noise
  68  * N                    Text-To-Speech Interface
  69  * Y                    Spectral Band Replication
  70  * Y (not in this code) Layer-1
  71  * Y (not in this code) Layer-2
  72  * Y (not in this code) Layer-3
  73  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  74  * Y                    Parametric Stereo
  75  * N                    Direct Stream Transfer
  76  *
  77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  78  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  79            Parametric Stereo.
  80  */
  81
  82
  83 #include "avcodec.h"
  84 #include "internal.h"
  85 #include "get_bits.h"
  86 #include "dsputil.h"
  87 #include "fft.h"
  88 #include "fmtconvert.h"
  89 #include "lpc.h"
  90 #include "kbdwin.h"
  91 #include "sinewin.h"
  92
  93 #include "aac.h"
  94 #include "aactab.h"
  95 #include "aacdectab.h"
  96 #include "cbrt_tablegen.h"
  97 #include "sbr.h"
  98 #include "aacsbr.h"
  99 #include "mpeg4audio.h"
 100 #include "aacadtsdec.h"
 101
 102 #include <assert.h>
 103 #include <errno.h>
 104 #include <math.h>
 105 #include <string.h>
 106
 107 #if ARCH_ARM
 108 #   include "arm/aac.h"
 109 #endif
 110
/**
 * Overlay of a float and a 32-bit unsigned integer that share the same
 * storage, so a value written through one member can be read through
 * the other.
 */
union float754 {
    float f;
    uint32_t i;
};

/** VLC table for the scalefactor codes; set up in aac_decode_init(). */
static VLC vlc_scalefactors;
/** VLC tables for the spectral codebooks; entries 0..10 are set up in aac_decode_init(). */
static VLC vlc_spectral[11];

/** Message logged when the bitstream runs out before the expected data has been read. */
static const char overread_err[] = "Input buffer exhausted before END element found\n";
 120
 121 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 122 {
 123     // For PCE based channel configurations map the channels solely based on tags.
 124     if (!ac->m4ac.chan_config) {
 125         return ac->tag_che_map[type][elem_id];
 126     }
 127     // For indexed channel configurations map the channels solely based on position.
 128     switch (ac->m4ac.chan_config) {
 129     case 7:
 130         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 131             ac->tags_mapped++;
 132             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 133         }
 134     case 6:
 135         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 136            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 137            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 138         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 139             ac->tags_mapped++;
 140             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 141         }
 142     case 5:
 143         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 144             ac->tags_mapped++;
 145             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 146         }
 147     case 4:
 148         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 151         }
 152     case 3:
 153     case 2:
 154         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 155             ac->tags_mapped++;
 156             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 157         } else if (ac->m4ac.chan_config == 2) {
 158             return NULL;
 159         }
 160     case 1:
 161         if (!ac->tags_mapped && type == TYPE_SCE) {
 162             ac->tags_mapped++;
 163             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 164         }
 165     default:
 166         return NULL;
 167     }
 168 }
 169
 170 /**
 171  * Check for the channel element in the current channel position configuration.
 172  * If it exists, make sure the appropriate element is allocated and map the
 173  * channel order to match the internal FFmpeg channel layout.
 174  *
 175  * @param   che_pos current channel position configuration
 176  * @param   type channel element type
 177  * @param   id channel element id
 178  * @param   channels count of the number of channels in the configuration
 179  *
 180  * @return  Returns error status. 0 - OK, !0 - error
 181  */
 182 static av_cold int che_configure(AACContext *ac,
 183                                  enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 184                                  int type, int id, int *channels)
 185 {
 186     if (che_pos[type][id]) {
 187         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 188             return AVERROR(ENOMEM);
 189         ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
 190         if (type != TYPE_CCE) {
 191             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 192             if (type == TYPE_CPE ||
 193                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 194                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 195             }
 196         }
 197     } else {
 198         if (ac->che[type][id])
 199             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 200         av_freep(&ac->che[type][id]);
 201     }
 202     return 0;
 203 }
 204
 205 /**
 206  * Configure output channel order based on the current program configuration element.
 207  *
 208  * @param   che_pos current channel position configuration
 209  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 210  *
 211  * @return  Returns error status. 0 - OK, !0 - error
 212  */
 213 static av_cold int output_configure(AACContext *ac,
 214                                     enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 215                                     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 216                                     int channel_config, enum OCStatus oc_type)
 217 {
 218     AVCodecContext *avctx = ac->avctx;
 219     int i, type, channels = 0, ret;
 220
 221     if (new_che_pos != che_pos)
 222     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 223
 224     if (channel_config) {
 225         for (i = 0; i < tags_per_config[channel_config]; i++) {
 226             if ((ret = che_configure(ac, che_pos,
 227                                      aac_channel_layout_map[channel_config - 1][i][0],
 228                                      aac_channel_layout_map[channel_config - 1][i][1],
 229                                      &channels)))
 230                 return ret;
 231         }
 232
 233         memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 234
 235         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 236     } else {
 237         /* Allocate or free elements depending on if they are in the
 238          * current program configuration.
 239          *
 240          * Set up default 1:1 output mapping.
 241          *
 242          * For a 5.1 stream the output order will be:
 243          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 244          */
 245
 246         for (i = 0; i < MAX_ELEM_ID; i++) {
 247             for (type = 0; type < 4; type++) {
 248                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 249                     return ret;
 250             }
 251         }
 252
 253         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 254
 255         avctx->channel_layout = 0;
 256     }
 257
 258     avctx->channels = channels;
 259
 260     ac->output_configured = oc_type;
 261
 262     return 0;
 263 }
 264
 265 /**
 266  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 267  *
 268  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 269  * @param sce_map mono (Single Channel Element) map
 270  * @param type speaker type/position for these channels
 271  */
 272 static void decode_channel_map(enum ChannelPosition *cpe_map,
 273                                enum ChannelPosition *sce_map,
 274                                enum ChannelPosition type,
 275                                GetBitContext *gb, int n)
 276 {
 277     while (n--) {
 278         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 279         map[get_bits(gb, 4)] = type;
 280     }
 281 }
 282
 283 /**
 284  * Decode program configuration element; reference: table 4.2.
 285  *
 286  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 287  *
 288  * @return  Returns error status. 0 - OK, !0 - error
 289  */
 290 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
 291                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 292                       GetBitContext *gb)
 293 {
 294     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 295     int comment_len;
 296
 297     skip_bits(gb, 2);  // object_type
 298
 299     sampling_index = get_bits(gb, 4);
 300     if (m4ac->sampling_index != sampling_index)
 301         av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 302
 303     num_front       = get_bits(gb, 4);
 304     num_side        = get_bits(gb, 4);
 305     num_back        = get_bits(gb, 4);
 306     num_lfe         = get_bits(gb, 2);
 307     num_assoc_data  = get_bits(gb, 3);
 308     num_cc          = get_bits(gb, 4);
 309
 310     if (get_bits1(gb))
 311         skip_bits(gb, 4); // mono_mixdown_tag
 312     if (get_bits1(gb))
 313         skip_bits(gb, 4); // stereo_mixdown_tag
 314
 315     if (get_bits1(gb))
 316         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 317
 318     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 319     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 320     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 321     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 322
 323     skip_bits_long(gb, 4 * num_assoc_data);
 324
 325     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 326
 327     align_get_bits(gb);
 328
 329     /* comment field, first byte is length */
 330     comment_len = get_bits(gb, 8) * 8;
 331     if (get_bits_left(gb) < comment_len) {
 332         av_log(avctx, AV_LOG_ERROR, overread_err);
 333         return -1;
 334     }
 335     skip_bits_long(gb, comment_len);
 336     return 0;
 337 }
 338
 339 /**
 340  * Set up channel positions based on a default channel configuration
 341  * as specified in table 1.17.
 342  *
 343  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 344  *
 345  * @return  Returns error status. 0 - OK, !0 - error
 346  */
 347 static av_cold int set_default_channel_config(AVCodecContext *avctx,
 348                                               enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 349                                               int channel_config)
 350 {
 351     if (channel_config < 1 || channel_config > 7) {
 352         av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 353                channel_config);
 354         return -1;
 355     }
 356
 357     /* default channel configurations:
 358      *
 359      * 1ch : front center (mono)
 360      * 2ch : L + R (stereo)
 361      * 3ch : front center + L + R
 362      * 4ch : front center + L + R + back center
 363      * 5ch : front center + L + R + back stereo
 364      * 6ch : front center + L + R + back stereo + LFE
 365      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 366      */
 367
 368     if (channel_config != 2)
 369         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 370     if (channel_config > 1)
 371         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 372     if (channel_config == 4)
 373         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 374     if (channel_config > 4)
 375         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 376         = AAC_CHANNEL_BACK;  // back stereo
 377     if (channel_config > 5)
 378         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 379     if (channel_config == 7)
 380         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 381
 382     return 0;
 383 }
 384
 385 /**
 386  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 387  *
 388  * @param   ac          pointer to AACContext, may be null
 389  * @param   avctx       pointer to AVCCodecContext, used for logging
 390  *
 391  * @return  Returns error status. 0 - OK, !0 - error
 392  */
 393 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
 394                                      GetBitContext *gb,
 395                                      MPEG4AudioConfig *m4ac,
 396                                      int channel_config)
 397 {
 398     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 399     int extension_flag, ret;
 400
 401     if (get_bits1(gb)) { // frameLengthFlag
 402         av_log_missing_feature(avctx, "960/120 MDCT window is", 1);
 403         return -1;
 404     }
 405
 406     if (get_bits1(gb))       // dependsOnCoreCoder
 407         skip_bits(gb, 14);   // coreCoderDelay
 408     extension_flag = get_bits1(gb);
 409
 410     if (m4ac->object_type == AOT_AAC_SCALABLE ||
 411         m4ac->object_type == AOT_ER_AAC_SCALABLE)
 412         skip_bits(gb, 3);     // layerNr
 413
 414     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 415     if (channel_config == 0) {
 416         skip_bits(gb, 4);  // element_instance_tag
 417         if ((ret = decode_pce(avctx, m4ac, new_che_pos, gb)))
 418             return ret;
 419     } else {
 420         if ((ret = set_default_channel_config(avctx, new_che_pos, channel_config)))
 421             return ret;
 422     }
 423     if (ac && (ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 424         return ret;
 425
 426     if (extension_flag) {
 427         switch (m4ac->object_type) {
 428         case AOT_ER_BSAC:
 429             skip_bits(gb, 5);    // numOfSubFrame
 430             skip_bits(gb, 11);   // layer_length
 431             break;
 432         case AOT_ER_AAC_LC:
 433         case AOT_ER_AAC_LTP:
 434         case AOT_ER_AAC_SCALABLE:
 435         case AOT_ER_AAC_LD:
 436             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 437                                     * aacScalefactorDataResilienceFlag
 438                                     * aacSpectralDataResilienceFlag
 439                                     */
 440             break;
 441         }
 442         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 443     }
 444     return 0;
 445 }
 446
 447 /**
 448  * Decode audio specific configuration; reference: table 1.13.
 449  *
 450  * @param   ac          pointer to AACContext, may be null
 451  * @param   avctx       pointer to AVCCodecContext, used for logging
 452  * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
 453  * @param   data        pointer to AVCodecContext extradata
 454  * @param   data_size   size of AVCCodecContext extradata
 455  *
 456  * @return  Returns error status or number of consumed bits. <0 - error
 457  */
 458 static int decode_audio_specific_config(AACContext *ac,
 459                                         AVCodecContext *avctx,
 460                                         MPEG4AudioConfig *m4ac,
 461                                         const uint8_t *data, int data_size)
 462 {
 463     GetBitContext gb;
 464     int i;
 465
 466     av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
 467     for (i = 0; i < avctx->extradata_size; i++)
 468          av_dlog(avctx, "%02x ", avctx->extradata[i]);
 469     av_dlog(avctx, "\n");
 470
 471     init_get_bits(&gb, data, data_size * 8);
 472
 473     if ((i = ff_mpeg4audio_get_config(m4ac, data, data_size)) < 0)
 474         return -1;
 475     if (m4ac->sampling_index > 12) {
 476         av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
 477         return -1;
 478     }
 479     if (m4ac->sbr == 1 && m4ac->ps == -1)
 480         m4ac->ps = 1;
 481
 482     skip_bits_long(&gb, i);
 483
 484     switch (m4ac->object_type) {
 485     case AOT_AAC_MAIN:
 486     case AOT_AAC_LC:
 487     case AOT_AAC_LTP:
 488         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
 489             return -1;
 490         break;
 491     default:
 492         av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 493                m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
 494         return -1;
 495     }
 496
 497     av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
 498             m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
 499             m4ac->sample_rate, m4ac->sbr, m4ac->ps);
 500
 501     return get_bits_count(&gb);
 502 }
 503
 504 /**
 505  * linear congruential pseudorandom number generator
 506  *
 507  * @param   previous_val    pointer to the current state of the generator
 508  *
 509  * @return  Returns a 32-bit pseudorandom integer
 510  */
 511 static av_always_inline int lcg_random(int previous_val)
 512 {
 513     return previous_val * 1664525 + 1013904223;
 514 }
 515
 516 static av_always_inline void reset_predict_state(PredictorState *ps)
 517 {
 518     ps->r0   = 0.0f;
 519     ps->r1   = 0.0f;
 520     ps->cor0 = 0.0f;
 521     ps->cor1 = 0.0f;
 522     ps->var0 = 1.0f;
 523     ps->var1 = 1.0f;
 524 }
 525
 526 static void reset_all_predictors(PredictorState *ps)
 527 {
 528     int i;
 529     for (i = 0; i < MAX_PREDICTORS; i++)
 530         reset_predict_state(&ps[i]);
 531 }
 532
 533 static void reset_predictor_group(PredictorState *ps, int group_num)
 534 {
 535     int i;
 536     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 537         reset_predict_state(&ps[i]);
 538 }
 539
/**
 * Set up vlc_spectral[num] through INIT_VLC_STATIC from the
 * ff_aac_spectral_sizes / ff_aac_spectral_bits / ff_aac_spectral_codes
 * tables for codebook index num.
 *
 * size is handed through unchanged as the last INIT_VLC_STATIC argument
 * (presumably the static table size -- confirm against INIT_VLC_STATIC).
 * Note that the expansion already ends in a semicolon.
 */
#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);
 545
 546 static av_cold int aac_decode_init(AVCodecContext *avctx)
 547 {
 548     AACContext *ac = avctx->priv_data;
 549
 550     ac->avctx = avctx;
 551     ac->m4ac.sample_rate = avctx->sample_rate;
 552
 553     if (avctx->extradata_size > 0) {
 554         if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac,
 555                                          avctx->extradata,
 556                                          avctx->extradata_size) < 0)
 557             return -1;
 558     }
 559
 560     avctx->sample_fmt = avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
 561                         AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
 562
 563     AAC_INIT_VLC_STATIC( 0, 304);
 564     AAC_INIT_VLC_STATIC( 1, 270);
 565     AAC_INIT_VLC_STATIC( 2, 550);
 566     AAC_INIT_VLC_STATIC( 3, 300);
 567     AAC_INIT_VLC_STATIC( 4, 328);
 568     AAC_INIT_VLC_STATIC( 5, 294);
 569     AAC_INIT_VLC_STATIC( 6, 306);
 570     AAC_INIT_VLC_STATIC( 7, 268);
 571     AAC_INIT_VLC_STATIC( 8, 510);
 572     AAC_INIT_VLC_STATIC( 9, 366);
 573     AAC_INIT_VLC_STATIC(10, 462);
 574
 575     ff_aac_sbr_init();
 576
 577     dsputil_init(&ac->dsp, avctx);
 578     ff_fmt_convert_init(&ac->fmt_conv, avctx);
 579
 580     ac->random_state = 0x1f2e3d4c;
 581
 582     ff_aac_tableinit();
 583
 584     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 585                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 586                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 587                     352);
 588
 589     ff_mdct_init(&ac->mdct,       11, 1, 1.0/1024.0);
 590     ff_mdct_init(&ac->mdct_small,  8, 1, 1.0/128.0);
 591     ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0);
 592     // window initialization
 593     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 594     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 595     ff_init_ff_sine_windows(10);
 596     ff_init_ff_sine_windows( 7);
 597
 598     cbrt_tableinit();
 599
 600     return 0;
 601 }
 602
 603 /**
 604  * Skip data_stream_element; reference: table 4.10.
 605  */
 606 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 607 {
 608     int byte_align = get_bits1(gb);
 609     int count = get_bits(gb, 8);
 610     if (count == 255)
 611         count += get_bits(gb, 8);
 612     if (byte_align)
 613         align_get_bits(gb);
 614
 615     if (get_bits_left(gb) < 8 * count) {
 616         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 617         return -1;
 618     }
 619     skip_bits_long(gb, 8 * count);
 620     return 0;
 621 }
 622
 623 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 624                              GetBitContext *gb)
 625 {
 626     int sfb;
 627     if (get_bits1(gb)) {
 628         ics->predictor_reset_group = get_bits(gb, 5);
 629         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 630             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 631             return -1;
 632         }
 633     }
 634     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 635         ics->prediction_used[sfb] = get_bits1(gb);
 636     }
 637     return 0;
 638 }
 639
 640 /**
 641  * Decode Long Term Prediction data; reference: table 4.xx.
 642  */
 643 static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
 644                        GetBitContext *gb, uint8_t max_sfb)
 645 {
 646     int sfb;
 647
 648     ltp->lag  = get_bits(gb, 11);
 649     ltp->coef = ltp_coef[get_bits(gb, 3)];
 650     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
 651         ltp->used[sfb] = get_bits1(gb);
 652 }
 653
 654 /**
 655  * Decode Individual Channel Stream info; reference: table 4.6.
 656  *
 657  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 658  */
 659 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 660                            GetBitContext *gb, int common_window)
 661 {
 662     if (get_bits1(gb)) {
 663         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 664         memset(ics, 0, sizeof(IndividualChannelStream));
 665         return -1;
 666     }
 667     ics->window_sequence[1] = ics->window_sequence[0];
 668     ics->window_sequence[0] = get_bits(gb, 2);
 669     ics->use_kb_window[1]   = ics->use_kb_window[0];
 670     ics->use_kb_window[0]   = get_bits1(gb);
 671     ics->num_window_groups  = 1;
 672     ics->group_len[0]       = 1;
 673     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 674         int i;
 675         ics->max_sfb = get_bits(gb, 4);
 676         for (i = 0; i < 7; i++) {
 677             if (get_bits1(gb)) {
 678                 ics->group_len[ics->num_window_groups - 1]++;
 679             } else {
 680                 ics->num_window_groups++;
 681                 ics->group_len[ics->num_window_groups - 1] = 1;
 682             }
 683         }
 684         ics->num_windows       = 8;
 685         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 686         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 687         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 688         ics->predictor_present = 0;
 689     } else {
 690         ics->max_sfb               = get_bits(gb, 6);
 691         ics->num_windows           = 1;
 692         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 693         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 694         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 695         ics->predictor_present     = get_bits1(gb);
 696         ics->predictor_reset_group = 0;
 697         if (ics->predictor_present) {
 698             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 699                 if (decode_prediction(ac, ics, gb)) {
 700                     memset(ics, 0, sizeof(IndividualChannelStream));
 701                     return -1;
 702                 }
 703             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 704                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 705                 memset(ics, 0, sizeof(IndividualChannelStream));
 706                 return -1;
 707             } else {
 708                 if ((ics->ltp.present = get_bits(gb, 1)))
 709                     decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
 710             }
 711         }
 712     }
 713
 714     if (ics->max_sfb > ics->num_swb) {
 715         av_log(ac->avctx, AV_LOG_ERROR,
 716                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 717                ics->max_sfb, ics->num_swb);
 718         memset(ics, 0, sizeof(IndividualChannelStream));
 719         return -1;
 720     }
 721
 722     return 0;
 723 }
 724
 725 /**
 726  * Decode band types (section_data payload); reference: table 4.46.
 727  *
 728  * @param   band_type           array of the used band type
 729  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 730  *
 731  * @return  Returns error status. 0 - OK, !0 - error
 732  */
 733 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 734                              int band_type_run_end[120], GetBitContext *gb,
 735                              IndividualChannelStream *ics)
 736 {
 737     int g, idx = 0;
 738     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 739     for (g = 0; g < ics->num_window_groups; g++) {
 740         int k = 0;
 741         while (k < ics->max_sfb) {
 742             uint8_t sect_end = k;
 743             int sect_len_incr;
 744             int sect_band_type = get_bits(gb, 4);
 745             if (sect_band_type == 12) {
 746                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 747                 return -1;
 748             }
 749             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 750                 sect_end += sect_len_incr;
 751             sect_end += sect_len_incr;
 752             if (get_bits_left(gb) < 0) {
 753                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 754                 return -1;
 755             }
 756             if (sect_end > ics->max_sfb) {
 757                 av_log(ac->avctx, AV_LOG_ERROR,
 758                        "Number of bands (%d) exceeds limit (%d).\n",
 759                        sect_end, ics->max_sfb);
 760                 return -1;
 761             }
 762             for (; k < sect_end; k++) {
 763                 band_type        [idx]   = sect_band_type;
 764                 band_type_run_end[idx++] = sect_end;
 765             }
 766         }
 767     }
 768     return 0;
 769 }
 770
 771 /**
 772  * Decode scalefactors; reference: table 4.47.
 773  *
 774  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 775  * @param   band_type           array of the used band type
 776  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 777  * @param   sf                  array of scalefactors or intensity stereo positions
 778  *
 779  * @return  Returns error status. 0 - OK, !0 - error
 780  */
 781 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 782                                unsigned int global_gain,
 783                                IndividualChannelStream *ics,
 784                                enum BandType band_type[120],
 785                                int band_type_run_end[120])
 786 {
 787     int g, i, idx = 0;
 788     int offset[3] = { global_gain, global_gain - 90, 0 };
 789     int clipped_offset;
 790     int noise_flag = 1;
 791     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 792     for (g = 0; g < ics->num_window_groups; g++) {
 793         for (i = 0; i < ics->max_sfb;) {
 794             int run_end = band_type_run_end[idx];
 795             if (band_type[idx] == ZERO_BT) {
 796                 for (; i < run_end; i++, idx++)
 797                     sf[idx] = 0.;
 798             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 799                 for (; i < run_end; i++, idx++) {
 800                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 801                     clipped_offset = av_clip(offset[2], -155, 100);
 802                     if (offset[2] != clipped_offset) {
 803                         av_log_ask_for_sample(ac->avctx, "Intensity stereo "
 804                                 "position clipped (%d -> %d).\nIf you heard an "
 805                                 "audible artifact, there may be a bug in the "
 806                                 "decoder. ", offset[2], clipped_offset);
 807                     }
 808                     sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
 809                 }
 810             } else if (band_type[idx] == NOISE_BT) {
 811                 for (; i < run_end; i++, idx++) {
 812                     if (noise_flag-- > 0)
 813                         offset[1] += get_bits(gb, 9) - 256;
 814                     else
 815                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 816                     clipped_offset = av_clip(offset[1], -100, 155);
 817                     if (offset[2] != clipped_offset) {
 818                         av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
 819                                 "(%d -> %d).\nIf you heard an audible "
 820                                 "artifact, there may be a bug in the decoder. ",
 821                                 offset[1], clipped_offset);
 822                     }
 823                     sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
 824                 }
 825             } else {
 826                 for (; i < run_end; i++, idx++) {
 827                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 828                     if (offset[0] > 255U) {
 829                         av_log(ac->avctx, AV_LOG_ERROR,
 830                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 831                         return -1;
 832                     }
 833                     sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
 834                 }
 835             }
 836         }
 837     }
 838     return 0;
 839 }
 840
 841 /**
 842  * Decode pulse data; reference: table 4.7.
 843  */
 844 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 845                          const uint16_t *swb_offset, int num_swb)
 846 {
 847     int i, pulse_swb;
 848     pulse->num_pulse = get_bits(gb, 2) + 1;
 849     pulse_swb        = get_bits(gb, 6);
 850     if (pulse_swb >= num_swb)
 851         return -1;
 852     pulse->pos[0]    = swb_offset[pulse_swb];
 853     pulse->pos[0]   += get_bits(gb, 5);
 854     if (pulse->pos[0] > 1023)
 855         return -1;
 856     pulse->amp[0]    = get_bits(gb, 4);
 857     for (i = 1; i < pulse->num_pulse; i++) {
 858         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 859         if (pulse->pos[i] > 1023)
 860             return -1;
 861         pulse->amp[i] = get_bits(gb, 4);
 862     }
 863     return 0;
 864 }
 865
 866 /**
 867  * Decode Temporal Noise Shaping data; reference: table 4.48.
 868  *
 869  * @return  Returns error status. 0 - OK, !0 - error
 870  */
 871 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 872                       GetBitContext *gb, const IndividualChannelStream *ics)
 873 {
 874     int w, filt, i, coef_len, coef_res, coef_compress;
 875     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 876     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 877     for (w = 0; w < ics->num_windows; w++) {
 878         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 879             coef_res = get_bits1(gb);
 880
 881             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 882                 int tmp2_idx;
 883                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 884
 885                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 886                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 887                            tns->order[w][filt], tns_max_order);
 888                     tns->order[w][filt] = 0;
 889                     return -1;
 890                 }
 891                 if (tns->order[w][filt]) {
 892                     tns->direction[w][filt] = get_bits1(gb);
 893                     coef_compress = get_bits1(gb);
 894                     coef_len = coef_res + 3 - coef_compress;
 895                     tmp2_idx = 2 * coef_compress + coef_res;
 896
 897                     for (i = 0; i < tns->order[w][filt]; i++)
 898                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 899                 }
 900             }
 901         }
 902     }
 903     return 0;
 904 }
 905
 906 /**
 907  * Decode Mid/Side data; reference: table 4.54.
 908  *
 909  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 910  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 911  *                      [3] reserved for scalable AAC
 912  */
 913 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 914                                    int ms_present)
 915 {
 916     int idx;
 917     if (ms_present == 1) {
 918         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 919             cpe->ms_mask[idx] = get_bits1(gb);
 920     } else if (ms_present == 2) {
 921         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 922     }
 923 }
 924
 925 #ifndef VMUL2
 926 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 927                            const float *scale)
 928 {
 929     float s = *scale;
 930     *dst++ = v[idx    & 15] * s;
 931     *dst++ = v[idx>>4 & 15] * s;
 932     return dst;
 933 }
 934 #endif
 935
 936 #ifndef VMUL4
 937 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 938                            const float *scale)
 939 {
 940     float s = *scale;
 941     *dst++ = v[idx    & 3] * s;
 942     *dst++ = v[idx>>2 & 3] * s;
 943     *dst++ = v[idx>>4 & 3] * s;
 944     *dst++ = v[idx>>6 & 3] * s;
 945     return dst;
 946 }
 947 #endif
 948
 949 #ifndef VMUL2S
 950 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 951                             unsigned sign, const float *scale)
 952 {
 953     union float754 s0, s1;
 954
 955     s0.f = s1.f = *scale;
 956     s0.i ^= sign >> 1 << 31;
 957     s1.i ^= sign      << 31;
 958
 959     *dst++ = v[idx    & 15] * s0.f;
 960     *dst++ = v[idx>>4 & 15] * s1.f;
 961
 962     return dst;
 963 }
 964 #endif
 965
 966 #ifndef VMUL4S
 967 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 968                             unsigned sign, const float *scale)
 969 {
 970     unsigned nz = idx >> 12;
 971     union float754 s = { .f = *scale };
 972     union float754 t;
 973
 974     t.i = s.i ^ (sign & 1U<<31);
 975     *dst++ = v[idx    & 3] * t.f;
 976
 977     sign <<= nz & 1; nz >>= 1;
 978     t.i = s.i ^ (sign & 1U<<31);
 979     *dst++ = v[idx>>2 & 3] * t.f;
 980
 981     sign <<= nz & 1; nz >>= 1;
 982     t.i = s.i ^ (sign & 1U<<31);
 983     *dst++ = v[idx>>4 & 3] * t.f;
 984
 985     sign <<= nz & 1; nz >>= 1;
 986     t.i = s.i ^ (sign & 1U<<31);
 987     *dst++ = v[idx>>6 & 3] * t.f;
 988
 989     return dst;
 990 }
 991 #endif
 992
 993 /**
 994  * Decode spectral data; reference: table 4.50.
 995  * Dequantize and scale spectral data; reference: 4.6.3.3.
 996  *
      * Coefficients above offsets[max_sfb] are cleared first. Each band is then
      * filled according to its band type: cleared, filled with scaled
      * pseudo-random values, or decoded from codebook codewords. Pulse data,
      * when present, is applied afterwards.
      *
 997  * @param   coef            array of dequantized, scaled spectral data
 998  * @param   sf              array of scalefactors or intensity stereo positions
 999  * @param   pulse_present   set if pulses are present
1000  * @param   pulse           pointer to pulse data struct
1001  * @param   band_type       array of the used band type
      * @param   ac              decoder context (random state, DSP functions, logging)
      * @param   gb              bitstream reader
      * @param   ics             window/group layout and band offsets
1002  *
1003  * @return  Returns error status. 0 - OK, !0 - error
1004  */
1005 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1006                                        GetBitContext *gb, const float sf[120],
1007                                        int pulse_present, const Pulse *pulse,
1008                                        const IndividualChannelStream *ics,
1009                                        enum BandType band_type[120])
1010 {
1011     int i, k, g, idx = 0;
          /* c: number of coefficients per window (1024 split across the windows). */
1012     const int c = 1024 / ics->num_windows;
1013     const uint16_t *offsets = ics->swb_offset;
          /* coef is advanced per group below; keep the start for the pulse pass. */
1014     float *coef_base = coef;
1015
          /* Clear everything from offsets[max_sfb] to the end of each window. */
1016     for (g = 0; g < ics->num_windows; g++)
1017         memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1018
1019     for (g = 0; g < ics->num_window_groups; g++) {
1020         unsigned g_len = ics->group_len[g];
1021
1022         for (i = 0; i < ics->max_sfb; i++, idx++) {
                  /* Unsigned on purpose: a band type of 0 wraps to UINT_MAX and
                   * therefore also takes the first (clear) branch below. */
1023             const unsigned cbt_m1 = band_type[idx] - 1;
1024             float *cfo = coef + offsets[i];
1025             int off_len = offsets[i + 1] - offsets[i];
1026             int group;
1027
1028             if (cbt_m1 >= INTENSITY_BT2 - 1) {
                      /* No spectral data is read for this band: zero it in every
                       * window of the group. */
1029                 for (group = 0; group < g_len; group++, cfo+=128) {
1030                     memset(cfo, 0, off_len * sizeof(float));
1031                 }
1032             } else if (cbt_m1 == NOISE_BT - 1) {
                      /* Noise band: fill with values from the LCG, then divide by
                       * the square root of the band's scalar product with itself
                       * and multiply by sf[idx]. */
1033                 for (group = 0; group < g_len; group++, cfo+=128) {
1034                     float scale;
1035                     float band_energy;
1036
1037                     for (k = 0; k < off_len; k++) {
1038                         ac->random_state  = lcg_random(ac->random_state);
1039                         cfo[k] = ac->random_state;
1040                     }
1041
1042                     band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1043                     scale = sf[idx] / sqrtf(band_energy);
1044                     ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1045                 }
1046             } else {
                      /* Codebook-coded band: the tables are selected by cbt_m1 and
                       * the decoding loop by cbt_m1 >> 1. */
1047                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1048                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1049                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1050                 OPEN_READER(re, gb);
1051
1052                 switch (cbt_m1 >> 1) {
1053                 case 0:
                          /* Four values per codeword, no extra sign bits read. */
1054                     for (group = 0; group < g_len; group++, cfo+=128) {
1055                         float *cf = cfo;
1056                         int len = off_len;
1057
1058                         do {
1059                             int code;
1060                             unsigned cb_idx;
1061
1062                             UPDATE_CACHE(re, gb);
1063                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1064                             cb_idx = cb_vector_idx[code];
1065                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1066                         } while (len -= 4);
1067                     }
1068                     break;
1069
1070                 case 1:
                          /* Four values per codeword, followed by nnz sign bits
                           * that are left-aligned into a 32-bit word for VMUL4S. */
1071                     for (group = 0; group < g_len; group++, cfo+=128) {
1072                         float *cf = cfo;
1073                         int len = off_len;
1074
1075                         do {
1076                             int code;
1077                             unsigned nnz;
1078                             unsigned cb_idx;
1079                             uint32_t bits;
1080
1081                             UPDATE_CACHE(re, gb);
1082                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1083                             cb_idx = cb_vector_idx[code];
1084                             nnz = cb_idx >> 8 & 15;
                                  /* NOTE(review): if nnz can be 0 here, 32-nnz is a
                                   * shift by the full width of the type, which is
                                   * undefined in C - confirm against the index table. */
1085                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1086                             LAST_SKIP_BITS(re, gb, nnz);
1087                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1088                         } while (len -= 4);
1089                     }
1090                     break;
1091
1092                 case 2:
                          /* Two values per codeword, no extra sign bits read. */
1093                     for (group = 0; group < g_len; group++, cfo+=128) {
1094                         float *cf = cfo;
1095                         int len = off_len;
1096
1097                         do {
1098                             int code;
1099                             unsigned cb_idx;
1100
1101                             UPDATE_CACHE(re, gb);
1102                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1103                             cb_idx = cb_vector_idx[code];
1104                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1105                         } while (len -= 2);
1106                     }
1107                     break;
1108
1109                 case 3:
1110                 case 4:
                          /* Two values per codeword, followed by nnz sign bits
                           * that are shifted by cb_idx >> 12 before VMUL2S. */
1111                     for (group = 0; group < g_len; group++, cfo+=128) {
1112                         float *cf = cfo;
1113                         int len = off_len;
1114
1115                         do {
1116                             int code;
1117                             unsigned nnz;
1118                             unsigned cb_idx;
1119                             unsigned sign;
1120
1121                             UPDATE_CACHE(re, gb);
1122                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1123                             cb_idx = cb_vector_idx[code];
1124                             nnz = cb_idx >> 8 & 15;
1125                             sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1126                             LAST_SKIP_BITS(re, gb, nnz);
1127                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1128                         } while (len -= 2);
1129                     }
1130                     break;
1131
1132                 default:
                          /* Two values per codeword with optional escape sequences.
                           * Values are written as raw 32-bit patterns through icf
                           * and the whole band is scaled by sf[idx] afterwards. */
1133                     for (group = 0; group < g_len; group++, cfo+=128) {
1134                         float *cf = cfo;
1135                         uint32_t *icf = (uint32_t *) cf;
1136                         int len = off_len;
1137
1138                         do {
1139                             int code;
1140                             unsigned nzt, nnz;
1141                             unsigned cb_idx;
1142                             uint32_t bits;
1143                             int j;
1144
1145                             UPDATE_CACHE(re, gb);
1146                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1147
                                  /* Code 0 stores two zeros; the continue still
                                   * evaluates the do-while condition (len -= 2). */
1148                             if (!code) {
1149                                 *icf++ = 0;
1150                                 *icf++ = 0;
1151                                 continue;
1152                             }
1153
1154                             cb_idx = cb_vector_idx[code];
1155                             nnz = cb_idx >> 12;
1156                             nzt = cb_idx >> 8;
1157                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1158                             LAST_SKIP_BITS(re, gb, nnz);
1159
                                  /* Bit j of nzt selects the escape path for value j. */
1160                             for (j = 0; j < 2; j++) {
1161                                 if (nzt & 1<<j) {
1162                                     uint32_t b;
1163                                     int n;
1164                                     /* The total length of escape_sequence must be < 22 bits according
1165                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1166                                     UPDATE_CACHE(re, gb);
1167                                     b = GET_CACHE(re, gb);
                                          /* Presumably the number of leading 1 bits in
                                           * the cache (the escape prefix) - depends on
                                           * av_log2, which is not visible here. */
1168                                     b = 31 - av_log2(~b);
1169
1170                                     if (b > 8) {
1171                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1172                                         return -1;
1173                                     }
1174
1175                                     SKIP_BITS(re, gb, b + 1);
1176                                     b += 4;
1177                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1178                                     LAST_SKIP_BITS(re, gb, b);
                                          /* Table entry combined with the current sign bit. */
1179                                     *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1180                                     bits <<= 1;
1181                                 } else {
1182                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1183                                     *icf++ = (bits & 1U<<31) | v;
                                          /* A sign bit is consumed only when the stored
                                           * pattern is non-zero. */
1184                                     bits <<= !!v;
1185                                 }
1186                                 cb_idx >>= 4;
1187                             }
1188                         } while (len -= 2);
1189
1190                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1191                     }
1192                 }
1193
1194                 CLOSE_READER(re, gb);
1195             }
1196         }
              /* Advance to the first window of the next group (128 coefficients per window). */
1197         coef += g_len << 7;
1198     }
1199
          /* Pulse pass: for each pulse, find the band containing its position and,
           * unless that band is a noise band or has a zero scalefactor, recompute
           * the coefficient from its current value and the pulse amplitude. */
1200     if (pulse_present) {
1201         idx = 0;
1202         for (i = 0; i < pulse->num_pulse; i++) {
1203             float co = coef_base[ pulse->pos[i] ];
                  /* idx is not reset between pulses; positions only grow as decoded
                   * by decode_pulses, so the search continues forward. */
1204             while (offsets[idx + 1] <= pulse->pos[i])
1205                 idx++;
1206             if (band_type[idx] != NOISE_BT && sf[idx]) {
1207                 float ico = -pulse->amp[i];
1208                 if (co) {
1209                     co /= sf[idx];
1210                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1211                 }
1212                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1213             }
1214         }
1215     }
1216     return 0;
1217 }
1218
1219 static av_always_inline float flt16_round(float pf)
1220 {
1221     union float754 tmp;
1222     tmp.f = pf;
1223     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1224     return tmp.f;
1225 }
1226
1227 static av_always_inline float flt16_even(float pf)
1228 {
1229     union float754 tmp;
1230     tmp.f = pf;
1231     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1232     return tmp.f;
1233 }
1234
1235 static av_always_inline float flt16_trunc(float pf)
1236 {
1237     union float754 pun;
1238     pun.f = pf;
1239     pun.i &= 0xFFFF0000U;
1240     return pun.f;
1241 }
1242
1243 static av_always_inline void predict(PredictorState *ps, float *coef,
1244                                      int output_enable)
1245 {
1246     const float a     = 0.953125; // 61.0 / 64
1247     const float alpha = 0.90625;  // 29.0 / 32
1248     float e0, e1;
1249     float pv;
1250     float k1, k2;
1251     float   r0 = ps->r0,     r1 = ps->r1;
1252     float cor0 = ps->cor0, cor1 = ps->cor1;
1253     float var0 = ps->var0, var1 = ps->var1;
1254
1255     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1256     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1257
1258     pv = flt16_round(k1 * r0 + k2 * r1);
1259     if (output_enable)
1260         *coef += pv;
1261
1262     e0 = *coef;
1263     e1 = e0 - k1 * r0;
1264
1265     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1266     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1267     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1268     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1269
1270     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1271     ps->r0 = flt16_trunc(a * e0);
1272 }
1273
1274 /**
1275  * Apply AAC-Main style frequency domain prediction.
1276  */
1277 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1278 {
1279     int sfb, k;
1280
1281     if (!sce->ics.predictor_initialized) {
1282         reset_all_predictors(sce->predictor_state);
1283         sce->ics.predictor_initialized = 1;
1284     }
1285
1286     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1287         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1288             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1289                 predict(&sce->predictor_state[k], &sce->coeffs[k],
1290                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1291             }
1292         }
1293         if (sce->ics.predictor_reset_group)
1294             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1295     } else
1296         reset_all_predictors(sce->predictor_state);
1297 }
1298
1299 /**
1300  * Decode an individual_channel_stream payload; reference: table 4.44.
1301  *
1302  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1303  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1304  *
1305  * @return  Returns error status. 0 - OK, !0 - error
1306  */
1307 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1308                       GetBitContext *gb, int common_window, int scale_flag)
1309 {
1310     Pulse pulse;
1311     TemporalNoiseShaping    *tns = &sce->tns;
1312     IndividualChannelStream *ics = &sce->ics;
1313     float *out = sce->coeffs;
1314     int global_gain, pulse_present = 0;
1315
1316     /* This assignment is to silence a GCC warning about the variable being used
1317      * uninitialized when in fact it always is.
1318      */
1319     pulse.num_pulse = 0;
1320
1321     global_gain = get_bits(gb, 8);
1322
1323     if (!common_window && !scale_flag) {
1324         if (decode_ics_info(ac, ics, gb, 0) < 0)
1325             return -1;
1326     }
1327
1328     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1329         return -1;
1330     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1331         return -1;
1332
1333     pulse_present = 0;
1334     if (!scale_flag) {
1335         if ((pulse_present = get_bits1(gb))) {
1336             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1337                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1338                 return -1;
1339             }
1340             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1341                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1342                 return -1;
1343             }
1344         }
1345         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1346             return -1;
1347         if (get_bits1(gb)) {
1348             av_log_missing_feature(ac->avctx, "SSR", 1);
1349             return -1;
1350         }
1351     }
1352
1353     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1354         return -1;
1355
1356     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1357         apply_prediction(ac, sce);
1358
1359     return 0;
1360 }
1361
1362 /**
1363  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1364  */
1365 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1366 {
1367     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1368     float *ch0 = cpe->ch[0].coeffs;
1369     float *ch1 = cpe->ch[1].coeffs;
1370     int g, i, group, idx = 0;
1371     const uint16_t *offsets = ics->swb_offset;
1372     for (g = 0; g < ics->num_window_groups; g++) {
1373         for (i = 0; i < ics->max_sfb; i++, idx++) {
1374             if (cpe->ms_mask[idx] &&
1375                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1376                 for (group = 0; group < ics->group_len[g]; group++) {
1377                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1378                                               ch1 + group * 128 + offsets[i],
1379                                               offsets[i+1] - offsets[i]);
1380                 }
1381             }
1382         }
1383         ch0 += ics->group_len[g] * 128;
1384         ch1 += ics->group_len[g] * 128;
1385     }
1386 }
1387
1388 /**
1389  * intensity stereo decoding; reference: 4.6.8.2.3
1390  *
1391  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1392  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1393  *                      [3] reserved for scalable AAC
1394  */
1395 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1396 {
1397     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1398     SingleChannelElement         *sce1 = &cpe->ch[1];
1399     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1400     const uint16_t *offsets = ics->swb_offset;
1401     int g, group, i, idx = 0;
1402     int c;
1403     float scale;
1404     for (g = 0; g < ics->num_window_groups; g++) {
1405         for (i = 0; i < ics->max_sfb;) {
1406             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1407                 const int bt_run_end = sce1->band_type_run_end[idx];
1408                 for (; i < bt_run_end; i++, idx++) {
1409                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1410                     if (ms_present)
1411                         c *= 1 - 2 * cpe->ms_mask[idx];
1412                     scale = c * sce1->sf[idx];
1413                     for (group = 0; group < ics->group_len[g]; group++)
1414                         ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1415                                                    coef0 + group * 128 + offsets[i],
1416                                                    scale,
1417                                                    offsets[i + 1] - offsets[i]);
1418                 }
1419             } else {
1420                 int bt_run_end = sce1->band_type_run_end[idx];
1421                 idx += bt_run_end - i;
1422                 i    = bt_run_end;
1423             }
1424         }
1425         coef0 += ics->group_len[g] * 128;
1426         coef1 += ics->group_len[g] * 128;
1427     }
1428 }
1429
1430 /**
1431  * Decode a channel_pair_element; reference: table 4.4.
1432  *
1433  * @return  Returns error status. 0 - OK, !0 - error
1434  */
1435 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1436 {
1437     int i, ret, common_window, ms_present = 0;
1438
1439     common_window = get_bits1(gb);
1440     if (common_window) {
1441         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1442             return -1;
1443         i = cpe->ch[1].ics.use_kb_window[0];
1444         cpe->ch[1].ics = cpe->ch[0].ics;
1445         cpe->ch[1].ics.use_kb_window[1] = i;
1446         if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN))
1447             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1448                 decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1449         ms_present = get_bits(gb, 2);
1450         if (ms_present == 3) {
1451             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1452             return -1;
1453         } else if (ms_present)
1454             decode_mid_side_stereo(cpe, gb, ms_present);
1455     }
1456     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1457         return ret;
1458     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1459         return ret;
1460
1461     if (common_window) {
1462         if (ms_present)
1463             apply_mid_side_stereo(ac, cpe);
1464         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1465             apply_prediction(ac, &cpe->ch[0]);
1466             apply_prediction(ac, &cpe->ch[1]);
1467         }
1468     }
1469
1470     apply_intensity_stereo(ac, cpe, ms_present);
1471     return 0;
1472 }
1473
/**
 * Bases for coupling gains; decode_cce() indexes this table with a 2-bit
 * field from the bitstream and raises the selected entry to a power.
 */
1474 static const float cce_scale[] = {
1475     1.09050773266525765921, //2^(1/8)
1476     1.18920711500272106672, //2^(1/4)
1477     M_SQRT2,                //2^(1/2)
1478     2,
1479 };
1480
1481 /**
1482  * Decode coupling_channel_element; reference: table 4.8.
1483  *
1484  * @return  Returns error status. 0 - OK, !0 - error
1485  */
1486 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1487 {
1488     int num_gain = 0;
1489     int c, g, sfb, ret;
1490     int sign;
1491     float scale;
1492     SingleChannelElement *sce = &che->ch[0];
1493     ChannelCoupling     *coup = &che->coup;
1494
1495     coup->coupling_point = 2 * get_bits1(gb);
1496     coup->num_coupled = get_bits(gb, 3);
1497     for (c = 0; c <= coup->num_coupled; c++) {
1498         num_gain++;
1499         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1500         coup->id_select[c] = get_bits(gb, 4);
1501         if (coup->type[c] == TYPE_CPE) {
1502             coup->ch_select[c] = get_bits(gb, 2);
1503             if (coup->ch_select[c] == 3)
1504                 num_gain++;
1505         } else
1506             coup->ch_select[c] = 2;
1507     }
1508     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1509
1510     sign  = get_bits(gb, 1);
1511     scale = cce_scale[get_bits(gb, 2)];
1512
1513     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1514         return ret;
1515
1516     for (c = 0; c < num_gain; c++) {
1517         int idx  = 0;
1518         int cge  = 1;
1519         int gain = 0;
1520         float gain_cache = 1.;
1521         if (c) {
1522             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1523             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1524             gain_cache = powf(scale, -gain);
1525         }
1526         if (coup->coupling_point == AFTER_IMDCT) {
1527             coup->gain[c][0] = gain_cache;
1528         } else {
1529             for (g = 0; g < sce->ics.num_window_groups; g++) {
1530                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1531                     if (sce->band_type[idx] != ZERO_BT) {
1532                         if (!cge) {
1533                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1534                             if (t) {
1535                                 int s = 1;
1536                                 t = gain += t;
1537                                 if (sign) {
1538                                     s  -= 2 * (t & 0x1);
1539                                     t >>= 1;
1540                                 }
1541                                 gain_cache = powf(scale, -t) * s;
1542                             }
1543                         }
1544                         coup->gain[c][idx] = gain_cache;
1545                     }
1546                 }
1547             }
1548         }
1549     }
1550     return 0;
1551 }
1552
1553 /**
1554  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1555  *
1556  * @return  Returns number of bytes consumed.
1557  */
1558 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1559                                          GetBitContext *gb)
1560 {
1561     int i;
1562     int num_excl_chan = 0;
1563
1564     do {
1565         for (i = 0; i < 7; i++)
1566             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1567     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1568
1569     return num_excl_chan / 7;
1570 }
1571
1572 /**
1573  * Decode dynamic range information; reference: table 4.52.
1574  *
1575  * @param   cnt length of TYPE_FIL syntactic element in bytes
1576  *
1577  * @return  Returns number of bytes consumed.
1578  */
1579 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1580                                 GetBitContext *gb, int cnt)
1581 {
1582     int n             = 1;
1583     int drc_num_bands = 1;
1584     int i;
1585
1586     /* pce_tag_present? */
1587     if (get_bits1(gb)) {
1588         che_drc->pce_instance_tag  = get_bits(gb, 4);
1589         skip_bits(gb, 4); // tag_reserved_bits
1590         n++;
1591     }
1592
1593     /* excluded_chns_present? */
1594     if (get_bits1(gb)) {
1595         n += decode_drc_channel_exclusions(che_drc, gb);
1596     }
1597
1598     /* drc_bands_present? */
1599     if (get_bits1(gb)) {
1600         che_drc->band_incr            = get_bits(gb, 4);
1601         che_drc->interpolation_scheme = get_bits(gb, 4);
1602         n++;
1603         drc_num_bands += che_drc->band_incr;
1604         for (i = 0; i < drc_num_bands; i++) {
1605             che_drc->band_top[i] = get_bits(gb, 8);
1606             n++;
1607         }
1608     }
1609
1610     /* prog_ref_level_present? */
1611     if (get_bits1(gb)) {
1612         che_drc->prog_ref_level = get_bits(gb, 7);
1613         skip_bits1(gb); // prog_ref_level_reserved_bits
1614         n++;
1615     }
1616
1617     for (i = 0; i < drc_num_bands; i++) {
1618         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1619         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1620         n++;
1621     }
1622
1623     return n;
1624 }
1625
1626 /**
1627  * Decode extension data (incomplete); reference: table 4.51.
1628  *
1629  * @param   cnt length of TYPE_FIL syntactic element in bytes
1630  *
1631  * @return Returns number of bytes consumed
1632  */
1633 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1634                                     ChannelElement *che, enum RawDataBlockType elem_type)
1635 {
1636     int crc_flag = 0;
1637     int res = cnt;
1638     switch (get_bits(gb, 4)) { // extension type
1639     case EXT_SBR_DATA_CRC:
1640         crc_flag++;
1641     case EXT_SBR_DATA:
1642         if (!che) {
1643             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1644             return res;
1645         } else if (!ac->m4ac.sbr) {
1646             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1647             skip_bits_long(gb, 8 * cnt - 4);
1648             return res;
1649         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1650             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1651             skip_bits_long(gb, 8 * cnt - 4);
1652             return res;
1653         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1654             ac->m4ac.sbr = 1;
1655             ac->m4ac.ps = 1;
1656             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1657         } else {
1658             ac->m4ac.sbr = 1;
1659         }
1660         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1661         break;
1662     case EXT_DYNAMIC_RANGE:
1663         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1664         break;
1665     case EXT_FILL:
1666     case EXT_FILL_DATA:
1667     case EXT_DATA_ELEMENT:
1668     default:
1669         skip_bits_long(gb, 8 * cnt - 4);
1670         break;
1671     };
1672     return res;
1673 }
1674
1675 /**
1676  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1677  *
1678  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1679  * @param   coef    spectral coefficients
1680  */
1681 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1682                       IndividualChannelStream *ics, int decode)
1683 {
1684     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1685     int w, filt, m, i;
1686     int bottom, top, order, start, end, size, inc;
1687     float lpc[TNS_MAX_ORDER];
1688     float tmp[TNS_MAX_ORDER];
1689
1690     for (w = 0; w < ics->num_windows; w++) {
1691         bottom = ics->num_swb;
1692         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1693             top    = bottom;
1694             bottom = FFMAX(0, top - tns->length[w][filt]);
1695             order  = tns->order[w][filt];
1696             if (order == 0)
1697                 continue;
1698
1699             // tns_decode_coef
1700             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1701
1702             start = ics->swb_offset[FFMIN(bottom, mmm)];
1703             end   = ics->swb_offset[FFMIN(   top, mmm)];
1704             if ((size = end - start) <= 0)
1705                 continue;
1706             if (tns->direction[w][filt]) {
1707                 inc = -1;
1708                 start = end - 1;
1709             } else {
1710                 inc = 1;
1711             }
1712             start += w * 128;
1713
1714             if (decode) {
1715                 // ar filter
1716                 for (m = 0; m < size; m++, start += inc)
1717                     for (i = 1; i <= FFMIN(m, order); i++)
1718                         coef[start] -= coef[start - i * inc] * lpc[i - 1];
1719             } else {
1720                 // ma filter
1721                 for (m = 0; m < size; m++, start += inc) {
1722                     tmp[0] = coef[start];
1723                     for (i = 1; i <= FFMIN(m, order); i++)
1724                         coef[start] += tmp[i] * lpc[i - 1];
1725                     for (i = order; i > 0; i--)
1726                         tmp[i] = tmp[i - 1];
1727                 }
1728             }
1729         }
1730     }
1731 }
1732
1733 /**
1734  *  Apply windowing and MDCT to obtain the spectral
1735  *  coefficient from the predicted sample by LTP.
1736  */
1737 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
1738                                    float *in, IndividualChannelStream *ics)
1739 {
1740     const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1741     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1742     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1743     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1744
1745     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
1746         ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
1747     } else {
1748         memset(in, 0, 448 * sizeof(float));
1749         ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
1750         memcpy(in + 576, in + 576, 448 * sizeof(float));
1751     }
1752     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
1753         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
1754     } else {
1755         memcpy(in + 1024, in + 1024, 448 * sizeof(float));
1756         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
1757         memset(in + 1024 + 576, 0, 448 * sizeof(float));
1758     }
1759     ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
1760 }
1761
1762 /**
1763  * Apply the long term prediction
1764  */
1765 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
1766 {
1767     const LongTermPrediction *ltp = &sce->ics.ltp;
1768     const uint16_t *offsets = sce->ics.swb_offset;
1769     int i, sfb;
1770
1771     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1772         float *predTime = sce->ret;
1773         float *predFreq = ac->buf_mdct;
1774         int16_t num_samples = 2048;
1775
1776         if (ltp->lag < 1024)
1777             num_samples = ltp->lag + 1024;
1778         for (i = 0; i < num_samples; i++)
1779             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
1780         memset(&predTime[i], 0, (2048 - i) * sizeof(float));
1781
1782         windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
1783
1784         if (sce->tns.present)
1785             apply_tns(predFreq, &sce->tns, &sce->ics, 0);
1786
1787         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
1788             if (ltp->used[sfb])
1789                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
1790                     sce->coeffs[i] += predFreq[i];
1791     }
1792 }
1793
1794 /**
1795  * Update the LTP buffer for next frame
1796  */
1797 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
1798 {
1799     IndividualChannelStream *ics = &sce->ics;
1800     float *saved     = sce->saved;
1801     float *saved_ltp = sce->coeffs;
1802     const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1803     const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1804     int i;
1805
1806     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1807         memcpy(saved_ltp,       saved, 512 * sizeof(float));
1808         memset(saved_ltp + 576, 0,     448 * sizeof(float));
1809         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
1810         for (i = 0; i < 64; i++)
1811             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
1812     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1813         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
1814         memset(saved_ltp + 576, 0,                  448 * sizeof(float));
1815         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
1816         for (i = 0; i < 64; i++)
1817             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
1818     } else { // LONG_STOP or ONLY_LONG
1819         ac->dsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
1820         for (i = 0; i < 512; i++)
1821             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
1822     }
1823
1824     memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t));
1825     ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret,  1024);
1826     ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024);
1827 }
1828
1829 /**
1830  * Conduct IMDCT and windowing.
1831  */
1832 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
1833 {
1834     IndividualChannelStream *ics = &sce->ics;
1835     float *in    = sce->coeffs;
1836     float *out   = sce->ret;
1837     float *saved = sce->saved;
1838     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1839     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1840     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1841     float *buf  = ac->buf_mdct;
1842     float *temp = ac->temp;
1843     int i;
1844
1845     // imdct
1846     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1847         for (i = 0; i < 1024; i += 128)
1848             ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
1849     } else
1850         ac->mdct.imdct_half(&ac->mdct, buf, in);
1851
1852     /* window overlapping
1853      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1854      * and long to short transitions are considered to be short to short
1855      * transitions. This leaves just two cases (long to long and short to short)
1856      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1857      */
1858     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1859             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1860         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
1861     } else {
1862         memcpy(                        out,               saved,            448 * sizeof(float));
1863
1864         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1865             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
1866             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
1867             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
1868             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
1869             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
1870             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1871         } else {
1872             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
1873             memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
1874         }
1875     }
1876
1877     // buffer update
1878     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1879         memcpy(                    saved,       temp + 64,         64 * sizeof(float));
1880         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
1881         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
1882         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
1883         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1884     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1885         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1886         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1887     } else { // LONG_STOP or ONLY_LONG
1888         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1889     }
1890 }
1891
1892 /**
1893  * Apply dependent channel coupling (applied before IMDCT).
1894  *
1895  * @param   index   index into coupling gain array
1896  */
1897 static void apply_dependent_coupling(AACContext *ac,
1898                                      SingleChannelElement *target,
1899                                      ChannelElement *cce, int index)
1900 {
1901     IndividualChannelStream *ics = &cce->ch[0].ics;
1902     const uint16_t *offsets = ics->swb_offset;
1903     float *dest = target->coeffs;
1904     const float *src = cce->ch[0].coeffs;
1905     int g, i, group, k, idx = 0;
1906     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1907         av_log(ac->avctx, AV_LOG_ERROR,
1908                "Dependent coupling is not supported together with LTP\n");
1909         return;
1910     }
1911     for (g = 0; g < ics->num_window_groups; g++) {
1912         for (i = 0; i < ics->max_sfb; i++, idx++) {
1913             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1914                 const float gain = cce->coup.gain[index][idx];
1915                 for (group = 0; group < ics->group_len[g]; group++) {
1916                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1917                         // XXX dsputil-ize
1918                         dest[group * 128 + k] += gain * src[group * 128 + k];
1919                     }
1920                 }
1921             }
1922         }
1923         dest += ics->group_len[g] * 128;
1924         src  += ics->group_len[g] * 128;
1925     }
1926 }
1927
1928 /**
1929  * Apply independent channel coupling (applied after IMDCT).
1930  *
1931  * @param   index   index into coupling gain array
1932  */
1933 static void apply_independent_coupling(AACContext *ac,
1934                                        SingleChannelElement *target,
1935                                        ChannelElement *cce, int index)
1936 {
1937     int i;
1938     const float gain = cce->coup.gain[index][0];
1939     const float *src = cce->ch[0].ret;
1940     float *dest = target->ret;
1941     const int len = 1024 << (ac->m4ac.sbr == 1);
1942
1943     for (i = 0; i < len; i++)
1944         dest[i] += gain * src[i];
1945 }
1946
1947 /**
1948  * channel coupling transformation interface
1949  *
1950  * @param   apply_coupling_method   pointer to (in)dependent coupling function
1951  */
1952 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
1953                                    enum RawDataBlockType type, int elem_id,
1954                                    enum CouplingPoint coupling_point,
1955                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
1956 {
1957     int i, c;
1958
1959     for (i = 0; i < MAX_ELEM_ID; i++) {
1960         ChannelElement *cce = ac->che[TYPE_CCE][i];
1961         int index = 0;
1962
1963         if (cce && cce->coup.coupling_point == coupling_point) {
1964             ChannelCoupling *coup = &cce->coup;
1965
1966             for (c = 0; c <= coup->num_coupled; c++) {
1967                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
1968                     if (coup->ch_select[c] != 1) {
1969                         apply_coupling_method(ac, &cc->ch[0], cce, index);
1970                         if (coup->ch_select[c] != 0)
1971                             index++;
1972                     }
1973                     if (coup->ch_select[c] != 2)
1974                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
1975                 } else
1976                     index += 1 + (coup->ch_select[c] == 3);
1977             }
1978         }
1979     }
1980 }
1981
1982 /**
1983  * Convert spectral data to float samples, applying all supported tools as appropriate.
1984  */
1985 static void spectral_to_sample(AACContext *ac)
1986 {
1987     int i, type;
1988     for (type = 3; type >= 0; type--) {
1989         for (i = 0; i < MAX_ELEM_ID; i++) {
1990             ChannelElement *che = ac->che[type][i];
1991             if (che) {
1992                 if (type <= TYPE_CPE)
1993                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
1994                 if (ac->m4ac.object_type == AOT_AAC_LTP) {
1995                     if (che->ch[0].ics.predictor_present) {
1996                         if (che->ch[0].ics.ltp.present)
1997                             apply_ltp(ac, &che->ch[0]);
1998                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
1999                             apply_ltp(ac, &che->ch[1]);
2000                     }
2001                 }
2002                 if (che->ch[0].tns.present)
2003                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2004                 if (che->ch[1].tns.present)
2005                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2006                 if (type <= TYPE_CPE)
2007                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2008                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2009                     imdct_and_windowing(ac, &che->ch[0]);
2010                     if (ac->m4ac.object_type == AOT_AAC_LTP)
2011                         update_ltp(ac, &che->ch[0]);
2012                     if (type == TYPE_CPE) {
2013                         imdct_and_windowing(ac, &che->ch[1]);
2014                         if (ac->m4ac.object_type == AOT_AAC_LTP)
2015                             update_ltp(ac, &che->ch[1]);
2016                     }
2017                     if (ac->m4ac.sbr > 0) {
2018                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2019                     }
2020                 }
2021                 if (type <= TYPE_CCE)
2022                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2023             }
2024         }
2025     }
2026 }
2027
2028 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2029 {
2030     int size;
2031     AACADTSHeaderInfo hdr_info;
2032
2033     size = ff_aac_parse_header(gb, &hdr_info);
2034     if (size > 0) {
2035         if (ac->output_configured != OC_LOCKED && hdr_info.chan_config) {
2036             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2037             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2038             ac->m4ac.chan_config = hdr_info.chan_config;
2039             if (set_default_channel_config(ac->avctx, new_che_pos, hdr_info.chan_config))
2040                 return -7;
2041             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
2042                 return -7;
2043         } else if (ac->output_configured != OC_LOCKED) {
2044             ac->output_configured = OC_NONE;
2045         }
2046         if (ac->output_configured != OC_LOCKED) {
2047             ac->m4ac.sbr = -1;
2048             ac->m4ac.ps  = -1;
2049         }
2050         ac->m4ac.sample_rate     = hdr_info.sample_rate;
2051         ac->m4ac.sampling_index  = hdr_info.sampling_index;
2052         ac->m4ac.object_type     = hdr_info.object_type;
2053         if (!ac->avctx->sample_rate)
2054             ac->avctx->sample_rate = hdr_info.sample_rate;
2055         if (hdr_info.num_aac_frames == 1) {
2056             if (!hdr_info.crc_absent)
2057                 skip_bits(gb, 16);
2058         } else {
2059             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
2060             return -1;
2061         }
2062     }
2063     return size;
2064 }
2065
2066 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2067                                 int *data_size, GetBitContext *gb)
2068 {
2069     AACContext *ac = avctx->priv_data;
2070     ChannelElement *che = NULL, *che_prev = NULL;
2071     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2072     int err, elem_id, data_size_tmp;
2073     int samples = 0, multiplier;
2074
2075     if (show_bits(gb, 12) == 0xfff) {
2076         if (parse_adts_frame_header(ac, gb) < 0) {
2077             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2078             return -1;
2079         }
2080         if (ac->m4ac.sampling_index > 12) {
2081             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2082             return -1;
2083         }
2084     }
2085
2086     ac->tags_mapped = 0;
2087     // parse
2088     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2089         elem_id = get_bits(gb, 4);
2090
2091         if (elem_type < TYPE_DSE) {
2092             if (!(che=get_che(ac, elem_type, elem_id))) {
2093                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2094                        elem_type, elem_id);
2095                 return -1;
2096             }
2097             samples = 1024;
2098         }
2099
2100         switch (elem_type) {
2101
2102         case TYPE_SCE:
2103             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2104             break;
2105
2106         case TYPE_CPE:
2107             err = decode_cpe(ac, gb, che);
2108             break;
2109
2110         case TYPE_CCE:
2111             err = decode_cce(ac, gb, che);
2112             break;
2113
2114         case TYPE_LFE:
2115             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2116             break;
2117
2118         case TYPE_DSE:
2119             err = skip_data_stream_element(ac, gb);
2120             break;
2121
2122         case TYPE_PCE: {
2123             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2124             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2125             if ((err = decode_pce(avctx, &ac->m4ac, new_che_pos, gb)))
2126                 break;
2127             if (ac->output_configured > OC_TRIAL_PCE)
2128                 av_log(avctx, AV_LOG_ERROR,
2129                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2130             else
2131                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2132             break;
2133         }
2134
2135         case TYPE_FIL:
2136             if (elem_id == 15)
2137                 elem_id += get_bits(gb, 8) - 1;
2138             if (get_bits_left(gb) < 8 * elem_id) {
2139                     av_log(avctx, AV_LOG_ERROR, overread_err);
2140                     return -1;
2141             }
2142             while (elem_id > 0)
2143                 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2144             err = 0; /* FIXME */
2145             break;
2146
2147         default:
2148             err = -1; /* should not happen, but keeps compiler happy */
2149             break;
2150         }
2151
2152         che_prev       = che;
2153         elem_type_prev = elem_type;
2154
2155         if (err)
2156             return err;
2157
2158         if (get_bits_left(gb) < 3) {
2159             av_log(avctx, AV_LOG_ERROR, overread_err);
2160             return -1;
2161         }
2162     }
2163
2164     spectral_to_sample(ac);
2165
2166     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2167     samples <<= multiplier;
2168     if (ac->output_configured < OC_LOCKED) {
2169         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2170         avctx->frame_size = samples;
2171     }
2172
2173     data_size_tmp = samples * avctx->channels;
2174     data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(float) : sizeof(int16_t);
2175     if (*data_size < data_size_tmp) {
2176         av_log(avctx, AV_LOG_ERROR,
2177                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2178                *data_size, data_size_tmp);
2179         return -1;
2180     }
2181     *data_size = data_size_tmp;
2182
2183     if (samples) {
2184         if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
2185             float_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2186         } else
2187             ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2188     }
2189
2190     if (ac->output_configured)
2191         ac->output_configured = OC_LOCKED;
2192
2193     return 0;
2194 }
2195
2196 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2197                             int *data_size, AVPacket *avpkt)
2198 {
2199     const uint8_t *buf = avpkt->data;
2200     int buf_size = avpkt->size;
2201     GetBitContext gb;
2202     int buf_consumed;
2203     int buf_offset;
2204     int err;
2205
2206     init_get_bits(&gb, buf, buf_size * 8);
2207
2208     if ((err = aac_decode_frame_int(avctx, data, data_size, &gb)) < 0)
2209         return err;
2210
2211     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2212     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2213         if (buf[buf_offset])
2214             break;
2215
2216     return buf_size > buf_offset ? buf_consumed : buf_size;
2217 }
2218
2219 static av_cold int aac_decode_close(AVCodecContext *avctx)
2220 {
2221     AACContext *ac = avctx->priv_data;
2222     int i, type;
2223
2224     for (i = 0; i < MAX_ELEM_ID; i++) {
2225         for (type = 0; type < 4; type++) {
2226             if (ac->che[type][i])
2227                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2228             av_freep(&ac->che[type][i]);
2229         }
2230     }
2231
2232     ff_mdct_end(&ac->mdct);
2233     ff_mdct_end(&ac->mdct_small);
2234     ff_mdct_end(&ac->mdct_ltp);
2235     return 0;
2236 }
2237
2238
2239 #define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
2240
2241 struct LATMContext {
2242     AACContext      aac_ctx;             ///< containing AACContext
2243     int             initialized;         ///< initilized after a valid extradata was seen
2244
2245     // parser data
2246     int             audio_mux_version_A; ///< LATM syntax version
2247     int             frame_length_type;   ///< 0/1 variable/fixed frame length
2248     int             frame_length;        ///< frame length for fixed frame length
2249 };
2250
2251 static inline uint32_t latm_get_value(GetBitContext *b)
2252 {
2253     int length = get_bits(b, 2);
2254
2255     return get_bits_long(b, (length+1)*8);
2256 }
2257
2258 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2259                                              GetBitContext *gb)
2260 {
2261     AVCodecContext *avctx = latmctx->aac_ctx.avctx;
2262     MPEG4AudioConfig m4ac;
2263     int  config_start_bit = get_bits_count(gb);
2264     int     bits_consumed, esize;
2265
2266     if (config_start_bit % 8) {
2267         av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
2268                                "config not byte aligned.\n", 1);
2269         return AVERROR_INVALIDDATA;
2270     } else {
2271         bits_consumed =
2272             decode_audio_specific_config(NULL, avctx, &m4ac,
2273                                          gb->buffer + (config_start_bit / 8),
2274                                          get_bits_left(gb) / 8);
2275
2276         if (bits_consumed < 0)
2277             return AVERROR_INVALIDDATA;
2278
2279         esize = (bits_consumed+7) / 8;
2280
2281         if (avctx->extradata_size <= esize) {
2282             av_free(avctx->extradata);
2283             avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2284             if (!avctx->extradata)
2285                 return AVERROR(ENOMEM);
2286         }
2287
2288         avctx->extradata_size = esize;
2289         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2290         memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2291
2292         skip_bits_long(gb, bits_consumed);
2293     }
2294
2295     return bits_consumed;
2296 }
2297
2298 static int read_stream_mux_config(struct LATMContext *latmctx,
2299                                   GetBitContext *gb)
2300 {
2301     int ret, audio_mux_version = get_bits(gb, 1);
2302
2303     latmctx->audio_mux_version_A = 0;
2304     if (audio_mux_version)
2305         latmctx->audio_mux_version_A = get_bits(gb, 1);
2306
2307     if (!latmctx->audio_mux_version_A) {
2308
2309         if (audio_mux_version)
2310             latm_get_value(gb);                 // taraFullness
2311
2312         skip_bits(gb, 1);                       // allStreamSameTimeFraming
2313         skip_bits(gb, 6);                       // numSubFrames
2314         // numPrograms
2315         if (get_bits(gb, 4)) {                  // numPrograms
2316             av_log_missing_feature(latmctx->aac_ctx.avctx,
2317                                    "multiple programs are not supported\n", 1);
2318             return AVERROR_PATCHWELCOME;
2319         }
2320
2321         // for each program (which there is only on in DVB)
2322
2323         // for each layer (which there is only on in DVB)
2324         if (get_bits(gb, 3)) {                   // numLayer
2325             av_log_missing_feature(latmctx->aac_ctx.avctx,
2326                                    "multiple layers are not supported\n", 1);
2327             return AVERROR_PATCHWELCOME;
2328         }
2329
2330         // for all but first stream: use_same_config = get_bits(gb, 1);
2331         if (!audio_mux_version) {
2332             if ((ret = latm_decode_audio_specific_config(latmctx, gb)) < 0)
2333                 return ret;
2334         } else {
2335             int ascLen = latm_get_value(gb);
2336             if ((ret = latm_decode_audio_specific_config(latmctx, gb)) < 0)
2337                 return ret;
2338             ascLen -= ret;
2339             skip_bits_long(gb, ascLen);
2340         }
2341
2342         latmctx->frame_length_type = get_bits(gb, 3);
2343         switch (latmctx->frame_length_type) {
2344         case 0:
2345             skip_bits(gb, 8);       // latmBufferFullness
2346             break;
2347         case 1:
2348             latmctx->frame_length = get_bits(gb, 9);
2349             break;
2350         case 3:
2351         case 4:
2352         case 5:
2353             skip_bits(gb, 6);       // CELP frame length table index
2354             break;
2355         case 6:
2356         case 7:
2357             skip_bits(gb, 1);       // HVXC frame length table index
2358             break;
2359         }
2360
2361         if (get_bits(gb, 1)) {                  // other data
2362             if (audio_mux_version) {
2363                 latm_get_value(gb);             // other_data_bits
2364             } else {
2365                 int esc;
2366                 do {
2367                     esc = get_bits(gb, 1);
2368                     skip_bits(gb, 8);
2369                 } while (esc);
2370             }
2371         }
2372
2373         if (get_bits(gb, 1))                     // crc present
2374             skip_bits(gb, 8);                    // config_crc
2375     }
2376
2377     return 0;
2378 }
2379
2380 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2381 {
2382     uint8_t tmp;
2383
2384     if (ctx->frame_length_type == 0) {
2385         int mux_slot_length = 0;
2386         do {
2387             tmp = get_bits(gb, 8);
2388             mux_slot_length += tmp;
2389         } while (tmp == 255);
2390         return mux_slot_length;
2391     } else if (ctx->frame_length_type == 1) {
2392         return ctx->frame_length;
2393     } else if (ctx->frame_length_type == 3 ||
2394                ctx->frame_length_type == 5 ||
2395                ctx->frame_length_type == 7) {
2396         skip_bits(gb, 2);          // mux_slot_length_coded
2397     }
2398     return 0;
2399 }
2400
2401 static int read_audio_mux_element(struct LATMContext *latmctx,
2402                                   GetBitContext *gb)
2403 {
2404     int err;
2405     uint8_t use_same_mux = get_bits(gb, 1);
2406     if (!use_same_mux) {
2407         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2408             return err;
2409     } else if (!latmctx->aac_ctx.avctx->extradata) {
2410         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2411                "no decoder config found\n");
2412         return AVERROR(EAGAIN);
2413     }
2414     if (latmctx->audio_mux_version_A == 0) {
2415         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2416         if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2417             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2418             return AVERROR_INVALIDDATA;
2419         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2420             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2421                    "frame length mismatch %d << %d\n",
2422                    mux_slot_length_bytes * 8, get_bits_left(gb));
2423             return AVERROR_INVALIDDATA;
2424         }
2425     }
2426     return 0;
2427 }
2428
2429
2430 static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
2431                              AVPacket *avpkt)
2432 {
2433     struct LATMContext *latmctx = avctx->priv_data;
2434     int                 muxlength, err;
2435     GetBitContext       gb;
2436
2437     if (avpkt->size == 0)
2438         return 0;
2439
2440     init_get_bits(&gb, avpkt->data, avpkt->size * 8);
2441
2442     // check for LOAS sync word
2443     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2444         return AVERROR_INVALIDDATA;
2445
2446     muxlength = get_bits(&gb, 13) + 3;
2447     // not enough data, the parser should have sorted this
2448     if (muxlength > avpkt->size)
2449         return AVERROR_INVALIDDATA;
2450
2451     if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2452         return err;
2453
2454     if (!latmctx->initialized) {
2455         if (!avctx->extradata) {
2456             *out_size = 0;
2457             return avpkt->size;
2458         } else {
2459             if ((err = aac_decode_init(avctx)) < 0)
2460                 return err;
2461             latmctx->initialized = 1;
2462         }
2463     }
2464
2465     if (show_bits(&gb, 12) == 0xfff) {
2466         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2467                "ADTS header detected, probably as result of configuration "
2468                "misparsing\n");
2469         return AVERROR_INVALIDDATA;
2470     }
2471
2472     if ((err = aac_decode_frame_int(avctx, out, out_size, &gb)) < 0)
2473         return err;
2474
2475     return muxlength;
2476 }
2477
2478 av_cold static int latm_decode_init(AVCodecContext *avctx)
2479 {
2480     struct LATMContext *latmctx = avctx->priv_data;
2481     int ret;
2482
2483     ret = aac_decode_init(avctx);
2484
2485     if (avctx->extradata_size > 0) {
2486         latmctx->initialized = !ret;
2487     } else {
2488         latmctx->initialized = 0;
2489     }
2490
2491     return ret;
2492 }
2493
2494
2495 AVCodec ff_aac_decoder = {
2496     "aac",
2497     AVMEDIA_TYPE_AUDIO,
2498     CODEC_ID_AAC,
2499     sizeof(AACContext),
2500     aac_decode_init,
2501     NULL,
2502     aac_decode_close,
2503     aac_decode_frame,
2504     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2505     .sample_fmts = (const enum AVSampleFormat[]) {
2506         AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE
2507     },
2508     .channel_layouts = aac_channel_layout,
2509 };
2510
2511 /*
2512     Note: This decoder filter is intended to decode LATM streams transferred
2513     in MPEG transport streams which only contain one program.
2514     To do a more complex LATM demuxing a separate LATM demuxer should be used.
2515 */
2516 AVCodec ff_aac_latm_decoder = {
2517     .name = "aac_latm",
2518     .type = AVMEDIA_TYPE_AUDIO,
2519     .id   = CODEC_ID_AAC_LATM,
2520     .priv_data_size = sizeof(struct LATMContext),
2521     .init   = latm_decode_init,
2522     .close  = aac_decode_close,
2523     .decode = latm_decode_frame,
2524     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
2525     .sample_fmts = (const enum AVSampleFormat[]) {
2526         AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE
2527     },
2528     .channel_layouts = aac_channel_layout,
2529 };