git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * AAC LATM decoder
   7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   8  * Copyright (c) 2010      Janne Grunau <janne-ffmpeg@jannau.net>
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 /**
  28  * @file
  29  * AAC decoder
  30  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32  */
  33
  34 /*
  35  * supported tools
  36  *
  37  * Support?             Name
  38  * N (code in SoC repo) gain control
  39  * Y                    block switching
  40  * Y                    window shapes - standard
  41  * N                    window shapes - Low Delay
  42  * Y                    filterbank - standard
  43  * N (code in SoC repo) filterbank - Scalable Sample Rate
  44  * Y                    Temporal Noise Shaping
  45  * Y                    Long Term Prediction
  46  * Y                    intensity stereo
  47  * Y                    channel coupling
  48  * Y                    frequency domain prediction
  49  * Y                    Perceptual Noise Substitution
  50  * Y                    Mid/Side stereo
  51  * N                    Scalable Inverse AAC Quantization
  52  * N                    Frequency Selective Switch
  53  * N                    upsampling filter
  54  * Y                    quantization & coding - AAC
  55  * N                    quantization & coding - TwinVQ
  56  * N                    quantization & coding - BSAC
  57  * N                    AAC Error Resilience tools
  58  * N                    Error Resilience payload syntax
  59  * N                    Error Protection tool
  60  * N                    CELP
  61  * N                    Silence Compression
  62  * N                    HVXC
  63  * N                    HVXC 4kbits/s VR
  64  * N                    Structured Audio tools
  65  * N                    Structured Audio Sample Bank Format
  66  * N                    MIDI
  67  * N                    Harmonic and Individual Lines plus Noise
  68  * N                    Text-To-Speech Interface
  69  * Y                    Spectral Band Replication
  70  * Y (not in this code) Layer-1
  71  * Y (not in this code) Layer-2
  72  * Y (not in this code) Layer-3
  73  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  74  * Y                    Parametric Stereo
  75  * N                    Direct Stream Transfer
  76  *
  77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  78  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  79            Parametric Stereo.
  80  */
  81
  82
  83 #include "avcodec.h"
  84 #include "internal.h"
  85 #include "get_bits.h"
  86 #include "dsputil.h"
  87 #include "fft.h"
  88 #include "fmtconvert.h"
  89 #include "lpc.h"
  90 #include "kbdwin.h"
  91 #include "sinewin.h"
  92
  93 #include "aac.h"
  94 #include "aactab.h"
  95 #include "aacdectab.h"
  96 #include "cbrt_tablegen.h"
  97 #include "sbr.h"
  98 #include "aacsbr.h"
  99 #include "mpeg4audio.h"
 100 #include "aacadtsdec.h"
 101
 102 #include <assert.h>
 103 #include <errno.h>
 104 #include <math.h>
 105 #include <string.h>
 106
 107 #if ARCH_ARM
 108 #   include "arm/aac.h"
 109 #endif
 110
 111 union float754 {
 112     float f;
 113     uint32_t i;
 114 };
 115
 116 static VLC vlc_scalefactors;
 117 static VLC vlc_spectral[11];
 118
 119 static const char overread_err[] = "Input buffer exhausted before END element found\n";
 120
 121 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 122 {
 123     // For PCE based channel configurations map the channels solely based on tags.
 124     if (!ac->m4ac.chan_config) {
 125         return ac->tag_che_map[type][elem_id];
 126     }
 127     // For indexed channel configurations map the channels solely based on position.
 128     switch (ac->m4ac.chan_config) {
 129     case 7:
 130         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 131             ac->tags_mapped++;
 132             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 133         }
 134     case 6:
 135         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 136            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 137            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 138         if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 139             ac->tags_mapped++;
 140             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 141         }
 142     case 5:
 143         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 144             ac->tags_mapped++;
 145             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 146         }
 147     case 4:
 148         if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
 149             ac->tags_mapped++;
 150             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 151         }
 152     case 3:
 153     case 2:
 154         if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
 155             ac->tags_mapped++;
 156             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 157         } else if (ac->m4ac.chan_config == 2) {
 158             return NULL;
 159         }
 160     case 1:
 161         if (!ac->tags_mapped && type == TYPE_SCE) {
 162             ac->tags_mapped++;
 163             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 164         }
 165     default:
 166         return NULL;
 167     }
 168 }
 169
 170 /**
 171  * Check for the channel element in the current channel position configuration.
 172  * If it exists, make sure the appropriate element is allocated and map the
 173  * channel order to match the internal FFmpeg channel layout.
 174  *
 175  * @param   che_pos current channel position configuration
 176  * @param   type channel element type
 177  * @param   id channel element id
 178  * @param   channels count of the number of channels in the configuration
 179  *
 180  * @return  Returns error status. 0 - OK, !0 - error
 181  */
 182 static av_cold int che_configure(AACContext *ac,
 183                                  enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 184                                  int type, int id, int *channels)
 185 {
 186     if (che_pos[type][id]) {
 187         if (!ac->che[type][id]) {
 188             if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 189                 return AVERROR(ENOMEM);
 190             ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
 191         }
 192         if (type != TYPE_CCE) {
 193             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 194             if (type == TYPE_CPE ||
 195                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 196                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 197             }
 198         }
 199     } else {
 200         if (ac->che[type][id])
 201             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 202         av_freep(&ac->che[type][id]);
 203     }
 204     return 0;
 205 }
 206
 207 /**
 208  * Configure output channel order based on the current program configuration element.
 209  *
 210  * @param   che_pos current channel position configuration
 211  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 212  *
 213  * @return  Returns error status. 0 - OK, !0 - error
 214  */
 215 static av_cold int output_configure(AACContext *ac,
 216                                     enum ChannelPosition che_pos[4][MAX_ELEM_ID],
 217                                     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 218                                     int channel_config, enum OCStatus oc_type)
 219 {
 220     AVCodecContext *avctx = ac->avctx;
 221     int i, type, channels = 0, ret;
 222
 223     if (new_che_pos != che_pos)
 224     memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 225
 226     if (channel_config) {
 227         for (i = 0; i < tags_per_config[channel_config]; i++) {
 228             if ((ret = che_configure(ac, che_pos,
 229                                      aac_channel_layout_map[channel_config - 1][i][0],
 230                                      aac_channel_layout_map[channel_config - 1][i][1],
 231                                      &channels)))
 232                 return ret;
 233         }
 234
 235         memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 236
 237         avctx->channel_layout = aac_channel_layout[channel_config - 1];
 238     } else {
 239         /* Allocate or free elements depending on if they are in the
 240          * current program configuration.
 241          *
 242          * Set up default 1:1 output mapping.
 243          *
 244          * For a 5.1 stream the output order will be:
 245          *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
 246          */
 247
 248         for (i = 0; i < MAX_ELEM_ID; i++) {
 249             for (type = 0; type < 4; type++) {
 250                 if ((ret = che_configure(ac, che_pos, type, i, &channels)))
 251                     return ret;
 252             }
 253         }
 254
 255         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 256     }
 257
 258     avctx->channels = channels;
 259
 260     ac->output_configured = oc_type;
 261
 262     return 0;
 263 }
 264
 265 /**
 266  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 267  *
 268  * @param cpe_map Stereo (Channel Pair Element) map, NULL if stereo bit is not present.
 269  * @param sce_map mono (Single Channel Element) map
 270  * @param type speaker type/position for these channels
 271  */
 272 static void decode_channel_map(enum ChannelPosition *cpe_map,
 273                                enum ChannelPosition *sce_map,
 274                                enum ChannelPosition type,
 275                                GetBitContext *gb, int n)
 276 {
 277     while (n--) {
 278         enum ChannelPosition *map = cpe_map && get_bits1(gb) ? cpe_map : sce_map; // stereo or mono map
 279         map[get_bits(gb, 4)] = type;
 280     }
 281 }
 282
 283 /**
 284  * Decode program configuration element; reference: table 4.2.
 285  *
 286  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 287  *
 288  * @return  Returns error status. 0 - OK, !0 - error
 289  */
 290 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
 291                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 292                       GetBitContext *gb)
 293 {
 294     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 295     int comment_len;
 296
 297     skip_bits(gb, 2);  // object_type
 298
 299     sampling_index = get_bits(gb, 4);
 300     if (m4ac->sampling_index != sampling_index)
 301         av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 302
 303     num_front       = get_bits(gb, 4);
 304     num_side        = get_bits(gb, 4);
 305     num_back        = get_bits(gb, 4);
 306     num_lfe         = get_bits(gb, 2);
 307     num_assoc_data  = get_bits(gb, 3);
 308     num_cc          = get_bits(gb, 4);
 309
 310     if (get_bits1(gb))
 311         skip_bits(gb, 4); // mono_mixdown_tag
 312     if (get_bits1(gb))
 313         skip_bits(gb, 4); // stereo_mixdown_tag
 314
 315     if (get_bits1(gb))
 316         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 317
 318     if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
 319         av_log(avctx, AV_LOG_ERROR, overread_err);
 320         return -1;
 321     }
 322     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
 323     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE,  gb, num_side );
 324     decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK,  gb, num_back );
 325     decode_channel_map(NULL,                  new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE,   gb, num_lfe  );
 326
 327     skip_bits_long(gb, 4 * num_assoc_data);
 328
 329     decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC,    gb, num_cc   );
 330
 331     align_get_bits(gb);
 332
 333     /* comment field, first byte is length */
 334     comment_len = get_bits(gb, 8) * 8;
 335     if (get_bits_left(gb) < comment_len) {
 336         av_log(avctx, AV_LOG_ERROR, overread_err);
 337         return -1;
 338     }
 339     skip_bits_long(gb, comment_len);
 340     return 0;
 341 }
 342
 343 /**
 344  * Set up channel positions based on a default channel configuration
 345  * as specified in table 1.17.
 346  *
 347  * @param   new_che_pos New channel position configuration - we only do something if it differs from the current one.
 348  *
 349  * @return  Returns error status. 0 - OK, !0 - error
 350  */
 351 static av_cold int set_default_channel_config(AVCodecContext *avctx,
 352                                               enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
 353                                               int channel_config)
 354 {
 355     if (channel_config < 1 || channel_config > 7) {
 356         av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 357                channel_config);
 358         return -1;
 359     }
 360
 361     /* default channel configurations:
 362      *
 363      * 1ch : front center (mono)
 364      * 2ch : L + R (stereo)
 365      * 3ch : front center + L + R
 366      * 4ch : front center + L + R + back center
 367      * 5ch : front center + L + R + back stereo
 368      * 6ch : front center + L + R + back stereo + LFE
 369      * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
 370      */
 371
 372     if (channel_config != 2)
 373         new_che_pos[TYPE_SCE][0] = AAC_CHANNEL_FRONT; // front center (or mono)
 374     if (channel_config > 1)
 375         new_che_pos[TYPE_CPE][0] = AAC_CHANNEL_FRONT; // L + R (or stereo)
 376     if (channel_config == 4)
 377         new_che_pos[TYPE_SCE][1] = AAC_CHANNEL_BACK;  // back center
 378     if (channel_config > 4)
 379         new_che_pos[TYPE_CPE][(channel_config == 7) + 1]
 380         = AAC_CHANNEL_BACK;  // back stereo
 381     if (channel_config > 5)
 382         new_che_pos[TYPE_LFE][0] = AAC_CHANNEL_LFE;   // LFE
 383     if (channel_config == 7)
 384         new_che_pos[TYPE_CPE][1] = AAC_CHANNEL_FRONT; // outer front left + outer front right
 385
 386     return 0;
 387 }
 388
 389 /**
 390  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 391  *
 392  * @param   ac          pointer to AACContext, may be null
 393  * @param   avctx       pointer to AVCCodecContext, used for logging
 394  *
 395  * @return  Returns error status. 0 - OK, !0 - error
 396  */
 397 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
 398                                      GetBitContext *gb,
 399                                      MPEG4AudioConfig *m4ac,
 400                                      int channel_config)
 401 {
 402     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 403     int extension_flag, ret;
 404
 405     if (get_bits1(gb)) { // frameLengthFlag
 406         av_log_missing_feature(avctx, "960/120 MDCT window is", 1);
 407         return -1;
 408     }
 409
 410     if (get_bits1(gb))       // dependsOnCoreCoder
 411         skip_bits(gb, 14);   // coreCoderDelay
 412     extension_flag = get_bits1(gb);
 413
 414     if (m4ac->object_type == AOT_AAC_SCALABLE ||
 415         m4ac->object_type == AOT_ER_AAC_SCALABLE)
 416         skip_bits(gb, 3);     // layerNr
 417
 418     memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 419     if (channel_config == 0) {
 420         skip_bits(gb, 4);  // element_instance_tag
 421         if ((ret = decode_pce(avctx, m4ac, new_che_pos, gb)))
 422             return ret;
 423     } else {
 424         if ((ret = set_default_channel_config(avctx, new_che_pos, channel_config)))
 425             return ret;
 426     }
 427     if (ac && (ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
 428         return ret;
 429
 430     if (extension_flag) {
 431         switch (m4ac->object_type) {
 432         case AOT_ER_BSAC:
 433             skip_bits(gb, 5);    // numOfSubFrame
 434             skip_bits(gb, 11);   // layer_length
 435             break;
 436         case AOT_ER_AAC_LC:
 437         case AOT_ER_AAC_LTP:
 438         case AOT_ER_AAC_SCALABLE:
 439         case AOT_ER_AAC_LD:
 440             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 441                                     * aacScalefactorDataResilienceFlag
 442                                     * aacSpectralDataResilienceFlag
 443                                     */
 444             break;
 445         }
 446         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 447     }
 448     return 0;
 449 }
 450
 451 /**
 452  * Decode audio specific configuration; reference: table 1.13.
 453  *
 454  * @param   ac          pointer to AACContext, may be null
 455  * @param   avctx       pointer to AVCCodecContext, used for logging
 456  * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
 457  * @param   data        pointer to AVCodecContext extradata
 458  * @param   data_size   size of AVCCodecContext extradata
 459  *
 460  * @return  Returns error status or number of consumed bits. <0 - error
 461  */
 462 static int decode_audio_specific_config(AACContext *ac,
 463                                         AVCodecContext *avctx,
 464                                         MPEG4AudioConfig *m4ac,
 465                                         const uint8_t *data, int data_size, int asclen)
 466 {
 467     GetBitContext gb;
 468     int i;
 469
 470     av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
 471     for (i = 0; i < avctx->extradata_size; i++)
 472          av_dlog(avctx, "%02x ", avctx->extradata[i]);
 473     av_dlog(avctx, "\n");
 474
 475     init_get_bits(&gb, data, data_size * 8);
 476
 477     if ((i = avpriv_mpeg4audio_get_config(m4ac, data, asclen/8)) < 0)
 478         return -1;
 479     if (m4ac->sampling_index > 12) {
 480         av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
 481         return -1;
 482     }
 483     if (m4ac->sbr == 1 && m4ac->ps == -1)
 484         m4ac->ps = 1;
 485
 486     skip_bits_long(&gb, i);
 487
 488     switch (m4ac->object_type) {
 489     case AOT_AAC_MAIN:
 490     case AOT_AAC_LC:
 491     case AOT_AAC_LTP:
 492         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
 493             return -1;
 494         break;
 495     default:
 496         av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 497                m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
 498         return -1;
 499     }
 500
 501     av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
 502             m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
 503             m4ac->sample_rate, m4ac->sbr, m4ac->ps);
 504
 505     return get_bits_count(&gb);
 506 }
 507
 508 /**
 509  * linear congruential pseudorandom number generator
 510  *
 511  * @param   previous_val    pointer to the current state of the generator
 512  *
 513  * @return  Returns a 32-bit pseudorandom integer
 514  */
 515 static av_always_inline int lcg_random(int previous_val)
 516 {
 517     return previous_val * 1664525 + 1013904223;
 518 }
 519
 520 static av_always_inline void reset_predict_state(PredictorState *ps)
 521 {
 522     ps->r0   = 0.0f;
 523     ps->r1   = 0.0f;
 524     ps->cor0 = 0.0f;
 525     ps->cor1 = 0.0f;
 526     ps->var0 = 1.0f;
 527     ps->var1 = 1.0f;
 528 }
 529
 530 static void reset_all_predictors(PredictorState *ps)
 531 {
 532     int i;
 533     for (i = 0; i < MAX_PREDICTORS; i++)
 534         reset_predict_state(&ps[i]);
 535 }
 536
 537 static int sample_rate_idx (int rate)
 538 {
 539          if (92017 <= rate) return 0;
 540     else if (75132 <= rate) return 1;
 541     else if (55426 <= rate) return 2;
 542     else if (46009 <= rate) return 3;
 543     else if (37566 <= rate) return 4;
 544     else if (27713 <= rate) return 5;
 545     else if (23004 <= rate) return 6;
 546     else if (18783 <= rate) return 7;
 547     else if (13856 <= rate) return 8;
 548     else if (11502 <= rate) return 9;
 549     else if (9391  <= rate) return 10;
 550     else                    return 11;
 551 }
 552
 553 static void reset_predictor_group(PredictorState *ps, int group_num)
 554 {
 555     int i;
 556     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 557         reset_predict_state(&ps[i]);
 558 }
 559
 560 #define AAC_INIT_VLC_STATIC(num, size) \
 561     INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
 562          ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
 563         ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
 564         size);
 565
 566 static av_cold int aac_decode_init(AVCodecContext *avctx)
 567 {
 568     AACContext *ac = avctx->priv_data;
 569     float output_scale_factor;
 570
 571     ac->avctx = avctx;
 572     ac->m4ac.sample_rate = avctx->sample_rate;
 573
 574     if (avctx->extradata_size > 0) {
 575         if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac,
 576                                          avctx->extradata,
 577                                          avctx->extradata_size, 8*avctx->extradata_size) < 0)
 578             return -1;
 579     } else {
 580         int sr, i;
 581         enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
 582
 583         sr = sample_rate_idx(avctx->sample_rate);
 584         ac->m4ac.sampling_index = sr;
 585         ac->m4ac.channels = avctx->channels;
 586         ac->m4ac.sbr = -1;
 587         ac->m4ac.ps = -1;
 588
 589         for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
 590             if (ff_mpeg4audio_channels[i] == avctx->channels)
 591                 break;
 592         if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
 593             i = 0;
 594         }
 595         ac->m4ac.chan_config = i;
 596
 597         if (ac->m4ac.chan_config) {
 598             int ret = set_default_channel_config(avctx, new_che_pos, ac->m4ac.chan_config);
 599             if (!ret)
 600                 output_configure(ac, ac->che_pos, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
 601             else if (avctx->err_recognition & AV_EF_EXPLODE)
 602                 return AVERROR_INVALIDDATA;
 603         }
 604     }
 605
 606     if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
 607         avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
 608         output_scale_factor = 1.0 / 32768.0;
 609     } else {
 610         avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 611         output_scale_factor = 1.0;
 612     }
 613
 614     AAC_INIT_VLC_STATIC( 0, 304);
 615     AAC_INIT_VLC_STATIC( 1, 270);
 616     AAC_INIT_VLC_STATIC( 2, 550);
 617     AAC_INIT_VLC_STATIC( 3, 300);
 618     AAC_INIT_VLC_STATIC( 4, 328);
 619     AAC_INIT_VLC_STATIC( 5, 294);
 620     AAC_INIT_VLC_STATIC( 6, 306);
 621     AAC_INIT_VLC_STATIC( 7, 268);
 622     AAC_INIT_VLC_STATIC( 8, 510);
 623     AAC_INIT_VLC_STATIC( 9, 366);
 624     AAC_INIT_VLC_STATIC(10, 462);
 625
 626     ff_aac_sbr_init();
 627
 628     dsputil_init(&ac->dsp, avctx);
 629     ff_fmt_convert_init(&ac->fmt_conv, avctx);
 630
 631     ac->random_state = 0x1f2e3d4c;
 632
 633     ff_aac_tableinit();
 634
 635     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 636                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 637                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 638                     352);
 639
 640     ff_mdct_init(&ac->mdct,       11, 1, output_scale_factor/1024.0);
 641     ff_mdct_init(&ac->mdct_small,  8, 1, output_scale_factor/128.0);
 642     ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0/output_scale_factor);
 643     // window initialization
 644     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 645     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 646     ff_init_ff_sine_windows(10);
 647     ff_init_ff_sine_windows( 7);
 648
 649     cbrt_tableinit();
 650
 651     return 0;
 652 }
 653
 654 /**
 655  * Skip data_stream_element; reference: table 4.10.
 656  */
 657 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 658 {
 659     int byte_align = get_bits1(gb);
 660     int count = get_bits(gb, 8);
 661     if (count == 255)
 662         count += get_bits(gb, 8);
 663     if (byte_align)
 664         align_get_bits(gb);
 665
 666     if (get_bits_left(gb) < 8 * count) {
 667         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 668         return -1;
 669     }
 670     skip_bits_long(gb, 8 * count);
 671     return 0;
 672 }
 673
 674 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 675                              GetBitContext *gb)
 676 {
 677     int sfb;
 678     if (get_bits1(gb)) {
 679         ics->predictor_reset_group = get_bits(gb, 5);
 680         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 681             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 682             return -1;
 683         }
 684     }
 685     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 686         ics->prediction_used[sfb] = get_bits1(gb);
 687     }
 688     return 0;
 689 }
 690
 691 /**
 692  * Decode Long Term Prediction data; reference: table 4.xx.
 693  */
 694 static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
 695                        GetBitContext *gb, uint8_t max_sfb)
 696 {
 697     int sfb;
 698
 699     ltp->lag  = get_bits(gb, 11);
 700     ltp->coef = ltp_coef[get_bits(gb, 3)];
 701     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
 702         ltp->used[sfb] = get_bits1(gb);
 703 }
 704
 705 /**
 706  * Decode Individual Channel Stream info; reference: table 4.6.
 707  *
 708  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 709  */
 710 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 711                            GetBitContext *gb, int common_window)
 712 {
 713     if (get_bits1(gb)) {
 714         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 715         memset(ics, 0, sizeof(IndividualChannelStream));
 716         return -1;
 717     }
 718     ics->window_sequence[1] = ics->window_sequence[0];
 719     ics->window_sequence[0] = get_bits(gb, 2);
 720     ics->use_kb_window[1]   = ics->use_kb_window[0];
 721     ics->use_kb_window[0]   = get_bits1(gb);
 722     ics->num_window_groups  = 1;
 723     ics->group_len[0]       = 1;
 724     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 725         int i;
 726         ics->max_sfb = get_bits(gb, 4);
 727         for (i = 0; i < 7; i++) {
 728             if (get_bits1(gb)) {
 729                 ics->group_len[ics->num_window_groups - 1]++;
 730             } else {
 731                 ics->num_window_groups++;
 732                 ics->group_len[ics->num_window_groups - 1] = 1;
 733             }
 734         }
 735         ics->num_windows       = 8;
 736         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 737         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 738         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 739         ics->predictor_present = 0;
 740     } else {
 741         ics->max_sfb               = get_bits(gb, 6);
 742         ics->num_windows           = 1;
 743         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 744         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 745         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 746         ics->predictor_present     = get_bits1(gb);
 747         ics->predictor_reset_group = 0;
 748         if (ics->predictor_present) {
 749             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 750                 if (decode_prediction(ac, ics, gb)) {
 751                     memset(ics, 0, sizeof(IndividualChannelStream));
 752                     return -1;
 753                 }
 754             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 755                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 756                 memset(ics, 0, sizeof(IndividualChannelStream));
 757                 return -1;
 758             } else {
 759                 if ((ics->ltp.present = get_bits(gb, 1)))
 760                     decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
 761             }
 762         }
 763     }
 764
 765     if (ics->max_sfb > ics->num_swb) {
 766         av_log(ac->avctx, AV_LOG_ERROR,
 767                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 768                ics->max_sfb, ics->num_swb);
 769         memset(ics, 0, sizeof(IndividualChannelStream));
 770         return -1;
 771     }
 772
 773     return 0;
 774 }
 775
 776 /**
 777  * Decode band types (section_data payload); reference: table 4.46.
 778  *
 779  * @param   band_type           array of the used band type
 780  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 781  *
 782  * @return  Returns error status. 0 - OK, !0 - error
 783  */
 784 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 785                              int band_type_run_end[120], GetBitContext *gb,
 786                              IndividualChannelStream *ics)
 787 {
 788     int g, idx = 0;
 789     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 790     for (g = 0; g < ics->num_window_groups; g++) {
 791         int k = 0;
 792         while (k < ics->max_sfb) {
 793             uint8_t sect_end = k;
 794             int sect_len_incr;
 795             int sect_band_type = get_bits(gb, 4);
 796             if (sect_band_type == 12) {
 797                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 798                 return -1;
 799             }
 800             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
 801                 sect_end += sect_len_incr;
 802             sect_end += sect_len_incr;
 803             if (get_bits_left(gb) < 0) {
 804                 av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 805                 return -1;
 806             }
 807             if (sect_end > ics->max_sfb) {
 808                 av_log(ac->avctx, AV_LOG_ERROR,
 809                        "Number of bands (%d) exceeds limit (%d).\n",
 810                        sect_end, ics->max_sfb);
 811                 return -1;
 812             }
 813             for (; k < sect_end; k++) {
 814                 band_type        [idx]   = sect_band_type;
 815                 band_type_run_end[idx++] = sect_end;
 816             }
 817         }
 818     }
 819     return 0;
 820 }
 821
 822 /**
 823  * Decode scalefactors; reference: table 4.47.
 824  *
 825  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
 826  * @param   band_type           array of the used band type
 827  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 828  * @param   sf                  array of scalefactors or intensity stereo positions
 829  *
 830  * @return  Returns error status. 0 - OK, !0 - error
 831  */
 832 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
 833                                unsigned int global_gain,
 834                                IndividualChannelStream *ics,
 835                                enum BandType band_type[120],
 836                                int band_type_run_end[120])
 837 {
 838     int g, i, idx = 0;
 839     int offset[3] = { global_gain, global_gain - 90, 0 };
 840     int clipped_offset;
 841     int noise_flag = 1;
 842     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
 843     for (g = 0; g < ics->num_window_groups; g++) {
 844         for (i = 0; i < ics->max_sfb;) {
 845             int run_end = band_type_run_end[idx];
 846             if (band_type[idx] == ZERO_BT) {
 847                 for (; i < run_end; i++, idx++)
 848                     sf[idx] = 0.;
 849             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
 850                 for (; i < run_end; i++, idx++) {
 851                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 852                     clipped_offset = av_clip(offset[2], -155, 100);
 853                     if (offset[2] != clipped_offset) {
 854                         av_log_ask_for_sample(ac->avctx, "Intensity stereo "
 855                                 "position clipped (%d -> %d).\nIf you heard an "
 856                                 "audible artifact, there may be a bug in the "
 857                                 "decoder. ", offset[2], clipped_offset);
 858                     }
 859                     sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
 860                 }
 861             } else if (band_type[idx] == NOISE_BT) {
 862                 for (; i < run_end; i++, idx++) {
 863                     if (noise_flag-- > 0)
 864                         offset[1] += get_bits(gb, 9) - 256;
 865                     else
 866                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 867                     clipped_offset = av_clip(offset[1], -100, 155);
 868                     if (offset[1] != clipped_offset) {
 869                         av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
 870                                 "(%d -> %d).\nIf you heard an audible "
 871                                 "artifact, there may be a bug in the decoder. ",
 872                                 offset[1], clipped_offset);
 873                     }
 874                     sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
 875                 }
 876             } else {
 877                 for (; i < run_end; i++, idx++) {
 878                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
 879                     if (offset[0] > 255U) {
 880                         av_log(ac->avctx, AV_LOG_ERROR,
 881                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
 882                         return -1;
 883                     }
 884                     sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
 885                 }
 886             }
 887         }
 888     }
 889     return 0;
 890 }
 891
 892 /**
 893  * Decode pulse data; reference: table 4.7.
 894  */
 895 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 896                          const uint16_t *swb_offset, int num_swb)
 897 {
 898     int i, pulse_swb;
 899     pulse->num_pulse = get_bits(gb, 2) + 1;
 900     pulse_swb        = get_bits(gb, 6);
 901     if (pulse_swb >= num_swb)
 902         return -1;
 903     pulse->pos[0]    = swb_offset[pulse_swb];
 904     pulse->pos[0]   += get_bits(gb, 5);
 905     if (pulse->pos[0] > 1023)
 906         return -1;
 907     pulse->amp[0]    = get_bits(gb, 4);
 908     for (i = 1; i < pulse->num_pulse; i++) {
 909         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
 910         if (pulse->pos[i] > 1023)
 911             return -1;
 912         pulse->amp[i] = get_bits(gb, 4);
 913     }
 914     return 0;
 915 }
 916
 917 /**
 918  * Decode Temporal Noise Shaping data; reference: table 4.48.
 919  *
 920  * @return  Returns error status. 0 - OK, !0 - error
 921  */
 922 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
 923                       GetBitContext *gb, const IndividualChannelStream *ics)
 924 {
 925     int w, filt, i, coef_len, coef_res, coef_compress;
 926     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 927     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
 928     for (w = 0; w < ics->num_windows; w++) {
 929         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
 930             coef_res = get_bits1(gb);
 931
 932             for (filt = 0; filt < tns->n_filt[w]; filt++) {
 933                 int tmp2_idx;
 934                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 935
 936                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
 937                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
 938                            tns->order[w][filt], tns_max_order);
 939                     tns->order[w][filt] = 0;
 940                     return -1;
 941                 }
 942                 if (tns->order[w][filt]) {
 943                     tns->direction[w][filt] = get_bits1(gb);
 944                     coef_compress = get_bits1(gb);
 945                     coef_len = coef_res + 3 - coef_compress;
 946                     tmp2_idx = 2 * coef_compress + coef_res;
 947
 948                     for (i = 0; i < tns->order[w][filt]; i++)
 949                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 950                 }
 951             }
 952         }
 953     }
 954     return 0;
 955 }
 956
 957 /**
 958  * Decode Mid/Side data; reference: table 4.54.
 959  *
 960  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 961  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 962  *                      [3] reserved for scalable AAC
 963  */
 964 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 965                                    int ms_present)
 966 {
 967     int idx;
 968     if (ms_present == 1) {
 969         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
 970             cpe->ms_mask[idx] = get_bits1(gb);
 971     } else if (ms_present == 2) {
 972         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
 973     }
 974 }
 975
 976 #ifndef VMUL2
 977 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
 978                            const float *scale)
 979 {
 980     float s = *scale;
 981     *dst++ = v[idx    & 15] * s;
 982     *dst++ = v[idx>>4 & 15] * s;
 983     return dst;
 984 }
 985 #endif
 986
 987 #ifndef VMUL4
 988 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 989                            const float *scale)
 990 {
 991     float s = *scale;
 992     *dst++ = v[idx    & 3] * s;
 993     *dst++ = v[idx>>2 & 3] * s;
 994     *dst++ = v[idx>>4 & 3] * s;
 995     *dst++ = v[idx>>6 & 3] * s;
 996     return dst;
 997 }
 998 #endif
 999
1000 #ifndef VMUL2S
1001 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1002                             unsigned sign, const float *scale)
1003 {
1004     union float754 s0, s1;
1005
1006     s0.f = s1.f = *scale;
1007     s0.i ^= sign >> 1 << 31;
1008     s1.i ^= sign      << 31;
1009
1010     *dst++ = v[idx    & 15] * s0.f;
1011     *dst++ = v[idx>>4 & 15] * s1.f;
1012
1013     return dst;
1014 }
1015 #endif
1016
1017 #ifndef VMUL4S
1018 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1019                             unsigned sign, const float *scale)
1020 {
1021     unsigned nz = idx >> 12;
1022     union float754 s = { .f = *scale };
1023     union float754 t;
1024
1025     t.i = s.i ^ (sign & 1U<<31);
1026     *dst++ = v[idx    & 3] * t.f;
1027
1028     sign <<= nz & 1; nz >>= 1;
1029     t.i = s.i ^ (sign & 1U<<31);
1030     *dst++ = v[idx>>2 & 3] * t.f;
1031
1032     sign <<= nz & 1; nz >>= 1;
1033     t.i = s.i ^ (sign & 1U<<31);
1034     *dst++ = v[idx>>4 & 3] * t.f;
1035
1036     sign <<= nz & 1; nz >>= 1;
1037     t.i = s.i ^ (sign & 1U<<31);
1038     *dst++ = v[idx>>6 & 3] * t.f;
1039
1040     return dst;
1041 }
1042 #endif
1043
/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * Bands are processed per window group and scalefactor band. Depending on the
 * band type a band is zeroed, filled with scaled pseudo-random noise, or
 * decoded from VLC codewords and multiplied by its scalefactor. Pulse data,
 * if present, is applied afterwards.
 *
 * @param   ac              decoder context (random state, DSP functions, logging)
 * @param   coef            array of dequantized, scaled spectral data
 * @param   gb              bitstream reader
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   ics             window/band layout of the current channel
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                                       GetBitContext *gb, const float sf[120],
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
    int i, k, g, idx = 0;
    /* number of coefficients per window */
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
    /* coef is advanced per window group below; keep the start for pulses */
    float *coef_base = coef;

    /* Clear everything above the last transmitted band in every window. */
    for (g = 0; g < ics->num_windows; g++)
        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));

    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            /* Unsigned on purpose: a band type of 0 wraps to a huge value and
             * therefore also takes the "zero the band" branch below
             * (presumably that is ZERO_BT -- confirm against the enum). */
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                /* No spectral data for this band: zero it in every window of the group. */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
                }
            } else if (cbt_m1 == NOISE_BT - 1) {
                /* Noise band: fill with pseudo-random values, then rescale so
                 * that the band is multiplied by sf[idx] / sqrt(energy). */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    float scale;
                    float band_energy;

                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state;
                    }

                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
                    scale = sf[idx] / sqrtf(band_energy);
                    ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
                }
            } else {
                /* VLC-coded band: pick the tables for this codebook. */
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                OPEN_READER(re, gb);

                /* Codebooks are handled in pairs that share a decoding scheme. */
                switch (cbt_m1 >> 1) {
                case 0:
                    /* 4 coefficients per codeword, no separate sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 1:
                    /* 4 coefficients per codeword, followed by nnz sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            bits = nnz ? GET_CACHE(re, gb) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 2:
                    /* 2 coefficients per codeword, no separate sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                case 3:
                case 4:
                    /* 2 coefficients per codeword, followed by nnz sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                default:
                    /* 2 coefficients per codeword with sign bits and optional
                     * escape sequences. Values are assembled as raw 32-bit
                     * float bit patterns through icf and the whole band is
                     * scaled afterwards. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            /* Code 0 stores two zero coefficients; "continue"
                             * still evaluates the loop condition (len -= 2). */
                            if (!code) {
                                *icf++ = 0;
                                *icf++ = 0;
                                continue;
                            }

                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
                            /* left-align the nnz sign bits so the next one is always bit 31 */
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);

                            for (j = 0; j < 2; j++) {
                                if (nzt & 1<<j) {
                                    uint32_t b;
                                    int n;
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    /* number of leading 1 bits in the cache */
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return -1;
                                    }

                                    /* skip the prefix and its terminating 0, then read b + 4 value bits */
                                    SKIP_BITS(re, gb, b + 1);
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
                                    bits <<= 1;
                                } else {
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
                                    *icf++ = (bits & 1U<<31) | v;
                                    /* a sign bit is consumed only for non-zero values */
                                    bits <<= !!v;
                                }
                                cb_idx >>= 4;
                            }
                        } while (len -= 2);

                        ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                    }
                }

                CLOSE_READER(re, gb);
            }
        }
        /* advance to the first window of the next group (128 coefficients per window) */
        coef += g_len << 7;
    }

    if (pulse_present) {
        idx = 0;
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            /* find the scalefactor band containing this pulse position */
            while (offsets[idx + 1] <= pulse->pos[i])
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
                float ico = -pulse->amp[i];
                if (co) {
                    /* Undo scaling and the |x|^(4/3) expansion, add the pulse
                     * amplitude with the coefficient's sign, then redo both below. */
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
            }
        }
    }
    return 0;
}
1269
1270 static av_always_inline float flt16_round(float pf)
1271 {
1272     union float754 tmp;
1273     tmp.f = pf;
1274     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1275     return tmp.f;
1276 }
1277
/**
 * Reduce a float to the upper 16 bits of its 32-bit representation, adding
 * 0x7FFF plus a one-bit tie-breaker to the integer view before masking.
 *
 * NOTE(review): ">>" binds tighter than "&", so the tie-breaker term below is
 * evaluated as tmp.i & (0x00010000U >> 16), i.e. bit 0 of the representation,
 * not bit 16 shifted down. A conventional round-half-to-even would use bit 16.
 * Confirm whether this is intentional before touching it: changing it alters
 * the numeric output of everything that calls this function.
 */
static av_always_inline float flt16_even(float pf)
{
    union float754 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
    return tmp.f;
}
1285
1286 static av_always_inline float flt16_trunc(float pf)
1287 {
1288     union float754 pun;
1289     pun.f = pf;
1290     pun.i &= 0xFFFF0000U;
1291     return pun.f;
1292 }
1293
/**
 * Run one step of the per-coefficient predictor: compute the predicted value
 * from the stored state, optionally add it to the coefficient, then update
 * the state from the (possibly corrected) coefficient.
 *
 * @param   ps             predictor state for this coefficient; updated in place
 * @param   coef           coefficient to predict; modified only if output_enable
 * @param   output_enable  non-zero to add the prediction to *coef
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
                                     int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float e0, e1;
    float pv;
    float k1, k2;
    /* work on local copies so every update below sees the old state */
    float   r0 = ps->r0,     r1 = ps->r1;
    float cor0 = ps->cor0, cor1 = ps->cor1;
    float var0 = ps->var0, var1 = ps->var1;

    /* coefficients are forced to 0 while the variance is not above 1 */
    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;

    /* predicted value */
    pv = flt16_round(k1 * r0 + k2 * r1);
    if (output_enable)
        *coef += pv;

    /* e0 is the coefficient after the optional correction above */
    e0 = *coef;
    e1 = e0 - k1 * r0;

    /* state update; all stored values are truncated to 16 bits */
    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));

    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}
1324
1325 /**
1326  * Apply AAC-Main style frequency domain prediction.
1327  */
1328 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1329 {
1330     int sfb, k;
1331
1332     if (!sce->ics.predictor_initialized) {
1333         reset_all_predictors(sce->predictor_state);
1334         sce->ics.predictor_initialized = 1;
1335     }
1336
1337     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1338         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1339             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1340                 predict(&sce->predictor_state[k], &sce->coeffs[k],
1341                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1342             }
1343         }
1344         if (sce->ics.predictor_reset_group)
1345             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1346     } else
1347         reset_all_predictors(sce->predictor_state);
1348 }
1349
1350 /**
1351  * Decode an individual_channel_stream payload; reference: table 4.44.
1352  *
1353  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1354  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1355  *
1356  * @return  Returns error status. 0 - OK, !0 - error
1357  */
1358 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1359                       GetBitContext *gb, int common_window, int scale_flag)
1360 {
1361     Pulse pulse;
1362     TemporalNoiseShaping    *tns = &sce->tns;
1363     IndividualChannelStream *ics = &sce->ics;
1364     float *out = sce->coeffs;
1365     int global_gain, pulse_present = 0;
1366
1367     /* This assignment is to silence a GCC warning about the variable being used
1368      * uninitialized when in fact it always is.
1369      */
1370     pulse.num_pulse = 0;
1371
1372     global_gain = get_bits(gb, 8);
1373
1374     if (!common_window && !scale_flag) {
1375         if (decode_ics_info(ac, ics, gb, 0) < 0)
1376             return -1;
1377     }
1378
1379     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1380         return -1;
1381     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1382         return -1;
1383
1384     pulse_present = 0;
1385     if (!scale_flag) {
1386         if ((pulse_present = get_bits1(gb))) {
1387             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1388                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1389                 return -1;
1390             }
1391             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1392                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1393                 return -1;
1394             }
1395         }
1396         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1397             return -1;
1398         if (get_bits1(gb)) {
1399             av_log_missing_feature(ac->avctx, "SSR", 1);
1400             return -1;
1401         }
1402     }
1403
1404     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1405         return -1;
1406
1407     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1408         apply_prediction(ac, sce);
1409
1410     return 0;
1411 }
1412
1413 /**
1414  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1415  */
1416 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1417 {
1418     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1419     float *ch0 = cpe->ch[0].coeffs;
1420     float *ch1 = cpe->ch[1].coeffs;
1421     int g, i, group, idx = 0;
1422     const uint16_t *offsets = ics->swb_offset;
1423     for (g = 0; g < ics->num_window_groups; g++) {
1424         for (i = 0; i < ics->max_sfb; i++, idx++) {
1425             if (cpe->ms_mask[idx] &&
1426                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1427                 for (group = 0; group < ics->group_len[g]; group++) {
1428                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1429                                               ch1 + group * 128 + offsets[i],
1430                                               offsets[i+1] - offsets[i]);
1431                 }
1432             }
1433         }
1434         ch0 += ics->group_len[g] * 128;
1435         ch1 += ics->group_len[g] * 128;
1436     }
1437 }
1438
1439 /**
1440  * intensity stereo decoding; reference: 4.6.8.2.3
1441  *
1442  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1443  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1444  *                      [3] reserved for scalable AAC
1445  */
1446 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1447 {
1448     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1449     SingleChannelElement         *sce1 = &cpe->ch[1];
1450     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1451     const uint16_t *offsets = ics->swb_offset;
1452     int g, group, i, idx = 0;
1453     int c;
1454     float scale;
1455     for (g = 0; g < ics->num_window_groups; g++) {
1456         for (i = 0; i < ics->max_sfb;) {
1457             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1458                 const int bt_run_end = sce1->band_type_run_end[idx];
1459                 for (; i < bt_run_end; i++, idx++) {
1460                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1461                     if (ms_present)
1462                         c *= 1 - 2 * cpe->ms_mask[idx];
1463                     scale = c * sce1->sf[idx];
1464                     for (group = 0; group < ics->group_len[g]; group++)
1465                         ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1466                                                    coef0 + group * 128 + offsets[i],
1467                                                    scale,
1468                                                    offsets[i + 1] - offsets[i]);
1469                 }
1470             } else {
1471                 int bt_run_end = sce1->band_type_run_end[idx];
1472                 idx += bt_run_end - i;
1473                 i    = bt_run_end;
1474             }
1475         }
1476         coef0 += ics->group_len[g] * 128;
1477         coef1 += ics->group_len[g] * 128;
1478     }
1479 }
1480
1481 /**
1482  * Decode a channel_pair_element; reference: table 4.4.
1483  *
1484  * @return  Returns error status. 0 - OK, !0 - error
1485  */
1486 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1487 {
1488     int i, ret, common_window, ms_present = 0;
1489
1490     common_window = get_bits1(gb);
1491     if (common_window) {
1492         if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1))
1493             return -1;
1494         i = cpe->ch[1].ics.use_kb_window[0];
1495         cpe->ch[1].ics = cpe->ch[0].ics;
1496         cpe->ch[1].ics.use_kb_window[1] = i;
1497         if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN))
1498             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1499                 decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1500         ms_present = get_bits(gb, 2);
1501         if (ms_present == 3) {
1502             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1503             return -1;
1504         } else if (ms_present)
1505             decode_mid_side_stereo(cpe, gb, ms_present);
1506     }
1507     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1508         return ret;
1509     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1510         return ret;
1511
1512     if (common_window) {
1513         if (ms_present)
1514             apply_mid_side_stereo(ac, cpe);
1515         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1516             apply_prediction(ac, &cpe->ch[0]);
1517             apply_prediction(ac, &cpe->ch[1]);
1518         }
1519     }
1520
1521     apply_intensity_stereo(ac, cpe, ms_present);
1522     return 0;
1523 }
1524
/**
 * Gain step bases for coupling channel elements, indexed by the 2-bit scale
 * field read in decode_cce(); the decoded gain is this base raised to a
 * signed integer power.
 */
static const float cce_scale[] = {
    1.09050773266525765921, //2^(1/8)
    1.18920711500272106672, //2^(1/4)
    M_SQRT2,                //2^(1/2)
    2,                      //2^1
};
1531
1532 /**
1533  * Decode coupling_channel_element; reference: table 4.8.
1534  *
1535  * @return  Returns error status. 0 - OK, !0 - error
1536  */
1537 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1538 {
1539     int num_gain = 0;
1540     int c, g, sfb, ret;
1541     int sign;
1542     float scale;
1543     SingleChannelElement *sce = &che->ch[0];
1544     ChannelCoupling     *coup = &che->coup;
1545
1546     coup->coupling_point = 2 * get_bits1(gb);
1547     coup->num_coupled = get_bits(gb, 3);
1548     for (c = 0; c <= coup->num_coupled; c++) {
1549         num_gain++;
1550         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1551         coup->id_select[c] = get_bits(gb, 4);
1552         if (coup->type[c] == TYPE_CPE) {
1553             coup->ch_select[c] = get_bits(gb, 2);
1554             if (coup->ch_select[c] == 3)
1555                 num_gain++;
1556         } else
1557             coup->ch_select[c] = 2;
1558     }
1559     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1560
1561     sign  = get_bits(gb, 1);
1562     scale = cce_scale[get_bits(gb, 2)];
1563
1564     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1565         return ret;
1566
1567     for (c = 0; c < num_gain; c++) {
1568         int idx  = 0;
1569         int cge  = 1;
1570         int gain = 0;
1571         float gain_cache = 1.;
1572         if (c) {
1573             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1574             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1575             gain_cache = powf(scale, -gain);
1576         }
1577         if (coup->coupling_point == AFTER_IMDCT) {
1578             coup->gain[c][0] = gain_cache;
1579         } else {
1580             for (g = 0; g < sce->ics.num_window_groups; g++) {
1581                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1582                     if (sce->band_type[idx] != ZERO_BT) {
1583                         if (!cge) {
1584                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1585                             if (t) {
1586                                 int s = 1;
1587                                 t = gain += t;
1588                                 if (sign) {
1589                                     s  -= 2 * (t & 0x1);
1590                                     t >>= 1;
1591                                 }
1592                                 gain_cache = powf(scale, -t) * s;
1593                             }
1594                         }
1595                         coup->gain[c][idx] = gain_cache;
1596                     }
1597                 }
1598             }
1599         }
1600     }
1601     return 0;
1602 }
1603
1604 /**
1605  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1606  *
1607  * @return  Returns number of bytes consumed.
1608  */
1609 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1610                                          GetBitContext *gb)
1611 {
1612     int i;
1613     int num_excl_chan = 0;
1614
1615     do {
1616         for (i = 0; i < 7; i++)
1617             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1618     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1619
1620     return num_excl_chan / 7;
1621 }
1622
1623 /**
1624  * Decode dynamic range information; reference: table 4.52.
1625  *
1626  * @param   cnt length of TYPE_FIL syntactic element in bytes
1627  *
1628  * @return  Returns number of bytes consumed.
1629  */
1630 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1631                                 GetBitContext *gb, int cnt)
1632 {
1633     int n             = 1;
1634     int drc_num_bands = 1;
1635     int i;
1636
1637     /* pce_tag_present? */
1638     if (get_bits1(gb)) {
1639         che_drc->pce_instance_tag  = get_bits(gb, 4);
1640         skip_bits(gb, 4); // tag_reserved_bits
1641         n++;
1642     }
1643
1644     /* excluded_chns_present? */
1645     if (get_bits1(gb)) {
1646         n += decode_drc_channel_exclusions(che_drc, gb);
1647     }
1648
1649     /* drc_bands_present? */
1650     if (get_bits1(gb)) {
1651         che_drc->band_incr            = get_bits(gb, 4);
1652         che_drc->interpolation_scheme = get_bits(gb, 4);
1653         n++;
1654         drc_num_bands += che_drc->band_incr;
1655         for (i = 0; i < drc_num_bands; i++) {
1656             che_drc->band_top[i] = get_bits(gb, 8);
1657             n++;
1658         }
1659     }
1660
1661     /* prog_ref_level_present? */
1662     if (get_bits1(gb)) {
1663         che_drc->prog_ref_level = get_bits(gb, 7);
1664         skip_bits1(gb); // prog_ref_level_reserved_bits
1665         n++;
1666     }
1667
1668     for (i = 0; i < drc_num_bands; i++) {
1669         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1670         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1671         n++;
1672     }
1673
1674     return n;
1675 }
1676
1677 /**
1678  * Decode extension data (incomplete); reference: table 4.51.
1679  *
1680  * @param   cnt length of TYPE_FIL syntactic element in bytes
1681  *
1682  * @return Returns number of bytes consumed
1683  */
1684 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1685                                     ChannelElement *che, enum RawDataBlockType elem_type)
1686 {
1687     int crc_flag = 0;
1688     int res = cnt;
1689     switch (get_bits(gb, 4)) { // extension type
1690     case EXT_SBR_DATA_CRC:
1691         crc_flag++;
1692     case EXT_SBR_DATA:
1693         if (!che) {
1694             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1695             return res;
1696         } else if (!ac->m4ac.sbr) {
1697             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1698             skip_bits_long(gb, 8 * cnt - 4);
1699             return res;
1700         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1701             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1702             skip_bits_long(gb, 8 * cnt - 4);
1703             return res;
1704         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1705             ac->m4ac.sbr = 1;
1706             ac->m4ac.ps = 1;
1707             output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
1708         } else {
1709             ac->m4ac.sbr = 1;
1710         }
1711         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1712         break;
1713     case EXT_DYNAMIC_RANGE:
1714         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1715         break;
1716     case EXT_FILL:
1717     case EXT_FILL_DATA:
1718     case EXT_DATA_ELEMENT:
1719     default:
1720         skip_bits_long(gb, 8 * cnt - 4);
1721         break;
1722     };
1723     return res;
1724 }
1725
1726 /**
1727  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1728  *
1729  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1730  * @param   coef    spectral coefficients
1731  */
1732 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1733                       IndividualChannelStream *ics, int decode)
1734 {
1735     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1736     int w, filt, m, i;
1737     int bottom, top, order, start, end, size, inc;
1738     float lpc[TNS_MAX_ORDER];
1739     float tmp[TNS_MAX_ORDER];
1740
1741     for (w = 0; w < ics->num_windows; w++) {
1742         bottom = ics->num_swb;
1743         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1744             top    = bottom;
1745             bottom = FFMAX(0, top - tns->length[w][filt]);
1746             order  = tns->order[w][filt];
1747             if (order == 0)
1748                 continue;
1749
1750             // tns_decode_coef
1751             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1752
1753             start = ics->swb_offset[FFMIN(bottom, mmm)];
1754             end   = ics->swb_offset[FFMIN(   top, mmm)];
1755             if ((size = end - start) <= 0)
1756                 continue;
1757             if (tns->direction[w][filt]) {
1758                 inc = -1;
1759                 start = end - 1;
1760             } else {
1761                 inc = 1;
1762             }
1763             start += w * 128;
1764
1765             if (decode) {
1766                 // ar filter
1767                 for (m = 0; m < size; m++, start += inc)
1768                     for (i = 1; i <= FFMIN(m, order); i++)
1769                         coef[start] -= coef[start - i * inc] * lpc[i - 1];
1770             } else {
1771                 // ma filter
1772                 for (m = 0; m < size; m++, start += inc) {
1773                     tmp[0] = coef[start];
1774                     for (i = 1; i <= FFMIN(m, order); i++)
1775                         coef[start] += tmp[i] * lpc[i - 1];
1776                     for (i = order; i > 0; i--)
1777                         tmp[i] = tmp[i - 1];
1778                 }
1779             }
1780         }
1781     }
1782 }
1783
1784 /**
1785  *  Apply windowing and MDCT to obtain the spectral
1786  *  coefficient from the predicted sample by LTP.
1787  */
1788 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
1789                                    float *in, IndividualChannelStream *ics)
1790 {
1791     const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1792     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1793     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1794     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1795
1796     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
1797         ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
1798     } else {
1799         memset(in, 0, 448 * sizeof(float));
1800         ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
1801     }
1802     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
1803         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
1804     } else {
1805         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
1806         memset(in + 1024 + 576, 0, 448 * sizeof(float));
1807     }
1808     ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
1809 }
1810
1811 /**
1812  * Apply the long term prediction
1813  */
1814 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
1815 {
1816     const LongTermPrediction *ltp = &sce->ics.ltp;
1817     const uint16_t *offsets = sce->ics.swb_offset;
1818     int i, sfb;
1819
1820     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1821         float *predTime = sce->ret;
1822         float *predFreq = ac->buf_mdct;
1823         int16_t num_samples = 2048;
1824
1825         if (ltp->lag < 1024)
1826             num_samples = ltp->lag + 1024;
1827         for (i = 0; i < num_samples; i++)
1828             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
1829         memset(&predTime[i], 0, (2048 - i) * sizeof(float));
1830
1831         windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
1832
1833         if (sce->tns.present)
1834             apply_tns(predFreq, &sce->tns, &sce->ics, 0);
1835
1836         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
1837             if (ltp->used[sfb])
1838                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
1839                     sce->coeffs[i] += predFreq[i];
1840     }
1841 }
1842
1843 /**
1844  * Update the LTP buffer for next frame
1845  */
1846 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
1847 {
1848     IndividualChannelStream *ics = &sce->ics;
1849     float *saved     = sce->saved;
1850     float *saved_ltp = sce->coeffs;
1851     const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1852     const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1853     int i;
1854
1855     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1856         memcpy(saved_ltp,       saved, 512 * sizeof(float));
1857         memset(saved_ltp + 576, 0,     448 * sizeof(float));
1858         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
1859         for (i = 0; i < 64; i++)
1860             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
1861     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1862         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
1863         memset(saved_ltp + 576, 0,                  448 * sizeof(float));
1864         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
1865         for (i = 0; i < 64; i++)
1866             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
1867     } else { // LONG_STOP or ONLY_LONG
1868         ac->dsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
1869         for (i = 0; i < 512; i++)
1870             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
1871     }
1872
1873     memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
1874     memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
1875     memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
1876 }
1877
1878 /**
1879  * Conduct IMDCT and windowing.
1880  */
1881 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
1882 {
1883     IndividualChannelStream *ics = &sce->ics;
1884     float *in    = sce->coeffs;
1885     float *out   = sce->ret;
1886     float *saved = sce->saved;
1887     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1888     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1889     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1890     float *buf  = ac->buf_mdct;
1891     float *temp = ac->temp;
1892     int i;
1893
1894     // imdct
1895     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1896         for (i = 0; i < 1024; i += 128)
1897             ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
1898     } else
1899         ac->mdct.imdct_half(&ac->mdct, buf, in);
1900
1901     /* window overlapping
1902      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1903      * and long to short transitions are considered to be short to short
1904      * transitions. This leaves just two cases (long to long and short to short)
1905      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1906      */
1907     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1908             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1909         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
1910     } else {
1911         memcpy(                        out,               saved,            448 * sizeof(float));
1912
1913         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1914             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
1915             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
1916             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
1917             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
1918             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
1919             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1920         } else {
1921             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
1922             memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
1923         }
1924     }
1925
1926     // buffer update
1927     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1928         memcpy(                    saved,       temp + 64,         64 * sizeof(float));
1929         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
1930         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
1931         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
1932         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1933     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1934         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
1935         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1936     } else { // LONG_STOP or ONLY_LONG
1937         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
1938     }
1939 }
1940
1941 /**
1942  * Apply dependent channel coupling (applied before IMDCT).
1943  *
1944  * @param   index   index into coupling gain array
1945  */
1946 static void apply_dependent_coupling(AACContext *ac,
1947                                      SingleChannelElement *target,
1948                                      ChannelElement *cce, int index)
1949 {
1950     IndividualChannelStream *ics = &cce->ch[0].ics;
1951     const uint16_t *offsets = ics->swb_offset;
1952     float *dest = target->coeffs;
1953     const float *src = cce->ch[0].coeffs;
1954     int g, i, group, k, idx = 0;
1955     if (ac->m4ac.object_type == AOT_AAC_LTP) {
1956         av_log(ac->avctx, AV_LOG_ERROR,
1957                "Dependent coupling is not supported together with LTP\n");
1958         return;
1959     }
1960     for (g = 0; g < ics->num_window_groups; g++) {
1961         for (i = 0; i < ics->max_sfb; i++, idx++) {
1962             if (cce->ch[0].band_type[idx] != ZERO_BT) {
1963                 const float gain = cce->coup.gain[index][idx];
1964                 for (group = 0; group < ics->group_len[g]; group++) {
1965                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
1966                         // XXX dsputil-ize
1967                         dest[group * 128 + k] += gain * src[group * 128 + k];
1968                     }
1969                 }
1970             }
1971         }
1972         dest += ics->group_len[g] * 128;
1973         src  += ics->group_len[g] * 128;
1974     }
1975 }
1976
1977 /**
1978  * Apply independent channel coupling (applied after IMDCT).
1979  *
1980  * @param   index   index into coupling gain array
1981  */
1982 static void apply_independent_coupling(AACContext *ac,
1983                                        SingleChannelElement *target,
1984                                        ChannelElement *cce, int index)
1985 {
1986     int i;
1987     const float gain = cce->coup.gain[index][0];
1988     const float *src = cce->ch[0].ret;
1989     float *dest = target->ret;
1990     const int len = 1024 << (ac->m4ac.sbr == 1);
1991
1992     for (i = 0; i < len; i++)
1993         dest[i] += gain * src[i];
1994 }
1995
1996 /**
1997  * channel coupling transformation interface
1998  *
1999  * @param   apply_coupling_method   pointer to (in)dependent coupling function
2000  */
2001 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2002                                    enum RawDataBlockType type, int elem_id,
2003                                    enum CouplingPoint coupling_point,
2004                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2005 {
2006     int i, c;
2007
2008     for (i = 0; i < MAX_ELEM_ID; i++) {
2009         ChannelElement *cce = ac->che[TYPE_CCE][i];
2010         int index = 0;
2011
2012         if (cce && cce->coup.coupling_point == coupling_point) {
2013             ChannelCoupling *coup = &cce->coup;
2014
2015             for (c = 0; c <= coup->num_coupled; c++) {
2016                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2017                     if (coup->ch_select[c] != 1) {
2018                         apply_coupling_method(ac, &cc->ch[0], cce, index);
2019                         if (coup->ch_select[c] != 0)
2020                             index++;
2021                     }
2022                     if (coup->ch_select[c] != 2)
2023                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
2024                 } else
2025                     index += 1 + (coup->ch_select[c] == 3);
2026             }
2027         }
2028     }
2029 }
2030
2031 /**
2032  * Convert spectral data to float samples, applying all supported tools as appropriate.
2033  */
2034 static void spectral_to_sample(AACContext *ac)
2035 {
2036     int i, type;
2037     for (type = 3; type >= 0; type--) {
2038         for (i = 0; i < MAX_ELEM_ID; i++) {
2039             ChannelElement *che = ac->che[type][i];
2040             if (che) {
2041                 if (type <= TYPE_CPE)
2042                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2043                 if (ac->m4ac.object_type == AOT_AAC_LTP) {
2044                     if (che->ch[0].ics.predictor_present) {
2045                         if (che->ch[0].ics.ltp.present)
2046                             apply_ltp(ac, &che->ch[0]);
2047                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2048                             apply_ltp(ac, &che->ch[1]);
2049                     }
2050                 }
2051                 if (che->ch[0].tns.present)
2052                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2053                 if (che->ch[1].tns.present)
2054                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2055                 if (type <= TYPE_CPE)
2056                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2057                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2058                     imdct_and_windowing(ac, &che->ch[0]);
2059                     if (ac->m4ac.object_type == AOT_AAC_LTP)
2060                         update_ltp(ac, &che->ch[0]);
2061                     if (type == TYPE_CPE) {
2062                         imdct_and_windowing(ac, &che->ch[1]);
2063                         if (ac->m4ac.object_type == AOT_AAC_LTP)
2064                             update_ltp(ac, &che->ch[1]);
2065                     }
2066                     if (ac->m4ac.sbr > 0) {
2067                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2068                     }
2069                 }
2070                 if (type <= TYPE_CCE)
2071                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2072             }
2073         }
2074     }
2075 }
2076
2077 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2078 {
2079     int size;
2080     AACADTSHeaderInfo hdr_info;
2081
2082     size = avpriv_aac_parse_header(gb, &hdr_info);
2083     if (size > 0) {
2084         if (hdr_info.chan_config) {
2085             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2086             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2087             ac->m4ac.chan_config = hdr_info.chan_config;
2088             if (set_default_channel_config(ac->avctx, new_che_pos, hdr_info.chan_config))
2089                 return -7;
2090             if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
2091                 return -7;
2092         } else if (ac->output_configured != OC_LOCKED) {
2093             ac->m4ac.chan_config = 0;
2094             ac->output_configured = OC_NONE;
2095         }
2096         if (ac->output_configured != OC_LOCKED) {
2097             ac->m4ac.sbr = -1;
2098             ac->m4ac.ps  = -1;
2099             ac->m4ac.sample_rate     = hdr_info.sample_rate;
2100             ac->m4ac.sampling_index  = hdr_info.sampling_index;
2101             ac->m4ac.object_type     = hdr_info.object_type;
2102         }
2103         if (!ac->avctx->sample_rate)
2104             ac->avctx->sample_rate = hdr_info.sample_rate;
2105         if (hdr_info.num_aac_frames == 1) {
2106             if (!hdr_info.crc_absent)
2107                 skip_bits(gb, 16);
2108         } else {
2109             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
2110             return -1;
2111         }
2112     }
2113     return size;
2114 }
2115
2116 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2117                                 int *data_size, GetBitContext *gb)
2118 {
2119     AACContext *ac = avctx->priv_data;
2120     ChannelElement *che = NULL, *che_prev = NULL;
2121     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2122     int err, elem_id, data_size_tmp;
2123     int samples = 0, multiplier, audio_found = 0;
2124
2125     if (show_bits(gb, 12) == 0xfff) {
2126         if (parse_adts_frame_header(ac, gb) < 0) {
2127             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2128             return -1;
2129         }
2130         if (ac->m4ac.sampling_index > 12) {
2131             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2132             return -1;
2133         }
2134     }
2135
2136     ac->tags_mapped = 0;
2137     // parse
2138     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2139         elem_id = get_bits(gb, 4);
2140
2141         if (elem_type < TYPE_DSE) {
2142             if (!ac->tags_mapped && elem_type == TYPE_CPE && ac->m4ac.chan_config==1) {
2143                 enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]= {0};
2144                 ac->m4ac.chan_config=2;
2145
2146                 if (set_default_channel_config(ac->avctx, new_che_pos, 2)<0)
2147                     return -1;
2148                 if (output_configure(ac, ac->che_pos, new_che_pos, 2, OC_TRIAL_FRAME)<0)
2149                     return -1;
2150             }
2151             if (!(che=get_che(ac, elem_type, elem_id))) {
2152                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2153                        elem_type, elem_id);
2154                 return -1;
2155             }
2156             samples = 1024;
2157         }
2158
2159         switch (elem_type) {
2160
2161         case TYPE_SCE:
2162             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2163             audio_found = 1;
2164             break;
2165
2166         case TYPE_CPE:
2167             err = decode_cpe(ac, gb, che);
2168             audio_found = 1;
2169             break;
2170
2171         case TYPE_CCE:
2172             err = decode_cce(ac, gb, che);
2173             break;
2174
2175         case TYPE_LFE:
2176             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2177             audio_found = 1;
2178             break;
2179
2180         case TYPE_DSE:
2181             err = skip_data_stream_element(ac, gb);
2182             break;
2183
2184         case TYPE_PCE: {
2185             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
2186             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
2187             if ((err = decode_pce(avctx, &ac->m4ac, new_che_pos, gb)))
2188                 break;
2189             if (ac->output_configured > OC_TRIAL_PCE)
2190                 av_log(avctx, AV_LOG_ERROR,
2191                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2192             else
2193                 err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
2194             break;
2195         }
2196
2197         case TYPE_FIL:
2198             if (elem_id == 15)
2199                 elem_id += get_bits(gb, 8) - 1;
2200             if (get_bits_left(gb) < 8 * elem_id) {
2201                     av_log(avctx, AV_LOG_ERROR, overread_err);
2202                     return -1;
2203             }
2204             while (elem_id > 0)
2205                 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2206             err = 0; /* FIXME */
2207             break;
2208
2209         default:
2210             err = -1; /* should not happen, but keeps compiler happy */
2211             break;
2212         }
2213
2214         che_prev       = che;
2215         elem_type_prev = elem_type;
2216
2217         if (err)
2218             return err;
2219
2220         if (get_bits_left(gb) < 3) {
2221             av_log(avctx, AV_LOG_ERROR, overread_err);
2222             return -1;
2223         }
2224     }
2225
2226     spectral_to_sample(ac);
2227
2228     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2229     samples <<= multiplier;
2230     if (ac->output_configured < OC_LOCKED) {
2231         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2232         avctx->frame_size = samples;
2233     }
2234
2235     data_size_tmp = samples * avctx->channels *
2236                     av_get_bytes_per_sample(avctx->sample_fmt);
2237     if (*data_size < data_size_tmp) {
2238         av_log(avctx, AV_LOG_ERROR,
2239                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
2240                *data_size, data_size_tmp);
2241         return -1;
2242     }
2243     *data_size = data_size_tmp;
2244
2245     if (samples) {
2246         if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
2247             ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
2248                                           samples, avctx->channels);
2249         else
2250             ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data,
2251                                                    samples, avctx->channels);
2252     }
2253
2254     if (ac->output_configured && audio_found)
2255         ac->output_configured = OC_LOCKED;
2256
2257     return 0;
2258 }
2259
2260 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2261                             int *data_size, AVPacket *avpkt)
2262 {
2263     const uint8_t *buf = avpkt->data;
2264     int buf_size = avpkt->size;
2265     GetBitContext gb;
2266     int buf_consumed;
2267     int buf_offset;
2268     int err;
2269
2270     init_get_bits(&gb, buf, buf_size * 8);
2271
2272     if ((err = aac_decode_frame_int(avctx, data, data_size, &gb)) < 0)
2273         return err;
2274
2275     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2276     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2277         if (buf[buf_offset])
2278             break;
2279
2280     return buf_size > buf_offset ? buf_consumed : buf_size;
2281 }
2282
2283 static av_cold int aac_decode_close(AVCodecContext *avctx)
2284 {
2285     AACContext *ac = avctx->priv_data;
2286     int i, type;
2287
2288     for (i = 0; i < MAX_ELEM_ID; i++) {
2289         for (type = 0; type < 4; type++) {
2290             if (ac->che[type][i])
2291                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2292             av_freep(&ac->che[type][i]);
2293         }
2294     }
2295
2296     ff_mdct_end(&ac->mdct);
2297     ff_mdct_end(&ac->mdct_small);
2298     ff_mdct_end(&ac->mdct_ltp);
2299     return 0;
2300 }
2301
2302
#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word

/**
 * State of the LATM decoder: the embedded AAC decoder context plus the
 * multiplex parameters kept by the LATM parser.
 */
struct LATMContext {
    AACContext      aac_ctx;             ///< containing AACContext
    int             initialized;         ///< initialized after a valid extradata was seen

    // parser data
    int             audio_mux_version_A; ///< LATM syntax version
    int             frame_length_type;   ///< 0/1 variable/fixed frame length
    int             frame_length;        ///< frame length for fixed frame length
};
2314
2315 static inline uint32_t latm_get_value(GetBitContext *b)
2316 {
2317     int length = get_bits(b, 2);
2318
2319     return get_bits_long(b, (length+1)*8);
2320 }
2321
2322 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2323                                              GetBitContext *gb, int asclen)
2324 {
2325     AVCodecContext *avctx = latmctx->aac_ctx.avctx;
2326     AACContext *ac= &latmctx->aac_ctx;
2327     MPEG4AudioConfig m4ac=ac->m4ac;
2328     int  config_start_bit = get_bits_count(gb);
2329     int     bits_consumed, esize;
2330
2331     if (config_start_bit % 8) {
2332         av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
2333                                "config not byte aligned.\n", 1);
2334         return AVERROR_INVALIDDATA;
2335     } else {
2336         bits_consumed =
2337             decode_audio_specific_config(ac, avctx, &m4ac,
2338                                          gb->buffer + (config_start_bit / 8),
2339                                          get_bits_left(gb) / 8, asclen);
2340
2341         if (bits_consumed < 0)
2342             return AVERROR_INVALIDDATA;
2343         if(ac->m4ac.sample_rate != m4ac.sample_rate || m4ac.chan_config != ac->m4ac.chan_config)
2344             ac->m4ac= m4ac;
2345
2346         esize = (bits_consumed+7) / 8;
2347
2348         if (avctx->extradata_size <= esize) {
2349             av_free(avctx->extradata);
2350             avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2351             if (!avctx->extradata)
2352                 return AVERROR(ENOMEM);
2353         }
2354
2355         avctx->extradata_size = esize;
2356         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2357         memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2358
2359         skip_bits_long(gb, bits_consumed);
2360     }
2361
2362     return bits_consumed;
2363 }
2364
2365 static int read_stream_mux_config(struct LATMContext *latmctx,
2366                                   GetBitContext *gb)
2367 {
2368     int ret, audio_mux_version = get_bits(gb, 1);
2369
2370     latmctx->audio_mux_version_A = 0;
2371     if (audio_mux_version)
2372         latmctx->audio_mux_version_A = get_bits(gb, 1);
2373
2374     if (!latmctx->audio_mux_version_A) {
2375
2376         if (audio_mux_version)
2377             latm_get_value(gb);                 // taraFullness
2378
2379         skip_bits(gb, 1);                       // allStreamSameTimeFraming
2380         skip_bits(gb, 6);                       // numSubFrames
2381         // numPrograms
2382         if (get_bits(gb, 4)) {                  // numPrograms
2383             av_log_missing_feature(latmctx->aac_ctx.avctx,
2384                                    "multiple programs are not supported\n", 1);
2385             return AVERROR_PATCHWELCOME;
2386         }
2387
2388         // for each program (which there is only on in DVB)
2389
2390         // for each layer (which there is only on in DVB)
2391         if (get_bits(gb, 3)) {                   // numLayer
2392             av_log_missing_feature(latmctx->aac_ctx.avctx,
2393                                    "multiple layers are not supported\n", 1);
2394             return AVERROR_PATCHWELCOME;
2395         }
2396
2397         // for all but first stream: use_same_config = get_bits(gb, 1);
2398         if (!audio_mux_version) {
2399             if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2400                 return ret;
2401         } else {
2402             int ascLen = latm_get_value(gb);
2403             if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2404                 return ret;
2405             ascLen -= ret;
2406             skip_bits_long(gb, ascLen);
2407         }
2408
2409         latmctx->frame_length_type = get_bits(gb, 3);
2410         switch (latmctx->frame_length_type) {
2411         case 0:
2412             skip_bits(gb, 8);       // latmBufferFullness
2413             break;
2414         case 1:
2415             latmctx->frame_length = get_bits(gb, 9);
2416             break;
2417         case 3:
2418         case 4:
2419         case 5:
2420             skip_bits(gb, 6);       // CELP frame length table index
2421             break;
2422         case 6:
2423         case 7:
2424             skip_bits(gb, 1);       // HVXC frame length table index
2425             break;
2426         }
2427
2428         if (get_bits(gb, 1)) {                  // other data
2429             if (audio_mux_version) {
2430                 latm_get_value(gb);             // other_data_bits
2431             } else {
2432                 int esc;
2433                 do {
2434                     esc = get_bits(gb, 1);
2435                     skip_bits(gb, 8);
2436                 } while (esc);
2437             }
2438         }
2439
2440         if (get_bits(gb, 1))                     // crc present
2441             skip_bits(gb, 8);                    // config_crc
2442     }
2443
2444     return 0;
2445 }
2446
2447 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2448 {
2449     uint8_t tmp;
2450
2451     if (ctx->frame_length_type == 0) {
2452         int mux_slot_length = 0;
2453         do {
2454             tmp = get_bits(gb, 8);
2455             mux_slot_length += tmp;
2456         } while (tmp == 255);
2457         return mux_slot_length;
2458     } else if (ctx->frame_length_type == 1) {
2459         return ctx->frame_length;
2460     } else if (ctx->frame_length_type == 3 ||
2461                ctx->frame_length_type == 5 ||
2462                ctx->frame_length_type == 7) {
2463         skip_bits(gb, 2);          // mux_slot_length_coded
2464     }
2465     return 0;
2466 }
2467
2468 static int read_audio_mux_element(struct LATMContext *latmctx,
2469                                   GetBitContext *gb)
2470 {
2471     int err;
2472     uint8_t use_same_mux = get_bits(gb, 1);
2473     if (!use_same_mux) {
2474         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2475             return err;
2476     } else if (!latmctx->aac_ctx.avctx->extradata) {
2477         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2478                "no decoder config found\n");
2479         return AVERROR(EAGAIN);
2480     }
2481     if (latmctx->audio_mux_version_A == 0) {
2482         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2483         if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2484             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2485             return AVERROR_INVALIDDATA;
2486         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2487             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2488                    "frame length mismatch %d << %d\n",
2489                    mux_slot_length_bytes * 8, get_bits_left(gb));
2490             return AVERROR_INVALIDDATA;
2491         }
2492     }
2493     return 0;
2494 }
2495
2496
2497 static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
2498                              AVPacket *avpkt)
2499 {
2500     struct LATMContext *latmctx = avctx->priv_data;
2501     int                 muxlength, err;
2502     GetBitContext       gb;
2503
2504     init_get_bits(&gb, avpkt->data, avpkt->size * 8);
2505
2506     // check for LOAS sync word
2507     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2508         return AVERROR_INVALIDDATA;
2509
2510     muxlength = get_bits(&gb, 13) + 3;
2511     // not enough data, the parser should have sorted this
2512     if (muxlength > avpkt->size)
2513         return AVERROR_INVALIDDATA;
2514
2515     if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2516         return err;
2517
2518     if (!latmctx->initialized) {
2519         if (!avctx->extradata) {
2520             *out_size = 0;
2521             return avpkt->size;
2522         } else {
2523             if ((err = decode_audio_specific_config(
2524                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.m4ac,
2525                     avctx->extradata, avctx->extradata_size, 8*avctx->extradata_size)) < 0)
2526                 return err;
2527             latmctx->initialized = 1;
2528         }
2529     }
2530
2531     if (show_bits(&gb, 12) == 0xfff) {
2532         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2533                "ADTS header detected, probably as result of configuration "
2534                "misparsing\n");
2535         return AVERROR_INVALIDDATA;
2536     }
2537
2538     if ((err = aac_decode_frame_int(avctx, out, out_size, &gb)) < 0)
2539         return err;
2540
2541     return muxlength;
2542 }
2543
2544 av_cold static int latm_decode_init(AVCodecContext *avctx)
2545 {
2546     struct LATMContext *latmctx = avctx->priv_data;
2547     int ret = aac_decode_init(avctx);
2548
2549     if (avctx->extradata_size > 0)
2550         latmctx->initialized = !ret;
2551
2552     return ret;
2553 }
2554
2555
2556 AVCodec ff_aac_decoder = {
2557     .name           = "aac",
2558     .type           = AVMEDIA_TYPE_AUDIO,
2559     .id             = CODEC_ID_AAC,
2560     .priv_data_size = sizeof(AACContext),
2561     .init           = aac_decode_init,
2562     .close          = aac_decode_close,
2563     .decode         = aac_decode_frame,
2564     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2565     .sample_fmts = (const enum AVSampleFormat[]) {
2566         AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2567     },
2568     .capabilities = CODEC_CAP_CHANNEL_CONF,
2569     .channel_layouts = aac_channel_layout,
2570 };
2571
2572 /*
2573     Note: This decoder filter is intended to decode LATM streams transferred
2574     in MPEG transport streams which only contain one program.
2575     To do a more complex LATM demuxing a separate LATM demuxer should be used.
2576 */
2577 AVCodec ff_aac_latm_decoder = {
2578     .name = "aac_latm",
2579     .type = AVMEDIA_TYPE_AUDIO,
2580     .id   = CODEC_ID_AAC_LATM,
2581     .priv_data_size = sizeof(struct LATMContext),
2582     .init   = latm_decode_init,
2583     .close  = aac_decode_close,
2584     .decode = latm_decode_frame,
2585     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
2586     .sample_fmts = (const enum AVSampleFormat[]) {
2587         AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2588     },
2589     .capabilities = CODEC_CAP_CHANNEL_CONF,
2590     .channel_layouts = aac_channel_layout,
2591 };