git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * AAC LATM decoder
   7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   8  * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 /**
  28  * @file
  29  * AAC decoder
  30  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32  */
  33
  34 /*
  35  * supported tools
  36  *
  37  * Support?             Name
  38  * N (code in SoC repo) gain control
  39  * Y                    block switching
  40  * Y                    window shapes - standard
  41  * N                    window shapes - Low Delay
  42  * Y                    filterbank - standard
  43  * N (code in SoC repo) filterbank - Scalable Sample Rate
  44  * Y                    Temporal Noise Shaping
  45  * Y                    Long Term Prediction
  46  * Y                    intensity stereo
  47  * Y                    channel coupling
  48  * Y                    frequency domain prediction
  49  * Y                    Perceptual Noise Substitution
  50  * Y                    Mid/Side stereo
  51  * N                    Scalable Inverse AAC Quantization
  52  * N                    Frequency Selective Switch
  53  * N                    upsampling filter
  54  * Y                    quantization & coding - AAC
  55  * N                    quantization & coding - TwinVQ
  56  * N                    quantization & coding - BSAC
  57  * N                    AAC Error Resilience tools
  58  * N                    Error Resilience payload syntax
  59  * N                    Error Protection tool
  60  * N                    CELP
  61  * N                    Silence Compression
  62  * N                    HVXC
  63  * N                    HVXC 4kbits/s VR
  64  * N                    Structured Audio tools
  65  * N                    Structured Audio Sample Bank Format
  66  * N                    MIDI
  67  * N                    Harmonic and Individual Lines plus Noise
  68  * N                    Text-To-Speech Interface
  69  * Y                    Spectral Band Replication
  70  * Y (not in this code) Layer-1
  71  * Y (not in this code) Layer-2
  72  * Y (not in this code) Layer-3
  73  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  74  * Y                    Parametric Stereo
  75  * N                    Direct Stream Transfer
  76  *
  77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  78  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  79            Parametric Stereo.
  80  */
  81
  82 #include "libavutil/float_dsp.h"
  83 #include "libavutil/opt.h"
  84 #include "avcodec.h"
  85 #include "internal.h"
  86 #include "get_bits.h"
  87 #include "dsputil.h"
  88 #include "fft.h"
  89 #include "fmtconvert.h"
  90 #include "lpc.h"
  91 #include "kbdwin.h"
  92 #include "sinewin.h"
  93
  94 #include "aac.h"
  95 #include "aactab.h"
  96 #include "aacdectab.h"
  97 #include "cbrt_tablegen.h"
  98 #include "sbr.h"
  99 #include "aacsbr.h"
 100 #include "mpeg4audio.h"
 101 #include "aacadtsdec.h"
 102 #include "libavutil/intfloat.h"
 103
 104 #include <assert.h>
 105 #include <errno.h>
 106 #include <math.h>
 107 #include <string.h>
 108
 109 #if ARCH_ARM
 110 #   include "arm/aac.h"
 111 #endif
 112
 113 static VLC vlc_scalefactors; /* VLC table for scalefactors; its initialisation is not visible in this part of the file */
 114 static VLC vlc_spectral[11]; /* spectral VLC tables, filled one entry at a time through AAC_INIT_VLC_STATIC(num, size) */
 115
 116 #define overread_err "Input buffer exhausted before END element found\n" /* shared log text, appended to a "decode_pce: " prefix when a PCE is truncated */
 117
 118 static int count_channels(uint8_t (*layout)[3], int tags)
 119 {
 120     int i, sum = 0;
 121     for (i = 0; i < tags; i++) {
 122         int syn_ele = layout[i][0];
 123         int pos     = layout[i][2];
 124         sum += (1 + (syn_ele == TYPE_CPE)) *
 125                (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
 126     }
 127     return sum;
 128 }
 129
 130 /**
 131  * Check for the channel element in the current channel position configuration.
 132  * If it exists, make sure the appropriate element is allocated and map the
 133  * channel order to match the internal FFmpeg channel layout.
 134  *
 135  * @param   che_pos current channel position configuration
 136  * @param   type channel element type
 137  * @param   id channel element id
 138  * @param   channels count of the number of channels in the configuration
 139  *
 140  * @return  Returns error status. 0 - OK, !0 - error
 141  */
 142 static av_cold int che_configure(AACContext *ac,
 143                                  enum ChannelPosition che_pos,
 144                                  int type, int id, int *channels)
 145 {
 146     if (che_pos) {
 147         if (!ac->che[type][id]) {
 148             if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 149                 return AVERROR(ENOMEM);
 150             ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
 151         }
 152         if (type != TYPE_CCE) {
 153             if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
 154                 av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
 155                 return AVERROR_INVALIDDATA;
 156             }
 157             ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
 158             if (type == TYPE_CPE ||
 159                 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
 160                 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
 161             }
 162         }
 163     } else {
 164         if (ac->che[type][id])
 165             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 166         av_freep(&ac->che[type][id]);
 167     }
 168     return 0;
 169 }
 170
 171 static int frame_configure_elements(AVCodecContext *avctx)
 172 {
 173     AACContext *ac = avctx->priv_data;
 174     int type, id, ch, ret;
 175
 176     /* set channel pointers to internal buffers by default */
 177     for (type = 0; type < 4; type++) {
 178         for (id = 0; id < MAX_ELEM_ID; id++) {
 179             ChannelElement *che = ac->che[type][id];
 180             if (che) {
 181                 che->ch[0].ret = che->ch[0].ret_buf;
 182                 che->ch[1].ret = che->ch[1].ret_buf;
 183             }
 184         }
 185     }
 186
 187     /* get output buffer */
 188     ac->frame.nb_samples = 2048;
 189     if ((ret = ff_get_buffer(avctx, &ac->frame)) < 0) {
 190         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 191         return ret;
 192     }
 193
 194     /* map output channel pointers to AVFrame data */
 195     for (ch = 0; ch < avctx->channels; ch++) {
 196         if (ac->output_element[ch])
 197             ac->output_element[ch]->ret = (float *)ac->frame.extended_data[ch];
 198     }
 199
 200     return 0;
 201 }
 202
 203 struct elem_to_channel { /* one syntax element of a layout map plus the output position chosen for it */
 204     uint64_t av_position; /* AV_CH_* mask of the assigned speaker(s); UINT64_MAX when no speaker could be assigned */
 205     uint8_t syn_ele; /* syntax element type (TYPE_SCE, TYPE_CPE or TYPE_LFE) */
 206     uint8_t elem_id; /* element id, copied from layout_map[][1] */
 207     uint8_t aac_position; /* AAC_CHANNEL_* position class, written back to layout_map[][2] */
 208 };
 209
 210 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
 211                        uint8_t (*layout_map)[3], int offset, uint64_t left,
 212     uint64_t right, int pos)
 213 {
 214     if (layout_map[offset][0] == TYPE_CPE) {
 215         e2c_vec[offset] = (struct elem_to_channel) {
 216             .av_position = left | right, .syn_ele = TYPE_CPE,
 217             .elem_id = layout_map[offset    ][1], .aac_position = pos };
 218         return 1;
 219     } else {
 220         e2c_vec[offset]   = (struct elem_to_channel) {
 221             .av_position = left, .syn_ele = TYPE_SCE,
 222             .elem_id = layout_map[offset    ][1], .aac_position = pos };
 223         e2c_vec[offset + 1] = (struct elem_to_channel) {
 224             .av_position = right, .syn_ele = TYPE_SCE,
 225             .elem_id = layout_map[offset + 1][1], .aac_position = pos };
 226         return 2;
 227     }
 228 }
 229
 230 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
 231     int num_pos_channels = 0;
 232     int first_cpe = 0;
 233     int sce_parity = 0;
 234     int i;
 235     for (i = *current; i < tags; i++) {
 236         if (layout_map[i][2] != pos)
 237             break;
 238         if (layout_map[i][0] == TYPE_CPE) {
 239             if (sce_parity) {
 240                 if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
 241                     sce_parity = 0;
 242                 } else {
 243                     return -1;
 244                 }
 245             }
 246             num_pos_channels += 2;
 247             first_cpe = 1;
 248         } else {
 249             num_pos_channels++;
 250             sce_parity ^= 1;
 251         }
 252     }
 253     if (sce_parity &&
 254         ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
 255             return -1;
 256     *current = i;
 257     return num_pos_channels;
 258 }
 259
 260 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
 261 {
 262     int i, n, total_non_cc_elements;
 263     struct elem_to_channel e2c_vec[4*MAX_ELEM_ID] = {{ 0 }};
 264     int num_front_channels, num_side_channels, num_back_channels;
 265     uint64_t layout;
 266
 267     if (FF_ARRAY_ELEMS(e2c_vec) < tags)
 268         return 0;
 269
 270     i = 0;
 271     num_front_channels =
 272         count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
 273     if (num_front_channels < 0)
 274         return 0;
 275     num_side_channels =
 276         count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
 277     if (num_side_channels < 0)
 278         return 0;
 279     num_back_channels =
 280         count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
 281     if (num_back_channels < 0)
 282         return 0;
 283
 284     i = 0;
 285     if (num_front_channels & 1) {
 286         e2c_vec[i] = (struct elem_to_channel) {
 287             .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
 288             .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
 289         i++;
 290         num_front_channels--;
 291     }
 292     if (num_front_channels >= 4) {
 293         i += assign_pair(e2c_vec, layout_map, i,
 294                          AV_CH_FRONT_LEFT_OF_CENTER,
 295                          AV_CH_FRONT_RIGHT_OF_CENTER,
 296                          AAC_CHANNEL_FRONT);
 297         num_front_channels -= 2;
 298     }
 299     if (num_front_channels >= 2) {
 300         i += assign_pair(e2c_vec, layout_map, i,
 301                          AV_CH_FRONT_LEFT,
 302                          AV_CH_FRONT_RIGHT,
 303                          AAC_CHANNEL_FRONT);
 304         num_front_channels -= 2;
 305     }
 306     while (num_front_channels >= 2) {
 307         i += assign_pair(e2c_vec, layout_map, i,
 308                          UINT64_MAX,
 309                          UINT64_MAX,
 310                          AAC_CHANNEL_FRONT);
 311         num_front_channels -= 2;
 312     }
 313
 314     if (num_side_channels >= 2) {
 315         i += assign_pair(e2c_vec, layout_map, i,
 316                          AV_CH_SIDE_LEFT,
 317                          AV_CH_SIDE_RIGHT,
 318                          AAC_CHANNEL_FRONT);
 319         num_side_channels -= 2;
 320     }
 321     while (num_side_channels >= 2) {
 322         i += assign_pair(e2c_vec, layout_map, i,
 323                          UINT64_MAX,
 324                          UINT64_MAX,
 325                          AAC_CHANNEL_SIDE);
 326         num_side_channels -= 2;
 327     }
 328
 329     while (num_back_channels >= 4) {
 330         i += assign_pair(e2c_vec, layout_map, i,
 331                          UINT64_MAX,
 332                          UINT64_MAX,
 333                          AAC_CHANNEL_BACK);
 334         num_back_channels -= 2;
 335     }
 336     if (num_back_channels >= 2) {
 337         i += assign_pair(e2c_vec, layout_map, i,
 338                          AV_CH_BACK_LEFT,
 339                          AV_CH_BACK_RIGHT,
 340                          AAC_CHANNEL_BACK);
 341         num_back_channels -= 2;
 342     }
 343     if (num_back_channels) {
 344         e2c_vec[i] = (struct elem_to_channel) {
 345           .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
 346           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
 347         i++;
 348         num_back_channels--;
 349     }
 350
 351     if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
 352         e2c_vec[i] = (struct elem_to_channel) {
 353           .av_position = AV_CH_LOW_FREQUENCY, .syn_ele = TYPE_LFE,
 354           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
 355         i++;
 356     }
 357     while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
 358         e2c_vec[i] = (struct elem_to_channel) {
 359           .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
 360           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
 361         i++;
 362     }
 363
 364     // Must choose a stable sort
 365     total_non_cc_elements = n = i;
 366     do {
 367         int next_n = 0;
 368         for (i = 1; i < n; i++) {
 369             if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
 370                 FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
 371                 next_n = i;
 372             }
 373         }
 374         n = next_n;
 375     } while (n > 0);
 376
 377     layout = 0;
 378     for (i = 0; i < total_non_cc_elements; i++) {
 379         layout_map[i][0] = e2c_vec[i].syn_ele;
 380         layout_map[i][1] = e2c_vec[i].elem_id;
 381         layout_map[i][2] = e2c_vec[i].aac_position;
 382         if (e2c_vec[i].av_position != UINT64_MAX) {
 383             layout |= e2c_vec[i].av_position;
 384         }
 385     }
 386
 387     return layout;
 388 }
 389
 390 /**
 391  * Save current output configuration if and only if it has been locked.
 392  */
 393 static void push_output_configuration(AACContext *ac) {
 394     if (ac->oc[1].status == OC_LOCKED) {
 395         ac->oc[0] = ac->oc[1];
 396     }
 397     ac->oc[1].status = OC_NONE;
 398 }
 399
 400 /**
 401  * Restore the previous output configuration if and only if the current
 402  * configuration is unlocked.
 403  */
 404 static void pop_output_configuration(AACContext *ac) {
 405     if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
 406         ac->oc[1] = ac->oc[0];
 407         ac->avctx->channels = ac->oc[1].channels;
 408         ac->avctx->channel_layout = ac->oc[1].channel_layout;
 409     }
 410 }
 411
 412 /**
 413  * Configure output channel order based on the current program configuration element.
 414  *
 415  * @return  Returns error status. 0 - OK, !0 - error
 416  */
 417 static int output_configure(AACContext *ac,
 418                             uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
 419                             enum OCStatus oc_type, int get_new_frame)
 420 {
 421     AVCodecContext *avctx = ac->avctx;
 422     int i, channels = 0, ret;
 423     uint64_t layout = 0;
 424
 425     if (ac->oc[1].layout_map != layout_map) {
 426         memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
 427         ac->oc[1].layout_map_tags = tags;
 428     }
 429
 430     // Try to sniff a reasonable channel order, otherwise output the
 431     // channels in the order the PCE declared them.
 432     if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
 433         layout = sniff_channel_order(layout_map, tags);
 434     for (i = 0; i < tags; i++) {
 435         int type =     layout_map[i][0];
 436         int id =       layout_map[i][1];
 437         int position = layout_map[i][2];
 438         // Allocate or free elements depending on if they are in the
 439         // current program configuration.
 440         ret = che_configure(ac, position, type, id, &channels);
 441         if (ret < 0)
 442             return ret;
 443     }
 444     if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
 445         if (layout == AV_CH_FRONT_CENTER) {
 446             layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
 447         } else {
 448             layout = 0;
 449         }
 450     }
 451
 452     memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 453     if (layout) avctx->channel_layout = layout;
 454     ac->oc[1].channel_layout = layout;
 455     avctx->channels = ac->oc[1].channels = channels;
 456     ac->oc[1].status = oc_type;
 457
 458     if (get_new_frame) {
 459         if ((ret = frame_configure_elements(ac->avctx)) < 0)
 460             return ret;
 461     }
 462
 463     return 0;
 464 }
 465
 466 static void flush(AVCodecContext *avctx)
 467 {
 468     AACContext *ac= avctx->priv_data;
 469     int type, i, j;
 470
 471     for (type = 3; type >= 0; type--) {
 472         for (i = 0; i < MAX_ELEM_ID; i++) {
 473             ChannelElement *che = ac->che[type][i];
 474             if (che) {
 475                 for (j = 0; j <= 1; j++) {
 476                     memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
 477                 }
 478             }
 479         }
 480     }
 481 }
 482
 483 /**
 484  * Set up channel positions based on a default channel configuration
 485  * as specified in table 1.17.
 486  *
 487  * @return  Returns error status. 0 - OK, !0 - error
 488  */
 489 static int set_default_channel_config(AVCodecContext *avctx,
 490                                               uint8_t (*layout_map)[3],
 491                                               int *tags,
 492                                               int channel_config)
 493 {
 494     if (channel_config < 1 || channel_config > 7) {
 495         av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 496                channel_config);
 497         return -1;
 498     }
 499     *tags = tags_per_config[channel_config];
 500     memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
 501     return 0;
 502 }
 503
 504 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 505 {
 506     // For PCE based channel configurations map the channels solely based on tags.
 507     if (!ac->oc[1].m4ac.chan_config) {
 508         return ac->tag_che_map[type][elem_id];
 509     }
 510     // Allow single CPE stereo files to be signalled with mono configuration.
 511     if (!ac->tags_mapped && type == TYPE_CPE && ac->oc[1].m4ac.chan_config == 1) {
 512         uint8_t layout_map[MAX_ELEM_ID*4][3];
 513         int layout_map_tags;
 514         push_output_configuration(ac);
 515
 516         av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
 517
 518         if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
 519                                        2) < 0)
 520             return NULL;
 521         if (output_configure(ac, layout_map, layout_map_tags,
 522                              OC_TRIAL_FRAME, 1) < 0)
 523             return NULL;
 524
 525         ac->oc[1].m4ac.chan_config = 2;
 526         ac->oc[1].m4ac.ps = 0;
 527     }
 528     // And vice-versa
 529     if (!ac->tags_mapped && type == TYPE_SCE && ac->oc[1].m4ac.chan_config == 2) {
 530         uint8_t layout_map[MAX_ELEM_ID*4][3];
 531         int layout_map_tags;
 532         push_output_configuration(ac);
 533
 534         av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
 535
 536         if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
 537                                        1) < 0)
 538             return NULL;
 539         if (output_configure(ac, layout_map, layout_map_tags,
 540                              OC_TRIAL_FRAME, 1) < 0)
 541             return NULL;
 542
 543         ac->oc[1].m4ac.chan_config = 1;
 544         if (ac->oc[1].m4ac.sbr)
 545             ac->oc[1].m4ac.ps = -1;
 546     }
 547     // For indexed channel configurations map the channels solely based on position.
 548     switch (ac->oc[1].m4ac.chan_config) {
 549     case 7:
 550         if (ac->tags_mapped == 3 && type == TYPE_CPE) {
 551             ac->tags_mapped++;
 552             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
 553         }
 554     case 6:
 555         /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
 556            instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
 557            encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
 558         if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
 559             ac->tags_mapped++;
 560             return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
 561         }
 562     case 5:
 563         if (ac->tags_mapped == 2 && type == TYPE_CPE) {
 564             ac->tags_mapped++;
 565             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
 566         }
 567     case 4:
 568         if (ac->tags_mapped == 2 && ac->oc[1].m4ac.chan_config == 4 && type == TYPE_SCE) {
 569             ac->tags_mapped++;
 570             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
 571         }
 572     case 3:
 573     case 2:
 574         if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && type == TYPE_CPE) {
 575             ac->tags_mapped++;
 576             return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
 577         } else if (ac->oc[1].m4ac.chan_config == 2) {
 578             return NULL;
 579         }
 580     case 1:
 581         if (!ac->tags_mapped && type == TYPE_SCE) {
 582             ac->tags_mapped++;
 583             return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
 584         }
 585     default:
 586         return NULL;
 587     }
 588 }
 589
 590 /**
 591  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 592  *
 593  * @param type speaker type/position for these channels
 594  */
 595 static void decode_channel_map(uint8_t layout_map[][3],
 596                                enum ChannelPosition type,
 597                                GetBitContext *gb, int n)
 598 {
 599     while (n--) {
 600         enum RawDataBlockType syn_ele;
 601         switch (type) {
 602         case AAC_CHANNEL_FRONT:
 603         case AAC_CHANNEL_BACK:
 604         case AAC_CHANNEL_SIDE:
 605             syn_ele = get_bits1(gb);
 606             break;
 607         case AAC_CHANNEL_CC:
 608             skip_bits1(gb);
 609             syn_ele = TYPE_CCE;
 610             break;
 611         case AAC_CHANNEL_LFE:
 612             syn_ele = TYPE_LFE;
 613             break;
 614         default:
 615             av_assert0(0);
 616         }
 617         layout_map[0][0] = syn_ele;
 618         layout_map[0][1] = get_bits(gb, 4);
 619         layout_map[0][2] = type;
 620         layout_map++;
 621     }
 622 }
 623
 624 /**
 625  * Decode program configuration element; reference: table 4.2.
 626  *
 627  * @return  Returns error status. 0 - OK, !0 - error
 628  */
 629 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
 630                       uint8_t (*layout_map)[3],
 631                       GetBitContext *gb)
 632 {
 633     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 634     int comment_len;
 635     int tags;
 636
 637     skip_bits(gb, 2);  // object_type
 638
 639     sampling_index = get_bits(gb, 4);
 640     if (m4ac->sampling_index != sampling_index)
 641         av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 642
 643     num_front       = get_bits(gb, 4);
 644     num_side        = get_bits(gb, 4);
 645     num_back        = get_bits(gb, 4);
 646     num_lfe         = get_bits(gb, 2);
 647     num_assoc_data  = get_bits(gb, 3);
 648     num_cc          = get_bits(gb, 4);
 649
 650     if (get_bits1(gb))
 651         skip_bits(gb, 4); // mono_mixdown_tag
 652     if (get_bits1(gb))
 653         skip_bits(gb, 4); // stereo_mixdown_tag
 654
 655     if (get_bits1(gb))
 656         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 657
 658     if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
 659         av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
 660         return -1;
 661     }
 662     decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
 663     tags = num_front;
 664     decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
 665     tags += num_side;
 666     decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
 667     tags += num_back;
 668     decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
 669     tags += num_lfe;
 670
 671     skip_bits_long(gb, 4 * num_assoc_data);
 672
 673     decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
 674     tags += num_cc;
 675
 676     align_get_bits(gb);
 677
 678     /* comment field, first byte is length */
 679     comment_len = get_bits(gb, 8) * 8;
 680     if (get_bits_left(gb) < comment_len) {
 681         av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
 682         return -1;
 683     }
 684     skip_bits_long(gb, comment_len);
 685     return tags;
 686 }
 687
 688 /**
 689  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 690  *
 691  * @param   ac          pointer to AACContext, may be null
 692  * @param   avctx       pointer to AVCCodecContext, used for logging
 693  *
 694  * @return  Returns error status. 0 - OK, !0 - error
 695  */
 696 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
 697                                      GetBitContext *gb,
 698                                      MPEG4AudioConfig *m4ac,
 699                                      int channel_config)
 700 {
 701     int extension_flag, ret;
 702     uint8_t layout_map[MAX_ELEM_ID*4][3];
 703     int tags = 0;
 704
 705     if (get_bits1(gb)) { // frameLengthFlag
 706         av_log_missing_feature(avctx, "960/120 MDCT window", 1);
 707         return AVERROR_PATCHWELCOME;
 708     }
 709
 710     if (get_bits1(gb))       // dependsOnCoreCoder
 711         skip_bits(gb, 14);   // coreCoderDelay
 712     extension_flag = get_bits1(gb);
 713
 714     if (m4ac->object_type == AOT_AAC_SCALABLE ||
 715         m4ac->object_type == AOT_ER_AAC_SCALABLE)
 716         skip_bits(gb, 3);     // layerNr
 717
 718     if (channel_config == 0) {
 719         skip_bits(gb, 4);  // element_instance_tag
 720         tags = decode_pce(avctx, m4ac, layout_map, gb);
 721         if (tags < 0)
 722             return tags;
 723     } else {
 724         if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
 725             return ret;
 726     }
 727
 728     if (count_channels(layout_map, tags) > 1) {
 729         m4ac->ps = 0;
 730     } else if (m4ac->sbr == 1 && m4ac->ps == -1)
 731         m4ac->ps = 1;
 732
 733     if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
 734         return ret;
 735
 736     if (extension_flag) {
 737         switch (m4ac->object_type) {
 738         case AOT_ER_BSAC:
 739             skip_bits(gb, 5);    // numOfSubFrame
 740             skip_bits(gb, 11);   // layer_length
 741             break;
 742         case AOT_ER_AAC_LC:
 743         case AOT_ER_AAC_LTP:
 744         case AOT_ER_AAC_SCALABLE:
 745         case AOT_ER_AAC_LD:
 746             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 747                                     * aacScalefactorDataResilienceFlag
 748                                     * aacSpectralDataResilienceFlag
 749                                     */
 750             break;
 751         }
 752         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 753     }
 754     return 0;
 755 }
 756
 757 /**
 758  * Decode audio specific configuration; reference: table 1.13.
 759  *
 760  * @param   ac          pointer to AACContext, may be null
 761  * @param   avctx       pointer to AVCCodecContext, used for logging
 762  * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
 763  * @param   data        pointer to buffer holding an audio specific config
 764  * @param   bit_size    size of audio specific config or data in bits
 765  * @param   sync_extension look for an appended sync extension
 766  *
 767  * @return  Returns error status or number of consumed bits. <0 - error
 768  */
 769 static int decode_audio_specific_config(AACContext *ac,
 770                                         AVCodecContext *avctx,
 771                                         MPEG4AudioConfig *m4ac,
 772                                         const uint8_t *data, int bit_size,
 773                                         int sync_extension)
 774 {
 775     GetBitContext gb;
 776     int i;
 777     int ret;
 778
 779     av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
 780     for (i = 0; i < bit_size >> 3; i++)
 781          av_dlog(avctx, "%02x ", data[i]);
 782     av_dlog(avctx, "\n");
 783
 784     if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
 785         return ret;
 786
 787     if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
 788         return -1;
 789     if (m4ac->sampling_index > 12) {
 790         av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
 791         return -1;
 792     }
 793
 794     skip_bits_long(&gb, i);
 795
 796     switch (m4ac->object_type) {
 797     case AOT_AAC_MAIN:
 798     case AOT_AAC_LC:
 799     case AOT_AAC_LTP:
 800         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
 801             return -1;
 802         break;
 803     default:
 804         av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 805                m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
 806         return -1;
 807     }
 808
 809     av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
 810             m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
 811             m4ac->sample_rate, m4ac->sbr, m4ac->ps);
 812
 813     return get_bits_count(&gb);
 814 }
 815
 816 /**
 817  * linear congruential pseudorandom number generator
 818  *
 819  * @param   previous_val    pointer to the current state of the generator
 820  *
 821  * @return  Returns a 32-bit pseudorandom integer
 822  */
 823 static av_always_inline int lcg_random(unsigned previous_val)
 824 {
 825     union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
 826     return v.s;
 827 }
 828
 829 static av_always_inline void reset_predict_state(PredictorState *ps)
 830 {
 831     ps->r0   = 0.0f;
 832     ps->r1   = 0.0f;
 833     ps->cor0 = 0.0f;
 834     ps->cor1 = 0.0f;
 835     ps->var0 = 1.0f;
 836     ps->var1 = 1.0f;
 837 }
 838
 839 static void reset_all_predictors(PredictorState *ps)
 840 {
 841     int i;
 842     for (i = 0; i < MAX_PREDICTORS; i++)
 843         reset_predict_state(&ps[i]);
 844 }
 845
 846 static int sample_rate_idx (int rate)
 847 {
 848          if (92017 <= rate) return 0;
 849     else if (75132 <= rate) return 1;
 850     else if (55426 <= rate) return 2;
 851     else if (46009 <= rate) return 3;
 852     else if (37566 <= rate) return 4;
 853     else if (27713 <= rate) return 5;
 854     else if (23004 <= rate) return 6;
 855     else if (18783 <= rate) return 7;
 856     else if (13856 <= rate) return 8;
 857     else if (11502 <= rate) return 9;
 858     else if (9391  <= rate) return 10;
 859     else                    return 11;
 860 }
 861
 862 static void reset_predictor_group(PredictorState *ps, int group_num)
 863 {
 864     int i;
 865     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 866         reset_predict_state(&ps[i]);
 867 }
 868
/**
 * Initialize the static spectral VLC vlc_spectral[num] from the tables
 * ff_aac_spectral_sizes[num], ff_aac_spectral_bits[num] and
 * ff_aac_spectral_codes[num]. size is forwarded unchanged as the last
 * INIT_VLC_STATIC argument (presumably the static table size; confirm
 * against the INIT_VLC_STATIC definition).
 *
 * The expansion already ends in ';', so invoking the macro followed by a
 * semicolon produces an additional empty statement.
 */
#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);
 874
 875 static av_cold int aac_decode_init(AVCodecContext *avctx)
 876 {
 877     AACContext *ac = avctx->priv_data;
 878
 879     ac->avctx = avctx;
 880     ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
 881
 882     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 883
 884     if (avctx->extradata_size > 0) {
 885         if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
 886                                          avctx->extradata,
 887                                          avctx->extradata_size*8, 1) < 0)
 888             return -1;
 889     } else {
 890         int sr, i;
 891         uint8_t layout_map[MAX_ELEM_ID*4][3];
 892         int layout_map_tags;
 893
 894         sr = sample_rate_idx(avctx->sample_rate);
 895         ac->oc[1].m4ac.sampling_index = sr;
 896         ac->oc[1].m4ac.channels = avctx->channels;
 897         ac->oc[1].m4ac.sbr = -1;
 898         ac->oc[1].m4ac.ps = -1;
 899
 900         for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
 901             if (ff_mpeg4audio_channels[i] == avctx->channels)
 902                 break;
 903         if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
 904             i = 0;
 905         }
 906         ac->oc[1].m4ac.chan_config = i;
 907
 908         if (ac->oc[1].m4ac.chan_config) {
 909             int ret = set_default_channel_config(avctx, layout_map,
 910                 &layout_map_tags, ac->oc[1].m4ac.chan_config);
 911             if (!ret)
 912                 output_configure(ac, layout_map, layout_map_tags,
 913                                  OC_GLOBAL_HDR, 0);
 914             else if (avctx->err_recognition & AV_EF_EXPLODE)
 915                 return AVERROR_INVALIDDATA;
 916         }
 917     }
 918
 919     if (avctx->channels > MAX_CHANNELS) {
 920         av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
 921         return AVERROR_INVALIDDATA;
 922     }
 923
 924     AAC_INIT_VLC_STATIC( 0, 304);
 925     AAC_INIT_VLC_STATIC( 1, 270);
 926     AAC_INIT_VLC_STATIC( 2, 550);
 927     AAC_INIT_VLC_STATIC( 3, 300);
 928     AAC_INIT_VLC_STATIC( 4, 328);
 929     AAC_INIT_VLC_STATIC( 5, 294);
 930     AAC_INIT_VLC_STATIC( 6, 306);
 931     AAC_INIT_VLC_STATIC( 7, 268);
 932     AAC_INIT_VLC_STATIC( 8, 510);
 933     AAC_INIT_VLC_STATIC( 9, 366);
 934     AAC_INIT_VLC_STATIC(10, 462);
 935
 936     ff_aac_sbr_init();
 937
 938     ff_fmt_convert_init(&ac->fmt_conv, avctx);
 939     avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 940
 941     ac->random_state = 0x1f2e3d4c;
 942
 943     ff_aac_tableinit();
 944
 945     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
 946                     ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
 947                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
 948                     352);
 949
 950     ff_mdct_init(&ac->mdct,       11, 1, 1.0 / (32768.0 * 1024.0));
 951     ff_mdct_init(&ac->mdct_small,  8, 1, 1.0 / (32768.0 * 128.0));
 952     ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0 * 32768.0);
 953     // window initialization
 954     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 955     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 956     ff_init_ff_sine_windows(10);
 957     ff_init_ff_sine_windows( 7);
 958
 959     cbrt_tableinit();
 960
 961     avcodec_get_frame_defaults(&ac->frame);
 962     avctx->coded_frame = &ac->frame;
 963
 964     return 0;
 965 }
 966
 967 /**
 968  * Skip data_stream_element; reference: table 4.10.
 969  */
 970 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 971 {
 972     int byte_align = get_bits1(gb);
 973     int count = get_bits(gb, 8);
 974     if (count == 255)
 975         count += get_bits(gb, 8);
 976     if (byte_align)
 977         align_get_bits(gb);
 978
 979     if (get_bits_left(gb) < 8 * count) {
 980         av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
 981         return -1;
 982     }
 983     skip_bits_long(gb, 8 * count);
 984     return 0;
 985 }
 986
 987 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 988                              GetBitContext *gb)
 989 {
 990     int sfb;
 991     if (get_bits1(gb)) {
 992         ics->predictor_reset_group = get_bits(gb, 5);
 993         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 994             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 995             return -1;
 996         }
 997     }
 998     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
 999         ics->prediction_used[sfb] = get_bits1(gb);
1000     }
1001     return 0;
1002 }
1003
1004 /**
1005  * Decode Long Term Prediction data; reference: table 4.xx.
1006  */
1007 static void decode_ltp(LongTermPrediction *ltp,
1008                        GetBitContext *gb, uint8_t max_sfb)
1009 {
1010     int sfb;
1011
1012     ltp->lag  = get_bits(gb, 11);
1013     ltp->coef = ltp_coef[get_bits(gb, 3)];
1014     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1015         ltp->used[sfb] = get_bits1(gb);
1016 }
1017
1018 /**
1019  * Decode Individual Channel Stream info; reference: table 4.6.
1020  */
1021 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
1022                            GetBitContext *gb)
1023 {
1024     if (get_bits1(gb)) {
1025         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
1026         return AVERROR_INVALIDDATA;
1027     }
1028     ics->window_sequence[1] = ics->window_sequence[0];
1029     ics->window_sequence[0] = get_bits(gb, 2);
1030     ics->use_kb_window[1]   = ics->use_kb_window[0];
1031     ics->use_kb_window[0]   = get_bits1(gb);
1032     ics->num_window_groups  = 1;
1033     ics->group_len[0]       = 1;
1034     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1035         int i;
1036         ics->max_sfb = get_bits(gb, 4);
1037         for (i = 0; i < 7; i++) {
1038             if (get_bits1(gb)) {
1039                 ics->group_len[ics->num_window_groups - 1]++;
1040             } else {
1041                 ics->num_window_groups++;
1042                 ics->group_len[ics->num_window_groups - 1] = 1;
1043             }
1044         }
1045         ics->num_windows       = 8;
1046         ics->swb_offset        =    ff_swb_offset_128[ac->oc[1].m4ac.sampling_index];
1047         ics->num_swb           =   ff_aac_num_swb_128[ac->oc[1].m4ac.sampling_index];
1048         ics->tns_max_bands     = ff_tns_max_bands_128[ac->oc[1].m4ac.sampling_index];
1049         ics->predictor_present = 0;
1050     } else {
1051         ics->max_sfb               = get_bits(gb, 6);
1052         ics->num_windows           = 1;
1053         ics->swb_offset            =    ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
1054         ics->num_swb               =   ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
1055         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index];
1056         ics->predictor_present     = get_bits1(gb);
1057         ics->predictor_reset_group = 0;
1058         if (ics->predictor_present) {
1059             if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1060                 if (decode_prediction(ac, ics, gb)) {
1061                     goto fail;
1062                 }
1063             } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
1064                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
1065                 goto fail;
1066             } else {
1067                 if ((ics->ltp.present = get_bits(gb, 1)))
1068                     decode_ltp(&ics->ltp, gb, ics->max_sfb);
1069             }
1070         }
1071     }
1072
1073     if (ics->max_sfb > ics->num_swb) {
1074         av_log(ac->avctx, AV_LOG_ERROR,
1075                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
1076                ics->max_sfb, ics->num_swb);
1077         goto fail;
1078     }
1079
1080     return 0;
1081 fail:
1082     ics->max_sfb = 0;
1083     return AVERROR_INVALIDDATA;
1084 }
1085
1086 /**
1087  * Decode band types (section_data payload); reference: table 4.46.
1088  *
1089  * @param   band_type           array of the used band type
1090  * @param   band_type_run_end   array of the last scalefactor band of a band type run
1091  *
1092  * @return  Returns error status. 0 - OK, !0 - error
1093  */
1094 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1095                              int band_type_run_end[120], GetBitContext *gb,
1096                              IndividualChannelStream *ics)
1097 {
1098     int g, idx = 0;
1099     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1100     for (g = 0; g < ics->num_window_groups; g++) {
1101         int k = 0;
1102         while (k < ics->max_sfb) {
1103             uint8_t sect_end = k;
1104             int sect_len_incr;
1105             int sect_band_type = get_bits(gb, 4);
1106             if (sect_band_type == 12) {
1107                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1108                 return -1;
1109             }
1110             do {
1111                 sect_len_incr = get_bits(gb, bits);
1112                 sect_end += sect_len_incr;
1113                 if (get_bits_left(gb) < 0) {
1114                     av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1115                     return -1;
1116                 }
1117                 if (sect_end > ics->max_sfb) {
1118                     av_log(ac->avctx, AV_LOG_ERROR,
1119                            "Number of bands (%d) exceeds limit (%d).\n",
1120                            sect_end, ics->max_sfb);
1121                     return -1;
1122                 }
1123             } while (sect_len_incr == (1 << bits) - 1);
1124             for (; k < sect_end; k++) {
1125                 band_type        [idx]   = sect_band_type;
1126                 band_type_run_end[idx++] = sect_end;
1127             }
1128         }
1129     }
1130     return 0;
1131 }
1132
1133 /**
1134  * Decode scalefactors; reference: table 4.47.
1135  *
1136  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
1137  * @param   band_type           array of the used band type
1138  * @param   band_type_run_end   array of the last scalefactor band of a band type run
1139  * @param   sf                  array of scalefactors or intensity stereo positions
1140  *
1141  * @return  Returns error status. 0 - OK, !0 - error
1142  */
1143 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1144                                unsigned int global_gain,
1145                                IndividualChannelStream *ics,
1146                                enum BandType band_type[120],
1147                                int band_type_run_end[120])
1148 {
1149     int g, i, idx = 0;
1150     int offset[3] = { global_gain, global_gain - 90, 0 };
1151     int clipped_offset;
1152     int noise_flag = 1;
1153     for (g = 0; g < ics->num_window_groups; g++) {
1154         for (i = 0; i < ics->max_sfb;) {
1155             int run_end = band_type_run_end[idx];
1156             if (band_type[idx] == ZERO_BT) {
1157                 for (; i < run_end; i++, idx++)
1158                     sf[idx] = 0.;
1159             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
1160                 for (; i < run_end; i++, idx++) {
1161                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1162                     clipped_offset = av_clip(offset[2], -155, 100);
1163                     if (offset[2] != clipped_offset) {
1164                         av_log_ask_for_sample(ac->avctx, "Intensity stereo "
1165                                 "position clipped (%d -> %d).\nIf you heard an "
1166                                 "audible artifact, there may be a bug in the "
1167                                 "decoder. ", offset[2], clipped_offset);
1168                     }
1169                     sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1170                 }
1171             } else if (band_type[idx] == NOISE_BT) {
1172                 for (; i < run_end; i++, idx++) {
1173                     if (noise_flag-- > 0)
1174                         offset[1] += get_bits(gb, 9) - 256;
1175                     else
1176                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1177                     clipped_offset = av_clip(offset[1], -100, 155);
1178                     if (offset[1] != clipped_offset) {
1179                         av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
1180                                 "(%d -> %d).\nIf you heard an audible "
1181                                 "artifact, there may be a bug in the decoder. ",
1182                                 offset[1], clipped_offset);
1183                     }
1184                     sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1185                 }
1186             } else {
1187                 for (; i < run_end; i++, idx++) {
1188                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1189                     if (offset[0] > 255U) {
1190                         av_log(ac->avctx, AV_LOG_ERROR,
1191                                "Scalefactor (%d) out of range.\n", offset[0]);
1192                         return -1;
1193                     }
1194                     sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1195                 }
1196             }
1197         }
1198     }
1199     return 0;
1200 }
1201
1202 /**
1203  * Decode pulse data; reference: table 4.7.
1204  */
1205 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1206                          const uint16_t *swb_offset, int num_swb)
1207 {
1208     int i, pulse_swb;
1209     pulse->num_pulse = get_bits(gb, 2) + 1;
1210     pulse_swb        = get_bits(gb, 6);
1211     if (pulse_swb >= num_swb)
1212         return -1;
1213     pulse->pos[0]    = swb_offset[pulse_swb];
1214     pulse->pos[0]   += get_bits(gb, 5);
1215     if (pulse->pos[0] > 1023)
1216         return -1;
1217     pulse->amp[0]    = get_bits(gb, 4);
1218     for (i = 1; i < pulse->num_pulse; i++) {
1219         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1220         if (pulse->pos[i] > 1023)
1221             return -1;
1222         pulse->amp[i] = get_bits(gb, 4);
1223     }
1224     return 0;
1225 }
1226
1227 /**
1228  * Decode Temporal Noise Shaping data; reference: table 4.48.
1229  *
1230  * @return  Returns error status. 0 - OK, !0 - error
1231  */
1232 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
1233                       GetBitContext *gb, const IndividualChannelStream *ics)
1234 {
1235     int w, filt, i, coef_len, coef_res, coef_compress;
1236     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1237     const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1238     for (w = 0; w < ics->num_windows; w++) {
1239         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1240             coef_res = get_bits1(gb);
1241
1242             for (filt = 0; filt < tns->n_filt[w]; filt++) {
1243                 int tmp2_idx;
1244                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1245
1246                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1247                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
1248                            tns->order[w][filt], tns_max_order);
1249                     tns->order[w][filt] = 0;
1250                     return -1;
1251                 }
1252                 if (tns->order[w][filt]) {
1253                     tns->direction[w][filt] = get_bits1(gb);
1254                     coef_compress = get_bits1(gb);
1255                     coef_len = coef_res + 3 - coef_compress;
1256                     tmp2_idx = 2 * coef_compress + coef_res;
1257
1258                     for (i = 0; i < tns->order[w][filt]; i++)
1259                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1260                 }
1261             }
1262         }
1263     }
1264     return 0;
1265 }
1266
1267 /**
1268  * Decode Mid/Side data; reference: table 4.54.
1269  *
1270  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1271  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1272  *                      [3] reserved for scalable AAC
1273  */
1274 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
1275                                    int ms_present)
1276 {
1277     int idx;
1278     if (ms_present == 1) {
1279         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
1280             cpe->ms_mask[idx] = get_bits1(gb);
1281     } else if (ms_present == 2) {
1282         memset(cpe->ms_mask, 1,  sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
1283     }
1284 }
1285
#ifndef VMUL2
/**
 * Write two scaled table values to dst.
 *
 * The two 4-bit fields in bits 0-3 and 4-7 of idx select entries of v; each
 * selected value is multiplied by *scale.
 *
 * @return pointer just past the two values written
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;
    return dst + 2;
}
#endif
1296
#ifndef VMUL4
/**
 * Write four scaled table values to dst.
 *
 * The four 2-bit fields in bits 0-7 of idx (lowest field first) select
 * entries of v; each selected value is multiplied by *scale.
 *
 * @return pointer just past the four values written
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 3] * gain;
    dst[1] = v[(idx >> 2) & 3] * gain;
    dst[2] = v[(idx >> 4) & 3] * gain;
    dst[3] = v[(idx >> 6) & 3] * gain;
    return dst + 4;
}
#endif
1309
1310 #ifndef VMUL2S
1311 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1312                             unsigned sign, const float *scale)
1313 {
1314     union av_intfloat32 s0, s1;
1315
1316     s0.f = s1.f = *scale;
1317     s0.i ^= sign >> 1 << 31;
1318     s1.i ^= sign      << 31;
1319
1320     *dst++ = v[idx    & 15] * s0.f;
1321     *dst++ = v[idx>>4 & 15] * s1.f;
1322
1323     return dst;
1324 }
1325 #endif
1326
1327 #ifndef VMUL4S
1328 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1329                             unsigned sign, const float *scale)
1330 {
1331     unsigned nz = idx >> 12;
1332     union av_intfloat32 s = { .f = *scale };
1333     union av_intfloat32 t;
1334
1335     t.i = s.i ^ (sign & 1U<<31);
1336     *dst++ = v[idx    & 3] * t.f;
1337
1338     sign <<= nz & 1; nz >>= 1;
1339     t.i = s.i ^ (sign & 1U<<31);
1340     *dst++ = v[idx>>2 & 3] * t.f;
1341
1342     sign <<= nz & 1; nz >>= 1;
1343     t.i = s.i ^ (sign & 1U<<31);
1344     *dst++ = v[idx>>4 & 3] * t.f;
1345
1346     sign <<= nz & 1;
1347     t.i = s.i ^ (sign & 1U<<31);
1348     *dst++ = v[idx>>6 & 3] * t.f;
1349
1350     return dst;
1351 }
1352 #endif
1353
/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * For every window group and scalefactor band the coefficients are either
 * cleared, filled with scaled pseudorandom noise, or decoded with the
 * spectral VLC selected by the band type. Pulses are added afterwards.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   gb              bitstream reader the spectral data is read from
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   ics             window/band layout of the channel being decoded
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                                       GetBitContext *gb, const float sf[120],
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
    int i, k, g, idx = 0;
    /* Number of coefficients per window. */
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
    /* coef is advanced per window group below; keep the start for pulses. */
    float *coef_base = coef;

    /* Clear everything from the end of band max_sfb - 1 up to the end of
     * each window; windows are stored 128 coefficients apart. */
    for (g = 0; g < ics->num_windows; g++)
        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));

    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            /* Band type minus one in unsigned arithmetic: a band type of 0
             * wraps to UINT_MAX and therefore takes the first branch. */
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                /* No spectral data is read for these band types: clear the
                 * band in every window of the group. */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
                }
            } else if (cbt_m1 == NOISE_BT - 1) {
                /* Noise band: fill with pseudorandom values, then scale so
                 * that the band is multiplied by sf[idx] / sqrt(energy). */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    float scale;
                    float band_energy;

                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state;
                    }

                    band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
                    scale = sf[idx] / sqrtf(band_energy);
                    ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
                }
            } else {
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                OPEN_READER(re, gb);

                /* Codebooks are handled in pairs that share one decoding
                 * scheme. */
                switch (cbt_m1 >> 1) {
                case 0:
                    /* Four values per codeword, no extra sign bits read. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 1:
                    /* Four values per codeword; nnz bits following the
                     * codeword are consumed and passed on as sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            bits = nnz ? GET_CACHE(re, gb) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 2:
                    /* Two values per codeword, no extra sign bits read. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                case 3:
                case 4:
                    /* Two values per codeword; nnz bits following the
                     * codeword are consumed and passed on as sign bits. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                default:
                    /* Two values per codeword with escape sequences. The
                     * values are assembled as raw 32-bit patterns through
                     * icf and scaled by sf[idx] once the band is complete. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            if (!code) {
                                *icf++ = 0;
                                *icf++ = 0;
                                /* continue jumps to the loop condition, so
                                 * len is still decremented. */
                                continue;
                            }

                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
                            /* Left-align the sign bits so that the next one
                             * to use is always bit 31. */
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);

                            for (j = 0; j < 2; j++) {
                                /* Bit j of nzt marks value j as escape coded. */
                                if (nzt & 1<<j) {
                                    uint32_t b;
                                    int n;
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    /* Presumably av_log2 returns the index of
                                     * the highest set bit, which makes b the
                                     * number of leading one bits -- confirm. */
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return -1;
                                    }

                                    SKIP_BITS(re, gb, b + 1);
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
                                    bits <<= 1;
                                } else {
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
                                    *icf++ = (bits & 1U<<31) | v;
                                    /* A sign bit is consumed only when the
                                     * value's bit pattern is nonzero. */
                                    bits <<= !!v;
                                }
                                cb_idx >>= 4;
                            }
                        } while (len -= 2);

                        ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                    }
                }

                CLOSE_READER(re, gb);
            }
        }
        /* Advance by 128 coefficients for every window of this group. */
        coef += g_len << 7;
    }

    if (pulse_present) {
        idx = 0;
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            /* Find the scalefactor band containing this pulse position; idx
             * is not reset between pulses. */
            while (offsets[idx + 1] <= pulse->pos[i])
                idx++;
            /* Pulses are skipped in noise bands and where the scalefactor
             * is zero. */
            if (band_type[idx] != NOISE_BT && sf[idx]) {
                float ico = -pulse->amp[i];
                if (co) {
                    /* Undo scaling and the x^(4/3) expansion, then move the
                     * value away from zero by the pulse amplitude. */
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                /* cbrtf(|x|) * x equals sign(x) * |x|^(4/3). */
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
            }
        }
    }
    return 0;
}
1579
1580 static av_always_inline float flt16_round(float pf)
1581 {
1582     union av_intfloat32 tmp;
1583     tmp.f = pf;
1584     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1585     return tmp.f;
1586 }
1587
1588 static av_always_inline float flt16_even(float pf)
1589 {
1590     union av_intfloat32 tmp;
1591     tmp.f = pf;
1592     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1593     return tmp.f;
1594 }
1595
1596 static av_always_inline float flt16_trunc(float pf)
1597 {
1598     union av_intfloat32 pun;
1599     pun.f = pf;
1600     pun.i &= 0xFFFF0000U;
1601     return pun.f;
1602 }
1603
1604 static av_always_inline void predict(PredictorState *ps, float *coef,
1605                                      int output_enable)
1606 {
1607     const float a     = 0.953125; // 61.0 / 64
1608     const float alpha = 0.90625;  // 29.0 / 32
1609     float e0, e1;
1610     float pv;
1611     float k1, k2;
1612     float   r0 = ps->r0,     r1 = ps->r1;
1613     float cor0 = ps->cor0, cor1 = ps->cor1;
1614     float var0 = ps->var0, var1 = ps->var1;
1615
1616     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1617     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1618
1619     pv = flt16_round(k1 * r0 + k2 * r1);
1620     if (output_enable)
1621         *coef += pv;
1622
1623     e0 = *coef;
1624     e1 = e0 - k1 * r0;
1625
1626     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1627     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1628     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1629     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1630
1631     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1632     ps->r0 = flt16_trunc(a * e0);
1633 }
1634
1635 /**
1636  * Apply AAC-Main style frequency domain prediction.
1637  */
1638 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1639 {
1640     int sfb, k;
1641
1642     if (!sce->ics.predictor_initialized) {
1643         reset_all_predictors(sce->predictor_state);
1644         sce->ics.predictor_initialized = 1;
1645     }
1646
1647     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1648         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; sfb++) {
1649             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1650                 predict(&sce->predictor_state[k], &sce->coeffs[k],
1651                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1652             }
1653         }
1654         if (sce->ics.predictor_reset_group)
1655             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1656     } else
1657         reset_all_predictors(sce->predictor_state);
1658 }
1659
1660 /**
1661  * Decode an individual_channel_stream payload; reference: table 4.44.
1662  *
1663  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1664  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1665  *
1666  * @return  Returns error status. 0 - OK, !0 - error
1667  */
1668 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1669                       GetBitContext *gb, int common_window, int scale_flag)
1670 {
1671     Pulse pulse;
1672     TemporalNoiseShaping    *tns = &sce->tns;
1673     IndividualChannelStream *ics = &sce->ics;
1674     float *out = sce->coeffs;
1675     int global_gain, pulse_present = 0;
1676
1677     /* This assignment is to silence a GCC warning about the variable being used
1678      * uninitialized when in fact it always is.
1679      */
1680     pulse.num_pulse = 0;
1681
1682     global_gain = get_bits(gb, 8);
1683
1684     if (!common_window && !scale_flag) {
1685         if (decode_ics_info(ac, ics, gb) < 0)
1686             return AVERROR_INVALIDDATA;
1687     }
1688
1689     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1690         return -1;
1691     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1692         return -1;
1693
1694     pulse_present = 0;
1695     if (!scale_flag) {
1696         if ((pulse_present = get_bits1(gb))) {
1697             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1698                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1699                 return -1;
1700             }
1701             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1702                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1703                 return -1;
1704             }
1705         }
1706         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1707             return -1;
1708         if (get_bits1(gb)) {
1709             av_log_missing_feature(ac->avctx, "SSR", 1);
1710             return AVERROR_PATCHWELCOME;
1711         }
1712     }
1713
1714     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1715         return -1;
1716
1717     if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1718         apply_prediction(ac, sce);
1719
1720     return 0;
1721 }
1722
1723 /**
1724  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1725  */
1726 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1727 {
1728     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1729     float *ch0 = cpe->ch[0].coeffs;
1730     float *ch1 = cpe->ch[1].coeffs;
1731     int g, i, group, idx = 0;
1732     const uint16_t *offsets = ics->swb_offset;
1733     for (g = 0; g < ics->num_window_groups; g++) {
1734         for (i = 0; i < ics->max_sfb; i++, idx++) {
1735             if (cpe->ms_mask[idx] &&
1736                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1737                 for (group = 0; group < ics->group_len[g]; group++) {
1738                     ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i],
1739                                                ch1 + group * 128 + offsets[i],
1740                                                offsets[i+1] - offsets[i]);
1741                 }
1742             }
1743         }
1744         ch0 += ics->group_len[g] * 128;
1745         ch1 += ics->group_len[g] * 128;
1746     }
1747 }
1748
1749 /**
1750  * intensity stereo decoding; reference: 4.6.8.2.3
1751  *
1752  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1753  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1754  *                      [3] reserved for scalable AAC
1755  */
1756 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1757 {
1758     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1759     SingleChannelElement         *sce1 = &cpe->ch[1];
1760     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1761     const uint16_t *offsets = ics->swb_offset;
1762     int g, group, i, idx = 0;
1763     int c;
1764     float scale;
1765     for (g = 0; g < ics->num_window_groups; g++) {
1766         for (i = 0; i < ics->max_sfb;) {
1767             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1768                 const int bt_run_end = sce1->band_type_run_end[idx];
1769                 for (; i < bt_run_end; i++, idx++) {
1770                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1771                     if (ms_present)
1772                         c *= 1 - 2 * cpe->ms_mask[idx];
1773                     scale = c * sce1->sf[idx];
1774                     for (group = 0; group < ics->group_len[g]; group++)
1775                         ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1776                                                     coef0 + group * 128 + offsets[i],
1777                                                     scale,
1778                                                     offsets[i + 1] - offsets[i]);
1779                 }
1780             } else {
1781                 int bt_run_end = sce1->band_type_run_end[idx];
1782                 idx += bt_run_end - i;
1783                 i    = bt_run_end;
1784             }
1785         }
1786         coef0 += ics->group_len[g] * 128;
1787         coef1 += ics->group_len[g] * 128;
1788     }
1789 }
1790
1791 /**
1792  * Decode a channel_pair_element; reference: table 4.4.
1793  *
1794  * @return  Returns error status. 0 - OK, !0 - error
1795  */
1796 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1797 {
1798     int i, ret, common_window, ms_present = 0;
1799
1800     common_window = get_bits1(gb);
1801     if (common_window) {
1802         if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1803             return AVERROR_INVALIDDATA;
1804         i = cpe->ch[1].ics.use_kb_window[0];
1805         cpe->ch[1].ics = cpe->ch[0].ics;
1806         cpe->ch[1].ics.use_kb_window[1] = i;
1807         if (cpe->ch[1].ics.predictor_present && (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
1808             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1809                 decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1810         ms_present = get_bits(gb, 2);
1811         if (ms_present == 3) {
1812             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1813             return -1;
1814         } else if (ms_present)
1815             decode_mid_side_stereo(cpe, gb, ms_present);
1816     }
1817     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1818         return ret;
1819     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1820         return ret;
1821
1822     if (common_window) {
1823         if (ms_present)
1824             apply_mid_side_stereo(ac, cpe);
1825         if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1826             apply_prediction(ac, &cpe->ch[0]);
1827             apply_prediction(ac, &cpe->ch[1]);
1828         }
1829     }
1830
1831     apply_intensity_stereo(ac, cpe, ms_present);
1832     return 0;
1833 }
1834
1835 static const float cce_scale[] = {
1836     1.09050773266525765921, //2^(1/8)
1837     1.18920711500272106672, //2^(1/4)
1838     M_SQRT2,
1839     2,
1840 };
1841
1842 /**
1843  * Decode coupling_channel_element; reference: table 4.8.
1844  *
1845  * @return  Returns error status. 0 - OK, !0 - error
1846  */
1847 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1848 {
1849     int num_gain = 0;
1850     int c, g, sfb, ret;
1851     int sign;
1852     float scale;
1853     SingleChannelElement *sce = &che->ch[0];
1854     ChannelCoupling     *coup = &che->coup;
1855
1856     coup->coupling_point = 2 * get_bits1(gb);
1857     coup->num_coupled = get_bits(gb, 3);
1858     for (c = 0; c <= coup->num_coupled; c++) {
1859         num_gain++;
1860         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1861         coup->id_select[c] = get_bits(gb, 4);
1862         if (coup->type[c] == TYPE_CPE) {
1863             coup->ch_select[c] = get_bits(gb, 2);
1864             if (coup->ch_select[c] == 3)
1865                 num_gain++;
1866         } else
1867             coup->ch_select[c] = 2;
1868     }
1869     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1870
1871     sign  = get_bits(gb, 1);
1872     scale = cce_scale[get_bits(gb, 2)];
1873
1874     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1875         return ret;
1876
1877     for (c = 0; c < num_gain; c++) {
1878         int idx  = 0;
1879         int cge  = 1;
1880         int gain = 0;
1881         float gain_cache = 1.;
1882         if (c) {
1883             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1884             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1885             gain_cache = powf(scale, -gain);
1886         }
1887         if (coup->coupling_point == AFTER_IMDCT) {
1888             coup->gain[c][0] = gain_cache;
1889         } else {
1890             for (g = 0; g < sce->ics.num_window_groups; g++) {
1891                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1892                     if (sce->band_type[idx] != ZERO_BT) {
1893                         if (!cge) {
1894                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1895                             if (t) {
1896                                 int s = 1;
1897                                 t = gain += t;
1898                                 if (sign) {
1899                                     s  -= 2 * (t & 0x1);
1900                                     t >>= 1;
1901                                 }
1902                                 gain_cache = powf(scale, -t) * s;
1903                             }
1904                         }
1905                         coup->gain[c][idx] = gain_cache;
1906                     }
1907                 }
1908             }
1909         }
1910     }
1911     return 0;
1912 }
1913
1914 /**
1915  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1916  *
1917  * @return  Returns number of bytes consumed.
1918  */
1919 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1920                                          GetBitContext *gb)
1921 {
1922     int i;
1923     int num_excl_chan = 0;
1924
1925     do {
1926         for (i = 0; i < 7; i++)
1927             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1928     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1929
1930     return num_excl_chan / 7;
1931 }
1932
1933 /**
1934  * Decode dynamic range information; reference: table 4.52.
1935  *
1936  * @return  Returns number of bytes consumed.
1937  */
1938 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1939                                 GetBitContext *gb)
1940 {
1941     int n             = 1;
1942     int drc_num_bands = 1;
1943     int i;
1944
1945     /* pce_tag_present? */
1946     if (get_bits1(gb)) {
1947         che_drc->pce_instance_tag  = get_bits(gb, 4);
1948         skip_bits(gb, 4); // tag_reserved_bits
1949         n++;
1950     }
1951
1952     /* excluded_chns_present? */
1953     if (get_bits1(gb)) {
1954         n += decode_drc_channel_exclusions(che_drc, gb);
1955     }
1956
1957     /* drc_bands_present? */
1958     if (get_bits1(gb)) {
1959         che_drc->band_incr            = get_bits(gb, 4);
1960         che_drc->interpolation_scheme = get_bits(gb, 4);
1961         n++;
1962         drc_num_bands += che_drc->band_incr;
1963         for (i = 0; i < drc_num_bands; i++) {
1964             che_drc->band_top[i] = get_bits(gb, 8);
1965             n++;
1966         }
1967     }
1968
1969     /* prog_ref_level_present? */
1970     if (get_bits1(gb)) {
1971         che_drc->prog_ref_level = get_bits(gb, 7);
1972         skip_bits1(gb); // prog_ref_level_reserved_bits
1973         n++;
1974     }
1975
1976     for (i = 0; i < drc_num_bands; i++) {
1977         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1978         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1979         n++;
1980     }
1981
1982     return n;
1983 }
1984
1985 static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
1986     uint8_t buf[256];
1987     int i, major, minor;
1988
1989     if (len < 13+7*8)
1990         goto unknown;
1991
1992     get_bits(gb, 13); len -= 13;
1993
1994     for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
1995         buf[i] = get_bits(gb, 8);
1996
1997     buf[i] = 0;
1998     if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
1999         av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2000
2001     if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2002         ac->avctx->internal->skip_samples = 1024;
2003     }
2004
2005 unknown:
2006     skip_bits_long(gb, len);
2007
2008     return 0;
2009 }
2010
2011 /**
2012  * Decode extension data (incomplete); reference: table 4.51.
2013  *
2014  * @param   cnt length of TYPE_FIL syntactic element in bytes
2015  *
2016  * @return Returns number of bytes consumed
2017  */
2018 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
2019                                     ChannelElement *che, enum RawDataBlockType elem_type)
2020 {
2021     int crc_flag = 0;
2022     int res = cnt;
2023     switch (get_bits(gb, 4)) { // extension type
2024     case EXT_SBR_DATA_CRC:
2025         crc_flag++;
2026     case EXT_SBR_DATA:
2027         if (!che) {
2028             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2029             return res;
2030         } else if (!ac->oc[1].m4ac.sbr) {
2031             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2032             skip_bits_long(gb, 8 * cnt - 4);
2033             return res;
2034         } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2035             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2036             skip_bits_long(gb, 8 * cnt - 4);
2037             return res;
2038         } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
2039             ac->oc[1].m4ac.sbr = 1;
2040             ac->oc[1].m4ac.ps = 1;
2041             output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2042                              ac->oc[1].status, 1);
2043         } else {
2044             ac->oc[1].m4ac.sbr = 1;
2045         }
2046         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2047         break;
2048     case EXT_DYNAMIC_RANGE:
2049         res = decode_dynamic_range(&ac->che_drc, gb);
2050         break;
2051     case EXT_FILL:
2052         decode_fill(ac, gb, 8 * cnt - 4);
2053         break;
2054     case EXT_FILL_DATA:
2055     case EXT_DATA_ELEMENT:
2056     default:
2057         skip_bits_long(gb, 8 * cnt - 4);
2058         break;
2059     };
2060     return res;
2061 }
2062
2063 /**
2064  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2065  *
2066  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
2067  * @param   coef    spectral coefficients
2068  */
2069 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
2070                       IndividualChannelStream *ics, int decode)
2071 {
2072     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2073     int w, filt, m, i;
2074     int bottom, top, order, start, end, size, inc;
2075     float lpc[TNS_MAX_ORDER];
2076     float tmp[TNS_MAX_ORDER+1];
2077
2078     for (w = 0; w < ics->num_windows; w++) {
2079         bottom = ics->num_swb;
2080         for (filt = 0; filt < tns->n_filt[w]; filt++) {
2081             top    = bottom;
2082             bottom = FFMAX(0, top - tns->length[w][filt]);
2083             order  = tns->order[w][filt];
2084             if (order == 0)
2085                 continue;
2086
2087             // tns_decode_coef
2088             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
2089
2090             start = ics->swb_offset[FFMIN(bottom, mmm)];
2091             end   = ics->swb_offset[FFMIN(   top, mmm)];
2092             if ((size = end - start) <= 0)
2093                 continue;
2094             if (tns->direction[w][filt]) {
2095                 inc = -1;
2096                 start = end - 1;
2097             } else {
2098                 inc = 1;
2099             }
2100             start += w * 128;
2101
2102             if (decode) {
2103                 // ar filter
2104                 for (m = 0; m < size; m++, start += inc)
2105                     for (i = 1; i <= FFMIN(m, order); i++)
2106                         coef[start] -= coef[start - i * inc] * lpc[i - 1];
2107             } else {
2108                 // ma filter
2109                 for (m = 0; m < size; m++, start += inc) {
2110                     tmp[0] = coef[start];
2111                     for (i = 1; i <= FFMIN(m, order); i++)
2112                         coef[start] += tmp[i] * lpc[i - 1];
2113                     for (i = order; i > 0; i--)
2114                         tmp[i] = tmp[i - 1];
2115                 }
2116             }
2117         }
2118     }
2119 }
2120
2121 /**
2122  *  Apply windowing and MDCT to obtain the spectral
2123  *  coefficient from the predicted sample by LTP.
2124  */
2125 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2126                                    float *in, IndividualChannelStream *ics)
2127 {
2128     const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2129     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2130     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2131     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2132
2133     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2134         ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
2135     } else {
2136         memset(in, 0, 448 * sizeof(float));
2137         ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2138     }
2139     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2140         ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2141     } else {
2142         ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2143         memset(in + 1024 + 576, 0, 448 * sizeof(float));
2144     }
2145     ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2146 }
2147
2148 /**
2149  * Apply the long term prediction
2150  */
2151 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2152 {
2153     const LongTermPrediction *ltp = &sce->ics.ltp;
2154     const uint16_t *offsets = sce->ics.swb_offset;
2155     int i, sfb;
2156
2157     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2158         float *predTime = sce->ret;
2159         float *predFreq = ac->buf_mdct;
2160         int16_t num_samples = 2048;
2161
2162         if (ltp->lag < 1024)
2163             num_samples = ltp->lag + 1024;
2164         for (i = 0; i < num_samples; i++)
2165             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2166         memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2167
2168         windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2169
2170         if (sce->tns.present)
2171             apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2172
2173         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2174             if (ltp->used[sfb])
2175                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2176                     sce->coeffs[i] += predFreq[i];
2177     }
2178 }
2179
2180 /**
2181  * Update the LTP buffer for next frame
2182  */
2183 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2184 {
2185     IndividualChannelStream *ics = &sce->ics;
2186     float *saved     = sce->saved;
2187     float *saved_ltp = sce->coeffs;
2188     const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2189     const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2190     int i;
2191
2192     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2193         memcpy(saved_ltp,       saved, 512 * sizeof(float));
2194         memset(saved_ltp + 576, 0,     448 * sizeof(float));
2195         ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2196         for (i = 0; i < 64; i++)
2197             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2198     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2199         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
2200         memset(saved_ltp + 576, 0,                  448 * sizeof(float));
2201         ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2202         for (i = 0; i < 64; i++)
2203             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2204     } else { // LONG_STOP or ONLY_LONG
2205         ac->fdsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
2206         for (i = 0; i < 512; i++)
2207             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2208     }
2209
2210     memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2211     memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
2212     memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
2213 }
2214
2215 /**
2216  * Conduct IMDCT and windowing.
2217  */
2218 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2219 {
2220     IndividualChannelStream *ics = &sce->ics;
2221     float *in    = sce->coeffs;
2222     float *out   = sce->ret;
2223     float *saved = sce->saved;
2224     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2225     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2226     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2227     float *buf  = ac->buf_mdct;
2228     float *temp = ac->temp;
2229     int i;
2230
2231     // imdct
2232     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2233         for (i = 0; i < 1024; i += 128)
2234             ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2235     } else
2236         ac->mdct.imdct_half(&ac->mdct, buf, in);
2237
2238     /* window overlapping
2239      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2240      * and long to short transitions are considered to be short to short
2241      * transitions. This leaves just two cases (long to long and short to short)
2242      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2243      */
2244     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2245             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2246         ac->fdsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
2247     } else {
2248         memcpy(                         out,               saved,            448 * sizeof(float));
2249
2250         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2251             ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
2252             ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
2253             ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
2254             ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
2255             ac->fdsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
2256             memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(float));
2257         } else {
2258             ac->fdsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
2259             memcpy(                     out + 576,         buf + 64,         448 * sizeof(float));
2260         }
2261     }
2262
2263     // buffer update
2264     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2265         memcpy(                     saved,       temp + 64,         64 * sizeof(float));
2266         ac->fdsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
2267         ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2268         ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2269         memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2270     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2271         memcpy(                     saved,       buf + 512,        448 * sizeof(float));
2272         memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2273     } else { // LONG_STOP or ONLY_LONG
2274         memcpy(                     saved,       buf + 512,        512 * sizeof(float));
2275     }
2276 }
2277
2278 /**
2279  * Apply dependent channel coupling (applied before IMDCT).
2280  *
2281  * @param   index   index into coupling gain array
2282  */
2283 static void apply_dependent_coupling(AACContext *ac,
2284                                      SingleChannelElement *target,
2285                                      ChannelElement *cce, int index)
2286 {
2287     IndividualChannelStream *ics = &cce->ch[0].ics;
2288     const uint16_t *offsets = ics->swb_offset;
2289     float *dest = target->coeffs;
2290     const float *src = cce->ch[0].coeffs;
2291     int g, i, group, k, idx = 0;
2292     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2293         av_log(ac->avctx, AV_LOG_ERROR,
2294                "Dependent coupling is not supported together with LTP\n");
2295         return;
2296     }
2297     for (g = 0; g < ics->num_window_groups; g++) {
2298         for (i = 0; i < ics->max_sfb; i++, idx++) {
2299             if (cce->ch[0].band_type[idx] != ZERO_BT) {
2300                 const float gain = cce->coup.gain[index][idx];
2301                 for (group = 0; group < ics->group_len[g]; group++) {
2302                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
2303                         // XXX dsputil-ize
2304                         dest[group * 128 + k] += gain * src[group * 128 + k];
2305                     }
2306                 }
2307             }
2308         }
2309         dest += ics->group_len[g] * 128;
2310         src  += ics->group_len[g] * 128;
2311     }
2312 }
2313
2314 /**
2315  * Apply independent channel coupling (applied after IMDCT).
2316  *
2317  * @param   index   index into coupling gain array
2318  */
2319 static void apply_independent_coupling(AACContext *ac,
2320                                        SingleChannelElement *target,
2321                                        ChannelElement *cce, int index)
2322 {
2323     int i;
2324     const float gain = cce->coup.gain[index][0];
2325     const float *src = cce->ch[0].ret;
2326     float *dest = target->ret;
2327     const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
2328
2329     for (i = 0; i < len; i++)
2330         dest[i] += gain * src[i];
2331 }
2332
2333 /**
2334  * channel coupling transformation interface
2335  *
2336  * @param   apply_coupling_method   pointer to (in)dependent coupling function
2337  */
2338 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2339                                    enum RawDataBlockType type, int elem_id,
2340                                    enum CouplingPoint coupling_point,
2341                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2342 {
2343     int i, c;
2344
2345     for (i = 0; i < MAX_ELEM_ID; i++) {
2346         ChannelElement *cce = ac->che[TYPE_CCE][i];
2347         int index = 0;
2348
2349         if (cce && cce->coup.coupling_point == coupling_point) {
2350             ChannelCoupling *coup = &cce->coup;
2351
2352             for (c = 0; c <= coup->num_coupled; c++) {
2353                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2354                     if (coup->ch_select[c] != 1) {
2355                         apply_coupling_method(ac, &cc->ch[0], cce, index);
2356                         if (coup->ch_select[c] != 0)
2357                             index++;
2358                     }
2359                     if (coup->ch_select[c] != 2)
2360                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
2361                 } else
2362                     index += 1 + (coup->ch_select[c] == 3);
2363             }
2364         }
2365     }
2366 }
2367
2368 /**
2369  * Convert spectral data to float samples, applying all supported tools as appropriate.
2370  */
2371 static void spectral_to_sample(AACContext *ac)
2372 {
2373     int i, type;
2374     for (type = 3; type >= 0; type--) {
2375         for (i = 0; i < MAX_ELEM_ID; i++) {
2376             ChannelElement *che = ac->che[type][i];
2377             if (che) {
2378                 if (type <= TYPE_CPE)
2379                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2380                 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2381                     if (che->ch[0].ics.predictor_present) {
2382                         if (che->ch[0].ics.ltp.present)
2383                             apply_ltp(ac, &che->ch[0]);
2384                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2385                             apply_ltp(ac, &che->ch[1]);
2386                     }
2387                 }
2388                 if (che->ch[0].tns.present)
2389                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2390                 if (che->ch[1].tns.present)
2391                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2392                 if (type <= TYPE_CPE)
2393                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2394                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2395                     imdct_and_windowing(ac, &che->ch[0]);
2396                     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2397                         update_ltp(ac, &che->ch[0]);
2398                     if (type == TYPE_CPE) {
2399                         imdct_and_windowing(ac, &che->ch[1]);
2400                         if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2401                             update_ltp(ac, &che->ch[1]);
2402                     }
2403                     if (ac->oc[1].m4ac.sbr > 0) {
2404                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2405                     }
2406                 }
2407                 if (type <= TYPE_CCE)
2408                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2409             }
2410         }
2411     }
2412 }
2413
2414 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2415 {
2416     int size;
2417     AACADTSHeaderInfo hdr_info;
2418     uint8_t layout_map[MAX_ELEM_ID*4][3];
2419     int layout_map_tags;
2420
2421     size = avpriv_aac_parse_header(gb, &hdr_info);
2422     if (size > 0) {
2423         if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
2424             // This is 2 for "VLB " audio in NSV files.
2425             // See samples/nsv/vlb_audio.
2426             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame", 0);
2427             ac->warned_num_aac_frames = 1;
2428         }
2429         push_output_configuration(ac);
2430         if (hdr_info.chan_config) {
2431             ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2432             if (set_default_channel_config(ac->avctx, layout_map,
2433                     &layout_map_tags, hdr_info.chan_config))
2434                 return -7;
2435             if (output_configure(ac, layout_map, layout_map_tags,
2436                                  FFMAX(ac->oc[1].status, OC_TRIAL_FRAME), 0))
2437                 return -7;
2438         } else {
2439             ac->oc[1].m4ac.chan_config = 0;
2440             /**
2441              * dual mono frames in Japanese DTV can have chan_config 0
2442              * WITHOUT specifying PCE.
2443              *  thus, set dual mono as default.
2444              */
2445             if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
2446                 layout_map_tags = 2;
2447                 layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
2448                 layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
2449                 layout_map[0][1] = 0;
2450                 layout_map[1][1] = 1;
2451                 if (output_configure(ac, layout_map, layout_map_tags,
2452                                      OC_TRIAL_FRAME, 0))
2453                     return -7;
2454             }
2455         }
2456         ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
2457         ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
2458         ac->oc[1].m4ac.object_type     = hdr_info.object_type;
2459         if (ac->oc[0].status != OC_LOCKED ||
2460             ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2461             ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
2462             ac->oc[1].m4ac.sbr = -1;
2463             ac->oc[1].m4ac.ps  = -1;
2464         }
2465         if (!hdr_info.crc_absent)
2466             skip_bits(gb, 16);
2467     }
2468     return size;
2469 }
2470
2471 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2472                                 int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
2473 {
2474     AACContext *ac = avctx->priv_data;
2475     ChannelElement *che = NULL, *che_prev = NULL;
2476     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2477     int err, elem_id;
2478     int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2479     int is_dmono, sce_count = 0;
2480
2481     if (show_bits(gb, 12) == 0xfff) {
2482         if (parse_adts_frame_header(ac, gb) < 0) {
2483             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2484             err = -1;
2485             goto fail;
2486         }
2487         if (ac->oc[1].m4ac.sampling_index > 12) {
2488             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2489             err = -1;
2490             goto fail;
2491         }
2492     }
2493
2494     if (frame_configure_elements(avctx) < 0) {
2495         err = -1;
2496         goto fail;
2497     }
2498
2499     ac->tags_mapped = 0;
2500     // parse
2501     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2502         elem_id = get_bits(gb, 4);
2503
2504         if (elem_type < TYPE_DSE) {
2505             if (!(che=get_che(ac, elem_type, elem_id))) {
2506                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2507                        elem_type, elem_id);
2508                 err = -1;
2509                 goto fail;
2510             }
2511             samples = 1024;
2512         }
2513
2514         switch (elem_type) {
2515
2516         case TYPE_SCE:
2517             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2518             audio_found = 1;
2519             sce_count++;
2520             break;
2521
2522         case TYPE_CPE:
2523             err = decode_cpe(ac, gb, che);
2524             audio_found = 1;
2525             break;
2526
2527         case TYPE_CCE:
2528             err = decode_cce(ac, gb, che);
2529             break;
2530
2531         case TYPE_LFE:
2532             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2533             audio_found = 1;
2534             break;
2535
2536         case TYPE_DSE:
2537             err = skip_data_stream_element(ac, gb);
2538             break;
2539
2540         case TYPE_PCE: {
2541             uint8_t layout_map[MAX_ELEM_ID*4][3];
2542             int tags;
2543             push_output_configuration(ac);
2544             tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2545             if (tags < 0) {
2546                 err = tags;
2547                 break;
2548             }
2549             if (pce_found) {
2550                 av_log(avctx, AV_LOG_ERROR,
2551                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2552                 pop_output_configuration(ac);
2553             } else {
2554                 err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
2555                 if (!err)
2556                     ac->oc[1].m4ac.chan_config = 0;
2557                 pce_found = 1;
2558             }
2559             break;
2560         }
2561
2562         case TYPE_FIL:
2563             if (elem_id == 15)
2564                 elem_id += get_bits(gb, 8) - 1;
2565             if (get_bits_left(gb) < 8 * elem_id) {
2566                     av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
2567                     err = -1;
2568                     goto fail;
2569             }
2570             while (elem_id > 0)
2571                 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2572             err = 0; /* FIXME */
2573             break;
2574
2575         default:
2576             err = -1; /* should not happen, but keeps compiler happy */
2577             break;
2578         }
2579
2580         che_prev       = che;
2581         elem_type_prev = elem_type;
2582
2583         if (err)
2584             goto fail;
2585
2586         if (get_bits_left(gb) < 3) {
2587             av_log(avctx, AV_LOG_ERROR, overread_err);
2588             err = -1;
2589             goto fail;
2590         }
2591     }
2592
2593     spectral_to_sample(ac);
2594
2595     multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
2596     samples <<= multiplier;
2597     /* for dual-mono audio (SCE + SCE) */
2598     is_dmono = ac->dmono_mode && sce_count == 2 &&
2599                ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
2600
2601     if (samples) {
2602         ac->frame.nb_samples = samples;
2603         *(AVFrame *)data = ac->frame;
2604     }
2605     *got_frame_ptr = !!samples;
2606
2607     if (is_dmono) {
2608         if (ac->dmono_mode == 1)
2609             ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
2610         else if (ac->dmono_mode == 2)
2611             ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
2612     }
2613
2614     if (ac->oc[1].status && audio_found) {
2615         avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
2616         avctx->frame_size = samples;
2617         ac->oc[1].status = OC_LOCKED;
2618     }
2619
2620     if (multiplier) {
2621         int side_size;
2622         uint32_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
2623         if (side && side_size>=4)
2624             AV_WL32(side, 2*AV_RL32(side));
2625     }
2626     return 0;
2627 fail:
2628     pop_output_configuration(ac);
2629     return err;
2630 }
2631
2632 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2633                             int *got_frame_ptr, AVPacket *avpkt)
2634 {
2635     AACContext *ac = avctx->priv_data;
2636     const uint8_t *buf = avpkt->data;
2637     int buf_size = avpkt->size;
2638     GetBitContext gb;
2639     int buf_consumed;
2640     int buf_offset;
2641     int err;
2642     int new_extradata_size;
2643     const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2644                                        AV_PKT_DATA_NEW_EXTRADATA,
2645                                        &new_extradata_size);
2646     int jp_dualmono_size;
2647     const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
2648                                        AV_PKT_DATA_JP_DUALMONO,
2649                                        &jp_dualmono_size);
2650
2651     if (new_extradata && 0) {
2652         av_free(avctx->extradata);
2653         avctx->extradata = av_mallocz(new_extradata_size +
2654                                       FF_INPUT_BUFFER_PADDING_SIZE);
2655         if (!avctx->extradata)
2656             return AVERROR(ENOMEM);
2657         avctx->extradata_size = new_extradata_size;
2658         memcpy(avctx->extradata, new_extradata, new_extradata_size);
2659         push_output_configuration(ac);
2660         if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
2661                                          avctx->extradata,
2662                                          avctx->extradata_size*8, 1) < 0) {
2663             pop_output_configuration(ac);
2664             return AVERROR_INVALIDDATA;
2665         }
2666     }
2667
2668     ac->dmono_mode = 0;
2669     if (jp_dualmono && jp_dualmono_size > 0)
2670         ac->dmono_mode =  1 + *jp_dualmono;
2671     if (ac->force_dmono_mode >= 0)
2672         ac->dmono_mode = ac->force_dmono_mode;
2673
2674     if (INT_MAX / 8 <= buf_size)
2675         return AVERROR_INVALIDDATA;
2676
2677     init_get_bits(&gb, buf, buf_size * 8);
2678
2679     if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt)) < 0)
2680         return err;
2681
2682     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2683     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2684         if (buf[buf_offset])
2685             break;
2686
2687     return buf_size > buf_offset ? buf_consumed : buf_size;
2688 }
2689
2690 static av_cold int aac_decode_close(AVCodecContext *avctx)
2691 {
2692     AACContext *ac = avctx->priv_data;
2693     int i, type;
2694
2695     for (i = 0; i < MAX_ELEM_ID; i++) {
2696         for (type = 0; type < 4; type++) {
2697             if (ac->che[type][i])
2698                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2699             av_freep(&ac->che[type][i]);
2700         }
2701     }
2702
2703     ff_mdct_end(&ac->mdct);
2704     ff_mdct_end(&ac->mdct_small);
2705     ff_mdct_end(&ac->mdct_ltp);
2706     return 0;
2707 }
2708
2709
#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word

/**
 * Private context of the LATM decoder.
 *
 * aac_ctx has to remain the first member: the shared AAC code reads the same
 * avctx->priv_data pointer as an AACContext.
 */
struct LATMContext {
    AACContext      aac_ctx;             ///< containing AACContext
    int             initialized;         ///< initialized after a valid extradata was seen

    // parser data
    int             audio_mux_version_A; ///< LATM syntax version
    int             frame_length_type;   ///< 0/1 variable/fixed frame length
    int             frame_length;        ///< frame length for fixed frame length
};
2721
2722 static inline uint32_t latm_get_value(GetBitContext *b)
2723 {
2724     int length = get_bits(b, 2);
2725
2726     return get_bits_long(b, (length+1)*8);
2727 }
2728
2729 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2730                                              GetBitContext *gb, int asclen)
2731 {
2732     AACContext *ac        = &latmctx->aac_ctx;
2733     AVCodecContext *avctx = ac->avctx;
2734     MPEG4AudioConfig m4ac = { 0 };
2735     int config_start_bit  = get_bits_count(gb);
2736     int sync_extension    = 0;
2737     int bits_consumed, esize;
2738
2739     if (asclen) {
2740         sync_extension = 1;
2741         asclen         = FFMIN(asclen, get_bits_left(gb));
2742     } else
2743         asclen         = get_bits_left(gb);
2744
2745     if (config_start_bit % 8) {
2746         av_log_missing_feature(latmctx->aac_ctx.avctx,
2747                                "Non-byte-aligned audio-specific config", 1);
2748         return AVERROR_PATCHWELCOME;
2749     }
2750     if (asclen <= 0)
2751         return AVERROR_INVALIDDATA;
2752     bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2753                                          gb->buffer + (config_start_bit / 8),
2754                                          asclen, sync_extension);
2755
2756     if (bits_consumed < 0)
2757         return AVERROR_INVALIDDATA;
2758
2759     if (!latmctx->initialized ||
2760         ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
2761         ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
2762
2763         if(latmctx->initialized) {
2764             av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2765         } else {
2766             av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
2767         }
2768         latmctx->initialized = 0;
2769
2770         esize = (bits_consumed+7) / 8;
2771
2772         if (avctx->extradata_size < esize) {
2773             av_free(avctx->extradata);
2774             avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2775             if (!avctx->extradata)
2776                 return AVERROR(ENOMEM);
2777         }
2778
2779         avctx->extradata_size = esize;
2780         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2781         memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2782     }
2783     skip_bits_long(gb, bits_consumed);
2784
2785     return bits_consumed;
2786 }
2787
2788 static int read_stream_mux_config(struct LATMContext *latmctx,
2789                                   GetBitContext *gb)
2790 {
2791     int ret, audio_mux_version = get_bits(gb, 1);
2792
2793     latmctx->audio_mux_version_A = 0;
2794     if (audio_mux_version)
2795         latmctx->audio_mux_version_A = get_bits(gb, 1);
2796
2797     if (!latmctx->audio_mux_version_A) {
2798
2799         if (audio_mux_version)
2800             latm_get_value(gb);                 // taraFullness
2801
2802         skip_bits(gb, 1);                       // allStreamSameTimeFraming
2803         skip_bits(gb, 6);                       // numSubFrames
2804         // numPrograms
2805         if (get_bits(gb, 4)) {                  // numPrograms
2806             av_log_missing_feature(latmctx->aac_ctx.avctx,
2807                                    "Multiple programs", 1);
2808             return AVERROR_PATCHWELCOME;
2809         }
2810
2811         // for each program (which there is only one in DVB)
2812
2813         // for each layer (which there is only one in DVB)
2814         if (get_bits(gb, 3)) {                   // numLayer
2815             av_log_missing_feature(latmctx->aac_ctx.avctx,
2816                                    "Multiple layers", 1);
2817             return AVERROR_PATCHWELCOME;
2818         }
2819
2820         // for all but first stream: use_same_config = get_bits(gb, 1);
2821         if (!audio_mux_version) {
2822             if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2823                 return ret;
2824         } else {
2825             int ascLen = latm_get_value(gb);
2826             if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2827                 return ret;
2828             ascLen -= ret;
2829             skip_bits_long(gb, ascLen);
2830         }
2831
2832         latmctx->frame_length_type = get_bits(gb, 3);
2833         switch (latmctx->frame_length_type) {
2834         case 0:
2835             skip_bits(gb, 8);       // latmBufferFullness
2836             break;
2837         case 1:
2838             latmctx->frame_length = get_bits(gb, 9);
2839             break;
2840         case 3:
2841         case 4:
2842         case 5:
2843             skip_bits(gb, 6);       // CELP frame length table index
2844             break;
2845         case 6:
2846         case 7:
2847             skip_bits(gb, 1);       // HVXC frame length table index
2848             break;
2849         }
2850
2851         if (get_bits(gb, 1)) {                  // other data
2852             if (audio_mux_version) {
2853                 latm_get_value(gb);             // other_data_bits
2854             } else {
2855                 int esc;
2856                 do {
2857                     esc = get_bits(gb, 1);
2858                     skip_bits(gb, 8);
2859                 } while (esc);
2860             }
2861         }
2862
2863         if (get_bits(gb, 1))                     // crc present
2864             skip_bits(gb, 8);                    // config_crc
2865     }
2866
2867     return 0;
2868 }
2869
2870 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2871 {
2872     uint8_t tmp;
2873
2874     if (ctx->frame_length_type == 0) {
2875         int mux_slot_length = 0;
2876         do {
2877             tmp = get_bits(gb, 8);
2878             mux_slot_length += tmp;
2879         } while (tmp == 255);
2880         return mux_slot_length;
2881     } else if (ctx->frame_length_type == 1) {
2882         return ctx->frame_length;
2883     } else if (ctx->frame_length_type == 3 ||
2884                ctx->frame_length_type == 5 ||
2885                ctx->frame_length_type == 7) {
2886         skip_bits(gb, 2);          // mux_slot_length_coded
2887     }
2888     return 0;
2889 }
2890
2891 static int read_audio_mux_element(struct LATMContext *latmctx,
2892                                   GetBitContext *gb)
2893 {
2894     int err;
2895     uint8_t use_same_mux = get_bits(gb, 1);
2896     if (!use_same_mux) {
2897         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2898             return err;
2899     } else if (!latmctx->aac_ctx.avctx->extradata) {
2900         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2901                "no decoder config found\n");
2902         return AVERROR(EAGAIN);
2903     }
2904     if (latmctx->audio_mux_version_A == 0) {
2905         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2906         if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2907             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2908             return AVERROR_INVALIDDATA;
2909         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2910             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2911                    "frame length mismatch %d << %d\n",
2912                    mux_slot_length_bytes * 8, get_bits_left(gb));
2913             return AVERROR_INVALIDDATA;
2914         }
2915     }
2916     return 0;
2917 }
2918
2919
2920 static int latm_decode_frame(AVCodecContext *avctx, void *out,
2921                              int *got_frame_ptr, AVPacket *avpkt)
2922 {
2923     struct LATMContext *latmctx = avctx->priv_data;
2924     int                 muxlength, err;
2925     GetBitContext       gb;
2926
2927     if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
2928         return err;
2929
2930     // check for LOAS sync word
2931     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2932         return AVERROR_INVALIDDATA;
2933
2934     muxlength = get_bits(&gb, 13) + 3;
2935     // not enough data, the parser should have sorted this out
2936     if (muxlength > avpkt->size)
2937         return AVERROR_INVALIDDATA;
2938
2939     if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2940         return err;
2941
2942     if (!latmctx->initialized) {
2943         if (!avctx->extradata) {
2944             *got_frame_ptr = 0;
2945             return avpkt->size;
2946         } else {
2947             push_output_configuration(&latmctx->aac_ctx);
2948             if ((err = decode_audio_specific_config(
2949                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
2950                     avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
2951                 pop_output_configuration(&latmctx->aac_ctx);
2952                 return err;
2953             }
2954             latmctx->initialized = 1;
2955         }
2956     }
2957
2958     if (show_bits(&gb, 12) == 0xfff) {
2959         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2960                "ADTS header detected, probably as result of configuration "
2961                "misparsing\n");
2962         return AVERROR_INVALIDDATA;
2963     }
2964
2965     if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
2966         return err;
2967
2968     return muxlength;
2969 }
2970
2971 static av_cold int latm_decode_init(AVCodecContext *avctx)
2972 {
2973     struct LATMContext *latmctx = avctx->priv_data;
2974     int ret = aac_decode_init(avctx);
2975
2976     if (avctx->extradata_size > 0)
2977         latmctx->initialized = !ret;
2978
2979     return ret;
2980 }
2981
2982 /**
2983  * AVOptions for Japanese DTV specific extensions (ADTS only)
2984  */
2985 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
2986 static const AVOption options[] = {
2987     {"dual_mono_mode", "Select the channel to decode for dual mono",
2988      offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
2989      AACDEC_FLAGS, "dual_mono_mode"},
2990
2991     {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2992     {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2993     {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2994     {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2995
2996     {NULL},
2997 };
2998
2999 static const AVClass aac_decoder_class = {
3000     .class_name = "AAC decoder",
3001     .item_name  = av_default_item_name,
3002     .option     = options,
3003     .version    = LIBAVUTIL_VERSION_INT,
3004 };
3005
3006 AVCodec ff_aac_decoder = {
3007     .name            = "aac",
3008     .type            = AVMEDIA_TYPE_AUDIO,
3009     .id              = AV_CODEC_ID_AAC,
3010     .priv_data_size  = sizeof(AACContext),
3011     .init            = aac_decode_init,
3012     .close           = aac_decode_close,
3013     .decode          = aac_decode_frame,
3014     .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3015     .sample_fmts     = (const enum AVSampleFormat[]) {
3016         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3017     },
3018     .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3019     .channel_layouts = aac_channel_layout,
3020     .flush = flush,
3021     .priv_class      = &aac_decoder_class,
3022 };
3023
3024 /*
3025     Note: This decoder filter is intended to decode LATM streams transferred
3026     in MPEG transport streams which only contain one program.
3027     To do a more complex LATM demuxing a separate LATM demuxer should be used.
3028 */
3029 AVCodec ff_aac_latm_decoder = {
3030     .name            = "aac_latm",
3031     .type            = AVMEDIA_TYPE_AUDIO,
3032     .id              = AV_CODEC_ID_AAC_LATM,
3033     .priv_data_size  = sizeof(struct LATMContext),
3034     .init            = latm_decode_init,
3035     .close           = aac_decode_close,
3036     .decode          = latm_decode_frame,
3037     .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3038     .sample_fmts     = (const enum AVSampleFormat[]) {
3039         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
3040     },
3041     .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3042     .channel_layouts = aac_channel_layout,
3043     .flush = flush,
3044 };