git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  *
   6  * AAC LATM decoder
   7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   8  * Copyright (c) 2010      Janne Grunau <janne-ffmpeg@jannau.net>
   9  *
  10  * This file is part of Libav.
  11  *
  12  * Libav is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * Libav is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with Libav; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 /**
  28  * @file
  29  * AAC decoder
  30  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32  */
  33
  34 /*
  35  * supported tools
  36  *
  37  * Support?             Name
  38  * N (code in SoC repo) gain control
  39  * Y                    block switching
  40  * Y                    window shapes - standard
  41  * N                    window shapes - Low Delay
  42  * Y                    filterbank - standard
  43  * N (code in SoC repo) filterbank - Scalable Sample Rate
  44  * Y                    Temporal Noise Shaping
  45  * Y                    Long Term Prediction
  46  * Y                    intensity stereo
  47  * Y                    channel coupling
  48  * Y                    frequency domain prediction
  49  * Y                    Perceptual Noise Substitution
  50  * Y                    Mid/Side stereo
  51  * N                    Scalable Inverse AAC Quantization
  52  * N                    Frequency Selective Switch
  53  * N                    upsampling filter
  54  * Y                    quantization & coding - AAC
  55  * N                    quantization & coding - TwinVQ
  56  * N                    quantization & coding - BSAC
  57  * N                    AAC Error Resilience tools
  58  * N                    Error Resilience payload syntax
  59  * N                    Error Protection tool
  60  * N                    CELP
  61  * N                    Silence Compression
  62  * N                    HVXC
  63  * N                    HVXC 4kbits/s VR
  64  * N                    Structured Audio tools
  65  * N                    Structured Audio Sample Bank Format
  66  * N                    MIDI
  67  * N                    Harmonic and Individual Lines plus Noise
  68  * N                    Text-To-Speech Interface
  69  * Y                    Spectral Band Replication
  70  * Y (not in this code) Layer-1
  71  * Y (not in this code) Layer-2
  72  * Y (not in this code) Layer-3
  73  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  74  * Y                    Parametric Stereo
  75  * N                    Direct Stream Transfer
  76  *
  77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
 *         Parametric Stereo.
  80  */
  81
  82
  83 #include "avcodec.h"
  84 #include "internal.h"
  85 #include "get_bits.h"
  86 #include "dsputil.h"
  87 #include "fft.h"
  88 #include "fmtconvert.h"
  89 #include "lpc.h"
  90 #include "kbdwin.h"
  91 #include "sinewin.h"
  92
  93 #include "aac.h"
  94 #include "aactab.h"
  95 #include "aacdectab.h"
  96 #include "cbrt_tablegen.h"
  97 #include "sbr.h"
  98 #include "aacsbr.h"
  99 #include "mpeg4audio.h"
 100 #include "aacadtsdec.h"
 101 #include "libavutil/intfloat.h"
 102
 103 #include <assert.h>
 104 #include <errno.h>
 105 #include <math.h>
 106 #include <string.h>
 107
 108 #if ARCH_ARM
 109 #   include "arm/aac.h"
 110 #endif
 111
 112 static VLC vlc_scalefactors;
 113 static VLC vlc_spectral[11];
 114
 115 static const char overread_err[] = "Input buffer exhausted before END element found\n";
 116
 117 static int count_channels(uint8_t (*layout)[3], int tags)
 118 {
 119     int i, sum = 0;
 120     for (i = 0; i < tags; i++) {
 121         int syn_ele = layout[i][0];
 122         int pos     = layout[i][2];
 123         sum += (1 + (syn_ele == TYPE_CPE)) *
 124                (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
 125     }
 126     return sum;
 127 }
 128
 129 /**
 130  * Check for the channel element in the current channel position configuration.
 131  * If it exists, make sure the appropriate element is allocated and map the
 132  * channel order to match the internal Libav channel layout.
 133  *
 134  * @param   che_pos current channel position configuration
 135  * @param   type channel element type
 136  * @param   id channel element id
 137  * @param   channels count of the number of channels in the configuration
 138  *
 139  * @return  Returns error status. 0 - OK, !0 - error
 140  */
 141 static av_cold int che_configure(AACContext *ac,
 142                                  enum ChannelPosition che_pos,
 143                                  int type, int id, int *channels)
 144 {
 145     if (che_pos) {
 146         if (!ac->che[type][id]) {
 147             if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
 148                 return AVERROR(ENOMEM);
 149             ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
 150         }
 151         if (type != TYPE_CCE) {
 152             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
 153             if (type == TYPE_CPE ||
 154                 (type == TYPE_SCE && ac->m4ac.ps == 1)) {
 155                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
 156             }
 157         }
 158     } else {
 159         if (ac->che[type][id])
 160             ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
 161         av_freep(&ac->che[type][id]);
 162     }
 163     return 0;
 164 }
 165
/**
 * Association between one syntactic element of a layout map and the output
 * channel position(s) assigned to it while sniffing the channel order.
 */
struct elem_to_channel {
    uint64_t av_position;   ///< AV_CH_* mask assigned to the element, or UINT64_MAX when no position could be assigned
    uint8_t syn_ele;        ///< syntactic element type (TYPE_SCE, TYPE_CPE or TYPE_LFE)
    uint8_t elem_id;        ///< element id copied from the layout map
    uint8_t aac_position;   ///< AAC_CHANNEL_* position of the element
};
 172
 173 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
 174     uint8_t (*layout_map)[3], int offset, int tags, uint64_t left,
 175     uint64_t right, int pos)
 176 {
 177     if (layout_map[offset][0] == TYPE_CPE) {
 178         e2c_vec[offset] = (struct elem_to_channel) {
 179             .av_position = left | right, .syn_ele = TYPE_CPE,
 180             .elem_id = layout_map[offset    ][1], .aac_position = pos };
 181         return 1;
 182     } else {
 183         e2c_vec[offset]   = (struct elem_to_channel) {
 184             .av_position = left, .syn_ele = TYPE_SCE,
 185             .elem_id = layout_map[offset    ][1], .aac_position = pos };
 186         e2c_vec[offset + 1] = (struct elem_to_channel) {
 187             .av_position = right, .syn_ele = TYPE_SCE,
 188             .elem_id = layout_map[offset + 1][1], .aac_position = pos };
 189         return 2;
 190     }
 191 }
 192
 193 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
 194     int num_pos_channels = 0;
 195     int first_cpe = 0;
 196     int sce_parity = 0;
 197     int i;
 198     for (i = *current; i < tags; i++) {
 199         if (layout_map[i][2] != pos)
 200             break;
 201         if (layout_map[i][0] == TYPE_CPE) {
 202             if (sce_parity) {
 203                 if (pos == AAC_CHANNEL_FRONT || !first_cpe) {
 204                     sce_parity = 0;
 205                 } else {
 206                     return -1;
 207                 }
 208             }
 209             num_pos_channels += 2;
 210             first_cpe = 1;
 211         } else {
 212             num_pos_channels++;
 213             sce_parity ^= 1;
 214         }
 215     }
 216     if (sce_parity &&
 217         ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
 218             return -1;
 219     *current = i;
 220     return num_pos_channels;
 221 }
 222
 223 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
 224 {
 225     int i, n, total_non_cc_elements;
 226     struct elem_to_channel e2c_vec[MAX_ELEM_ID] = {{ 0 }};
 227     int num_front_channels, num_side_channels, num_back_channels;
 228     uint64_t layout;
 229
 230     i = 0;
 231     num_front_channels =
 232         count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
 233     if (num_front_channels < 0)
 234         return 0;
 235     num_side_channels =
 236         count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
 237     if (num_side_channels < 0)
 238         return 0;
 239     num_back_channels =
 240         count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
 241     if (num_back_channels < 0)
 242         return 0;
 243
 244     i = 0;
 245     if (num_front_channels & 1) {
 246         e2c_vec[i] = (struct elem_to_channel) {
 247             .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
 248             .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
 249         i++;
 250         num_front_channels--;
 251     }
 252     if (num_front_channels >= 4) {
 253         i += assign_pair(e2c_vec, layout_map, i, tags,
 254                          AV_CH_FRONT_LEFT_OF_CENTER,
 255                          AV_CH_FRONT_RIGHT_OF_CENTER,
 256                          AAC_CHANNEL_FRONT);
 257         num_front_channels -= 2;
 258     }
 259     if (num_front_channels >= 2) {
 260         i += assign_pair(e2c_vec, layout_map, i, tags,
 261                          AV_CH_FRONT_LEFT,
 262                          AV_CH_FRONT_RIGHT,
 263                          AAC_CHANNEL_FRONT);
 264         num_front_channels -= 2;
 265     }
 266     while (num_front_channels >= 2) {
 267         i += assign_pair(e2c_vec, layout_map, i, tags,
 268                          UINT64_MAX,
 269                          UINT64_MAX,
 270                          AAC_CHANNEL_FRONT);
 271         num_front_channels -= 2;
 272     }
 273
 274     if (num_side_channels >= 2) {
 275         i += assign_pair(e2c_vec, layout_map, i, tags,
 276                          AV_CH_SIDE_LEFT,
 277                          AV_CH_SIDE_RIGHT,
 278                          AAC_CHANNEL_FRONT);
 279         num_side_channels -= 2;
 280     }
 281     while (num_side_channels >= 2) {
 282         i += assign_pair(e2c_vec, layout_map, i, tags,
 283                          UINT64_MAX,
 284                          UINT64_MAX,
 285                          AAC_CHANNEL_SIDE);
 286         num_side_channels -= 2;
 287     }
 288
 289     while (num_back_channels >= 4) {
 290         i += assign_pair(e2c_vec, layout_map, i, tags,
 291                          UINT64_MAX,
 292                          UINT64_MAX,
 293                          AAC_CHANNEL_BACK);
 294         num_back_channels -= 2;
 295     }
 296     if (num_back_channels >= 2) {
 297         i += assign_pair(e2c_vec, layout_map, i, tags,
 298                          AV_CH_BACK_LEFT,
 299                          AV_CH_BACK_RIGHT,
 300                          AAC_CHANNEL_BACK);
 301         num_back_channels -= 2;
 302     }
 303     if (num_back_channels) {
 304         e2c_vec[i] = (struct elem_to_channel) {
 305           .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
 306           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
 307         i++;
 308         num_back_channels--;
 309     }
 310
 311     if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
 312         e2c_vec[i] = (struct elem_to_channel) {
 313           .av_position = AV_CH_LOW_FREQUENCY, .syn_ele = TYPE_LFE,
 314           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
 315         i++;
 316     }
 317     while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
 318         e2c_vec[i] = (struct elem_to_channel) {
 319           .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
 320           .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
 321         i++;
 322     }
 323
 324     // Must choose a stable sort
 325     total_non_cc_elements = n = i;
 326     do {
 327         int next_n = 0;
 328         for (i = 1; i < n; i++) {
 329             if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
 330                 FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
 331                 next_n = i;
 332             }
 333         }
 334         n = next_n;
 335     } while (n > 0);
 336
 337     layout = 0;
 338     for (i = 0; i < total_non_cc_elements; i++) {
 339         layout_map[i][0] = e2c_vec[i].syn_ele;
 340         layout_map[i][1] = e2c_vec[i].elem_id;
 341         layout_map[i][2] = e2c_vec[i].aac_position;
 342         if (e2c_vec[i].av_position != UINT64_MAX) {
 343             layout |= e2c_vec[i].av_position;
 344         }
 345     }
 346
 347     return layout;
 348 }
 349
 350 /**
 351  * Configure output channel order based on the current program configuration element.
 352  *
 353  * @return  Returns error status. 0 - OK, !0 - error
 354  */
 355 static av_cold int output_configure(AACContext *ac,
 356                                     uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
 357                                     int channel_config, enum OCStatus oc_type)
 358 {
 359     AVCodecContext *avctx = ac->avctx;
 360     int i, channels = 0, ret;
 361     uint64_t layout = 0;
 362
 363     if (ac->layout_map != layout_map) {
 364         memcpy(ac->layout_map, layout_map, tags * sizeof(layout_map[0]));
 365         ac->layout_map_tags = tags;
 366     }
 367
 368     // Try to sniff a reasonable channel order, otherwise output the
 369     // channels in the order the PCE declared them.
 370     if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
 371         layout = sniff_channel_order(layout_map, tags);
 372     for (i = 0; i < tags; i++) {
 373         int type =     layout_map[i][0];
 374         int id =       layout_map[i][1];
 375         int position = layout_map[i][2];
 376         // Allocate or free elements depending on if they are in the
 377         // current program configuration.
 378         ret = che_configure(ac, position, type, id, &channels);
 379         if (ret < 0)
 380             return ret;
 381     }
 382
 383     memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 384     avctx->channel_layout = layout;
 385     avctx->channels = channels;
 386     ac->output_configured = oc_type;
 387
 388     return 0;
 389 }
 390
 391 /**
 392  * Set up channel positions based on a default channel configuration
 393  * as specified in table 1.17.
 394  *
 395  * @return  Returns error status. 0 - OK, !0 - error
 396  */
 397 static av_cold int set_default_channel_config(AVCodecContext *avctx,
 398                                               uint8_t (*layout_map)[3],
 399                                               int *tags,
 400                                               int channel_config)
 401 {
 402     if (channel_config < 1 || channel_config > 7) {
 403         av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
 404                channel_config);
 405         return -1;
 406     }
 407     *tags = tags_per_config[channel_config];
 408     memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
 409     return 0;
 410 }
 411
/**
 * Find the channel element that a syntactic element from the bitstream is
 * to be decoded into.
 *
 * With a PCE based configuration (chan_config == 0) the lookup goes through
 * the tag map only. With an indexed configuration the element is chosen
 * from the number of tags mapped so far (ac->tags_mapped) and the element
 * type; a successful match is recorded in ac->tag_che_map and advances
 * ac->tags_mapped.
 *
 * @param   ac       decoder context
 * @param   type     syntactic element type
 * @param   elem_id  element instance tag
 *
 * @return  the matching channel element, or NULL if none matches or the
 *          output could not be reconfigured
 */
static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
{
    // For PCE based channel configurations map the channels solely based on tags.
    if (!ac->m4ac.chan_config) {
        return ac->tag_che_map[type][elem_id];
    }
    // Allow single CPE stereo files to be signalled with mono configuration.
    if (!ac->tags_mapped && type == TYPE_CPE && ac->m4ac.chan_config == 1) {
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;

        // Switch to the default stereo configuration (2) on a trial basis.
        if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
                                       2) < 0)
            return NULL;
        if (output_configure(ac, layout_map, layout_map_tags,
                             2, OC_TRIAL_FRAME) < 0)
            return NULL;

        ac->m4ac.chan_config = 2;
    }
    // For indexed channel configurations map the channels solely based on position.
    // Every case deliberately falls through to the checks of the smaller
    // configurations when its own condition does not match.
    switch (ac->m4ac.chan_config) {
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
        /* fall through */
    case 6:
        /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
           instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
           encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
        if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
        /* fall through */
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
        /* fall through */
    case 4:
        if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* fall through */
    case 3:
    case 2:
        // The first CPE is tag 0 for configuration 2 and tag 1 otherwise.
        if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->m4ac.chan_config == 2) {
            return NULL;
        }
        /* fall through */
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
        /* fall through */
    default:
        return NULL;
    }
}
 474
 475 /**
 476  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 477  *
 478  * @param type speaker type/position for these channels
 479  */
 480 static void decode_channel_map(uint8_t layout_map[][3],
 481                                enum ChannelPosition type,
 482                                GetBitContext *gb, int n)
 483 {
 484     while (n--) {
 485         enum RawDataBlockType syn_ele;
 486         switch (type) {
 487         case AAC_CHANNEL_FRONT:
 488         case AAC_CHANNEL_BACK:
 489         case AAC_CHANNEL_SIDE:
 490             syn_ele = get_bits1(gb);
 491             break;
 492         case AAC_CHANNEL_CC:
 493             skip_bits1(gb);
 494             syn_ele = TYPE_CCE;
 495             break;
 496         case AAC_CHANNEL_LFE:
 497             syn_ele = TYPE_LFE;
 498             break;
 499         }
 500         layout_map[0][0] = syn_ele;
 501         layout_map[0][1] = get_bits(gb, 4);
 502         layout_map[0][2] = type;
 503         layout_map++;
 504     }
 505 }
 506
 507 /**
 508  * Decode program configuration element; reference: table 4.2.
 509  *
 510  * @return  Returns error status. 0 - OK, !0 - error
 511  */
 512 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
 513                       uint8_t (*layout_map)[3],
 514                       GetBitContext *gb)
 515 {
 516     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
 517     int comment_len;
 518     int tags;
 519
 520     skip_bits(gb, 2);  // object_type
 521
 522     sampling_index = get_bits(gb, 4);
 523     if (m4ac->sampling_index != sampling_index)
 524         av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
 525
 526     num_front       = get_bits(gb, 4);
 527     num_side        = get_bits(gb, 4);
 528     num_back        = get_bits(gb, 4);
 529     num_lfe         = get_bits(gb, 2);
 530     num_assoc_data  = get_bits(gb, 3);
 531     num_cc          = get_bits(gb, 4);
 532
 533     if (get_bits1(gb))
 534         skip_bits(gb, 4); // mono_mixdown_tag
 535     if (get_bits1(gb))
 536         skip_bits(gb, 4); // stereo_mixdown_tag
 537
 538     if (get_bits1(gb))
 539         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 540
 541     decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
 542     tags = num_front;
 543     decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
 544     tags += num_side;
 545     decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
 546     tags += num_back;
 547     decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
 548     tags += num_lfe;
 549
 550     skip_bits_long(gb, 4 * num_assoc_data);
 551
 552     decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
 553     tags += num_cc;
 554
 555     align_get_bits(gb);
 556
 557     /* comment field, first byte is length */
 558     comment_len = get_bits(gb, 8) * 8;
 559     if (get_bits_left(gb) < comment_len) {
 560         av_log(avctx, AV_LOG_ERROR, overread_err);
 561         return -1;
 562     }
 563     skip_bits_long(gb, comment_len);
 564     return tags;
 565 }
 566
 567 /**
 568  * Decode GA "General Audio" specific configuration; reference: table 4.1.
 569  *
 570  * @param   ac          pointer to AACContext, may be null
 571  * @param   avctx       pointer to AVCCodecContext, used for logging
 572  *
 573  * @return  Returns error status. 0 - OK, !0 - error
 574  */
 575 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
 576                                      GetBitContext *gb,
 577                                      MPEG4AudioConfig *m4ac,
 578                                      int channel_config)
 579 {
 580     int extension_flag, ret;
 581     uint8_t layout_map[MAX_ELEM_ID*4][3];
 582     int tags = 0;
 583
 584     if (get_bits1(gb)) { // frameLengthFlag
 585         av_log_missing_feature(avctx, "960/120 MDCT window is", 1);
 586         return -1;
 587     }
 588
 589     if (get_bits1(gb))       // dependsOnCoreCoder
 590         skip_bits(gb, 14);   // coreCoderDelay
 591     extension_flag = get_bits1(gb);
 592
 593     if (m4ac->object_type == AOT_AAC_SCALABLE ||
 594         m4ac->object_type == AOT_ER_AAC_SCALABLE)
 595         skip_bits(gb, 3);     // layerNr
 596
 597     if (channel_config == 0) {
 598         skip_bits(gb, 4);  // element_instance_tag
 599         tags = decode_pce(avctx, m4ac, layout_map, gb);
 600         if (tags < 0)
 601             return tags;
 602     } else {
 603         if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
 604             return ret;
 605     }
 606
 607     if (count_channels(layout_map, tags) > 1) {
 608         m4ac->ps = 0;
 609     } else if (m4ac->sbr == 1 && m4ac->ps == -1)
 610         m4ac->ps = 1;
 611
 612     if (ac && (ret = output_configure(ac, layout_map, tags,
 613                                       channel_config, OC_GLOBAL_HDR)))
 614         return ret;
 615
 616     if (extension_flag) {
 617         switch (m4ac->object_type) {
 618         case AOT_ER_BSAC:
 619             skip_bits(gb, 5);    // numOfSubFrame
 620             skip_bits(gb, 11);   // layer_length
 621             break;
 622         case AOT_ER_AAC_LC:
 623         case AOT_ER_AAC_LTP:
 624         case AOT_ER_AAC_SCALABLE:
 625         case AOT_ER_AAC_LD:
 626             skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
 627                                     * aacScalefactorDataResilienceFlag
 628                                     * aacSpectralDataResilienceFlag
 629                                     */
 630             break;
 631         }
 632         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
 633     }
 634     return 0;
 635 }
 636
 637 /**
 638  * Decode audio specific configuration; reference: table 1.13.
 639  *
 640  * @param   ac          pointer to AACContext, may be null
 641  * @param   avctx       pointer to AVCCodecContext, used for logging
 642  * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
 643  * @param   data        pointer to buffer holding an audio specific config
 644  * @param   bit_size    size of audio specific config or data in bits
 645  * @param   sync_extension look for an appended sync extension
 646  *
 647  * @return  Returns error status or number of consumed bits. <0 - error
 648  */
 649 static int decode_audio_specific_config(AACContext *ac,
 650                                         AVCodecContext *avctx,
 651                                         MPEG4AudioConfig *m4ac,
 652                                         const uint8_t *data, int bit_size,
 653                                         int sync_extension)
 654 {
 655     GetBitContext gb;
 656     int i;
 657
 658     av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
 659     for (i = 0; i < avctx->extradata_size; i++)
 660          av_dlog(avctx, "%02x ", avctx->extradata[i]);
 661     av_dlog(avctx, "\n");
 662
 663     init_get_bits(&gb, data, bit_size);
 664
 665     if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
 666         return -1;
 667     if (m4ac->sampling_index > 12) {
 668         av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
 669         return -1;
 670     }
 671
 672     skip_bits_long(&gb, i);
 673
 674     switch (m4ac->object_type) {
 675     case AOT_AAC_MAIN:
 676     case AOT_AAC_LC:
 677     case AOT_AAC_LTP:
 678         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
 679             return -1;
 680         break;
 681     default:
 682         av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
 683                m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
 684         return -1;
 685     }
 686
 687     av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
 688             m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
 689             m4ac->sample_rate, m4ac->sbr, m4ac->ps);
 690
 691     return get_bits_count(&gb);
 692 }
 693
 694 /**
 695  * linear congruential pseudorandom number generator
 696  *
 697  * @param   previous_val    pointer to the current state of the generator
 698  *
 699  * @return  Returns a 32-bit pseudorandom integer
 700  */
 701 static av_always_inline int lcg_random(int previous_val)
 702 {
 703     return previous_val * 1664525 + 1013904223;
 704 }
 705
 706 static av_always_inline void reset_predict_state(PredictorState *ps)
 707 {
 708     ps->r0   = 0.0f;
 709     ps->r1   = 0.0f;
 710     ps->cor0 = 0.0f;
 711     ps->cor1 = 0.0f;
 712     ps->var0 = 1.0f;
 713     ps->var1 = 1.0f;
 714 }
 715
 716 static void reset_all_predictors(PredictorState *ps)
 717 {
 718     int i;
 719     for (i = 0; i < MAX_PREDICTORS; i++)
 720         reset_predict_state(&ps[i]);
 721 }
 722
 723 static int sample_rate_idx (int rate)
 724 {
 725          if (92017 <= rate) return 0;
 726     else if (75132 <= rate) return 1;
 727     else if (55426 <= rate) return 2;
 728     else if (46009 <= rate) return 3;
 729     else if (37566 <= rate) return 4;
 730     else if (27713 <= rate) return 5;
 731     else if (23004 <= rate) return 6;
 732     else if (18783 <= rate) return 7;
 733     else if (13856 <= rate) return 8;
 734     else if (11502 <= rate) return 9;
 735     else if (9391  <= rate) return 10;
 736     else                    return 11;
 737 }
 738
 739 static void reset_predictor_group(PredictorState *ps, int group_num)
 740 {
 741     int i;
 742     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 743         reset_predict_state(&ps[i]);
 744 }
 745
/**
 * Initialize the static VLC table vlc_spectral[num] from the matching
 * ff_aac_spectral_* size, bits and codes tables.
 *
 * @param num  index of the spectral codebook
 * @param size value passed as the last (static size) argument of
 *             INIT_VLC_STATIC
 *
 * Note: the expansion already ends in a semicolon.
 */
#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);
 751
/**
 * Initialize the AAC decoder.
 *
 * The stream configuration is taken from the extradata when present and is
 * otherwise derived from the sample rate and channel count of the codec
 * context. After that the output sample format is selected and the VLC
 * tables, DSP helpers, MDCT contexts and window tables are set up.
 *
 * @param   avctx   codec context; its priv_data is the AACContext
 *
 * @return  0 on success, a negative value on error
 */
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;
    float output_scale_factor;

    ac->avctx = avctx;
    ac->m4ac.sample_rate = avctx->sample_rate;

    if (avctx->extradata_size > 0) {
        // The extradata holds an audio specific config (size given in bits).
        if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac,
                                         avctx->extradata,
                                         avctx->extradata_size*8, 1) < 0)
            return -1;
    } else {
        int sr, i;
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;

        // No extradata: derive the configuration from the codec context,
        // leaving SBR and PS undecided (-1).
        sr = sample_rate_idx(avctx->sample_rate);
        ac->m4ac.sampling_index = sr;
        ac->m4ac.channels = avctx->channels;
        ac->m4ac.sbr = -1;
        ac->m4ac.ps = -1;

        // Look for the channel configuration index with this channel count;
        // fall back to 0 if there is none.
        for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
            if (ff_mpeg4audio_channels[i] == avctx->channels)
                break;
        if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
            i = 0;
        }
        ac->m4ac.chan_config = i;

        if (ac->m4ac.chan_config) {
            int ret = set_default_channel_config(avctx, layout_map,
                &layout_map_tags, ac->m4ac.chan_config);
            // NOTE(review): the return value of output_configure() is not
            // checked here -- confirm that this is intended.
            if (!ret)
                output_configure(ac, layout_map, layout_map_tags,
                                 ac->m4ac.chan_config, OC_GLOBAL_HDR);
            else if (avctx->err_recognition & AV_EF_EXPLODE)
                return AVERROR_INVALIDDATA;
        }
    }

    // Float output is only produced on request and is scaled by 1/32768;
    // the default is signed 16 bit output with no extra scaling.
    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
        output_scale_factor = 1.0 / 32768.0;
    } else {
        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
        output_scale_factor = 1.0;
    }

    // One static VLC table per spectral codebook.
    AAC_INIT_VLC_STATIC( 0, 304);
    AAC_INIT_VLC_STATIC( 1, 270);
    AAC_INIT_VLC_STATIC( 2, 550);
    AAC_INIT_VLC_STATIC( 3, 300);
    AAC_INIT_VLC_STATIC( 4, 328);
    AAC_INIT_VLC_STATIC( 5, 294);
    AAC_INIT_VLC_STATIC( 6, 306);
    AAC_INIT_VLC_STATIC( 7, 268);
    AAC_INIT_VLC_STATIC( 8, 510);
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);

    ff_aac_sbr_init();

    ff_dsputil_init(&ac->dsp, avctx);
    ff_fmt_convert_init(&ac->fmt_conv, avctx);

    // Fixed seed for the pseudorandom generator state.
    ac->random_state = 0x1f2e3d4c;

    ff_aac_tableinit();

    INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
                    ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                    ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                    352);

    // The output scale factor is folded into the MDCT scale arguments.
    // NOTE(review): the return values of ff_mdct_init() are not checked.
    ff_mdct_init(&ac->mdct,       11, 1, output_scale_factor/1024.0);
    ff_mdct_init(&ac->mdct_small,  8, 1, output_scale_factor/128.0);
    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0/output_scale_factor);
    // window initialization
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows( 7);

    cbrt_tableinit();

    avcodec_get_frame_defaults(&ac->frame);
    avctx->coded_frame = &ac->frame;

    return 0;
}
 845
 846 /**
 847  * Skip data_stream_element; reference: table 4.10.
 848  */
 849 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 850 {
 851     int byte_align = get_bits1(gb);
 852     int count = get_bits(gb, 8);
 853     if (count == 255)
 854         count += get_bits(gb, 8);
 855     if (byte_align)
 856         align_get_bits(gb);
 857
 858     if (get_bits_left(gb) < 8 * count) {
 859         av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 860         return -1;
 861     }
 862     skip_bits_long(gb, 8 * count);
 863     return 0;
 864 }
 865
 866 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
 867                              GetBitContext *gb)
 868 {
 869     int sfb;
 870     if (get_bits1(gb)) {
 871         ics->predictor_reset_group = get_bits(gb, 5);
 872         if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
 873             av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
 874             return -1;
 875         }
 876     }
 877     for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
 878         ics->prediction_used[sfb] = get_bits1(gb);
 879     }
 880     return 0;
 881 }
 882
 883 /**
 884  * Decode Long Term Prediction data; reference: table 4.xx.
 885  */
 886 static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
 887                        GetBitContext *gb, uint8_t max_sfb)
 888 {
 889     int sfb;
 890
 891     ltp->lag  = get_bits(gb, 11);
 892     ltp->coef = ltp_coef[get_bits(gb, 3)];
 893     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
 894         ltp->used[sfb] = get_bits1(gb);
 895 }
 896
 897 /**
 898  * Decode Individual Channel Stream info; reference: table 4.6.
 899  */
 900 static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
 901                            GetBitContext *gb)
 902 {
 903     if (get_bits1(gb)) {
 904         av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
 905         return AVERROR_INVALIDDATA;
 906     }
 907     ics->window_sequence[1] = ics->window_sequence[0];
 908     ics->window_sequence[0] = get_bits(gb, 2);
 909     ics->use_kb_window[1]   = ics->use_kb_window[0];
 910     ics->use_kb_window[0]   = get_bits1(gb);
 911     ics->num_window_groups  = 1;
 912     ics->group_len[0]       = 1;
 913     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 914         int i;
 915         ics->max_sfb = get_bits(gb, 4);
 916         for (i = 0; i < 7; i++) {
 917             if (get_bits1(gb)) {
 918                 ics->group_len[ics->num_window_groups - 1]++;
 919             } else {
 920                 ics->num_window_groups++;
 921                 ics->group_len[ics->num_window_groups - 1] = 1;
 922             }
 923         }
 924         ics->num_windows       = 8;
 925         ics->swb_offset        =    ff_swb_offset_128[ac->m4ac.sampling_index];
 926         ics->num_swb           =   ff_aac_num_swb_128[ac->m4ac.sampling_index];
 927         ics->tns_max_bands     = ff_tns_max_bands_128[ac->m4ac.sampling_index];
 928         ics->predictor_present = 0;
 929     } else {
 930         ics->max_sfb               = get_bits(gb, 6);
 931         ics->num_windows           = 1;
 932         ics->swb_offset            =    ff_swb_offset_1024[ac->m4ac.sampling_index];
 933         ics->num_swb               =   ff_aac_num_swb_1024[ac->m4ac.sampling_index];
 934         ics->tns_max_bands         = ff_tns_max_bands_1024[ac->m4ac.sampling_index];
 935         ics->predictor_present     = get_bits1(gb);
 936         ics->predictor_reset_group = 0;
 937         if (ics->predictor_present) {
 938             if (ac->m4ac.object_type == AOT_AAC_MAIN) {
 939                 if (decode_prediction(ac, ics, gb)) {
 940                     return AVERROR_INVALIDDATA;
 941                 }
 942             } else if (ac->m4ac.object_type == AOT_AAC_LC) {
 943                 av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
 944                 return AVERROR_INVALIDDATA;
 945             } else {
 946                 if ((ics->ltp.present = get_bits(gb, 1)))
 947                     decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
 948             }
 949         }
 950     }
 951
 952     if (ics->max_sfb > ics->num_swb) {
 953         av_log(ac->avctx, AV_LOG_ERROR,
 954                "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
 955                ics->max_sfb, ics->num_swb);
 956         return AVERROR_INVALIDDATA;
 957     }
 958
 959     return 0;
 960 }
 961
 962 /**
 963  * Decode band types (section_data payload); reference: table 4.46.
 964  *
 965  * @param   band_type           array of the used band type
 966  * @param   band_type_run_end   array of the last scalefactor band of a band type run
 967  *
 968  * @return  Returns error status. 0 - OK, !0 - error
 969  */
 970 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
 971                              int band_type_run_end[120], GetBitContext *gb,
 972                              IndividualChannelStream *ics)
 973 {
 974     int g, idx = 0;
 975     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
 976     for (g = 0; g < ics->num_window_groups; g++) {
 977         int k = 0;
 978         while (k < ics->max_sfb) {
 979             uint8_t sect_end = k;
 980             int sect_len_incr;
 981             int sect_band_type = get_bits(gb, 4);
 982             if (sect_band_type == 12) {
 983                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
 984                 return -1;
 985             }
 986             do {
 987                 sect_len_incr = get_bits(gb, bits);
 988                 sect_end += sect_len_incr;
 989                 if (get_bits_left(gb) < 0) {
 990                     av_log(ac->avctx, AV_LOG_ERROR, overread_err);
 991                     return -1;
 992                 }
 993                 if (sect_end > ics->max_sfb) {
 994                     av_log(ac->avctx, AV_LOG_ERROR,
 995                            "Number of bands (%d) exceeds limit (%d).\n",
 996                            sect_end, ics->max_sfb);
 997                     return -1;
 998                 }
 999             } while (sect_len_incr == (1 << bits) - 1);
1000             for (; k < sect_end; k++) {
1001                 band_type        [idx]   = sect_band_type;
1002                 band_type_run_end[idx++] = sect_end;
1003             }
1004         }
1005     }
1006     return 0;
1007 }
1008
1009 /**
1010  * Decode scalefactors; reference: table 4.47.
1011  *
1012  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
1013  * @param   band_type           array of the used band type
1014  * @param   band_type_run_end   array of the last scalefactor band of a band type run
1015  * @param   sf                  array of scalefactors or intensity stereo positions
1016  *
1017  * @return  Returns error status. 0 - OK, !0 - error
1018  */
1019 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1020                                unsigned int global_gain,
1021                                IndividualChannelStream *ics,
1022                                enum BandType band_type[120],
1023                                int band_type_run_end[120])
1024 {
1025     int g, i, idx = 0;
1026     int offset[3] = { global_gain, global_gain - 90, 0 };
1027     int clipped_offset;
1028     int noise_flag = 1;
1029     for (g = 0; g < ics->num_window_groups; g++) {
1030         for (i = 0; i < ics->max_sfb;) {
1031             int run_end = band_type_run_end[idx];
1032             if (band_type[idx] == ZERO_BT) {
1033                 for (; i < run_end; i++, idx++)
1034                     sf[idx] = 0.;
1035             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
1036                 for (; i < run_end; i++, idx++) {
1037                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1038                     clipped_offset = av_clip(offset[2], -155, 100);
1039                     if (offset[2] != clipped_offset) {
1040                         av_log_ask_for_sample(ac->avctx, "Intensity stereo "
1041                                 "position clipped (%d -> %d).\nIf you heard an "
1042                                 "audible artifact, there may be a bug in the "
1043                                 "decoder. ", offset[2], clipped_offset);
1044                     }
1045                     sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1046                 }
1047             } else if (band_type[idx] == NOISE_BT) {
1048                 for (; i < run_end; i++, idx++) {
1049                     if (noise_flag-- > 0)
1050                         offset[1] += get_bits(gb, 9) - 256;
1051                     else
1052                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1053                     clipped_offset = av_clip(offset[1], -100, 155);
1054                     if (offset[1] != clipped_offset) {
1055                         av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
1056                                 "(%d -> %d).\nIf you heard an audible "
1057                                 "artifact, there may be a bug in the decoder. ",
1058                                 offset[1], clipped_offset);
1059                     }
1060                     sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1061                 }
1062             } else {
1063                 for (; i < run_end; i++, idx++) {
1064                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1065                     if (offset[0] > 255U) {
1066                         av_log(ac->avctx, AV_LOG_ERROR,
1067                                "Scalefactor (%d) out of range.\n", offset[0]);
1068                         return -1;
1069                     }
1070                     sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1071                 }
1072             }
1073         }
1074     }
1075     return 0;
1076 }
1077
1078 /**
1079  * Decode pulse data; reference: table 4.7.
1080  */
1081 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1082                          const uint16_t *swb_offset, int num_swb)
1083 {
1084     int i, pulse_swb;
1085     pulse->num_pulse = get_bits(gb, 2) + 1;
1086     pulse_swb        = get_bits(gb, 6);
1087     if (pulse_swb >= num_swb)
1088         return -1;
1089     pulse->pos[0]    = swb_offset[pulse_swb];
1090     pulse->pos[0]   += get_bits(gb, 5);
1091     if (pulse->pos[0] > 1023)
1092         return -1;
1093     pulse->amp[0]    = get_bits(gb, 4);
1094     for (i = 1; i < pulse->num_pulse; i++) {
1095         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1096         if (pulse->pos[i] > 1023)
1097             return -1;
1098         pulse->amp[i] = get_bits(gb, 4);
1099     }
1100     return 0;
1101 }
1102
1103 /**
1104  * Decode Temporal Noise Shaping data; reference: table 4.48.
1105  *
1106  * @return  Returns error status. 0 - OK, !0 - error
1107  */
1108 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
1109                       GetBitContext *gb, const IndividualChannelStream *ics)
1110 {
1111     int w, filt, i, coef_len, coef_res, coef_compress;
1112     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1113     const int tns_max_order = is8 ? 7 : ac->m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1114     for (w = 0; w < ics->num_windows; w++) {
1115         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1116             coef_res = get_bits1(gb);
1117
1118             for (filt = 0; filt < tns->n_filt[w]; filt++) {
1119                 int tmp2_idx;
1120                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1121
1122                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1123                     av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
1124                            tns->order[w][filt], tns_max_order);
1125                     tns->order[w][filt] = 0;
1126                     return -1;
1127                 }
1128                 if (tns->order[w][filt]) {
1129                     tns->direction[w][filt] = get_bits1(gb);
1130                     coef_compress = get_bits1(gb);
1131                     coef_len = coef_res + 3 - coef_compress;
1132                     tmp2_idx = 2 * coef_compress + coef_res;
1133
1134                     for (i = 0; i < tns->order[w][filt]; i++)
1135                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1136                 }
1137             }
1138         }
1139     }
1140     return 0;
1141 }
1142
1143 /**
1144  * Decode Mid/Side data; reference: table 4.54.
1145  *
1146  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1147  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1148  *                      [3] reserved for scalable AAC
1149  */
1150 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
1151                                    int ms_present)
1152 {
1153     int idx;
1154     if (ms_present == 1) {
1155         for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
1156             cpe->ms_mask[idx] = get_bits1(gb);
1157     } else if (ms_present == 2) {
1158         memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
1159     }
1160 }
1161
#ifndef VMUL2
/**
 * Write two scaled codebook values to dst.
 *
 * The two table indices are packed into idx as consecutive 4-bit fields,
 * lowest field first.
 *
 * @return pointer just past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;
    return dst + 2;
}
#endif
1172
#ifndef VMUL4
/**
 * Write four scaled codebook values to dst.
 *
 * The four table indices are packed into idx as consecutive 2-bit fields,
 * lowest field first.
 *
 * @return pointer just past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 3] * gain;
    dst[1] = v[(idx >> 2) & 3] * gain;
    dst[2] = v[(idx >> 4) & 3] * gain;
    dst[3] = v[(idx >> 6) & 3] * gain;
    return dst + 4;
}
#endif
1185
1186 #ifndef VMUL2S
1187 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1188                             unsigned sign, const float *scale)
1189 {
1190     union av_intfloat32 s0, s1;
1191
1192     s0.f = s1.f = *scale;
1193     s0.i ^= sign >> 1 << 31;
1194     s1.i ^= sign      << 31;
1195
1196     *dst++ = v[idx    & 15] * s0.f;
1197     *dst++ = v[idx>>4 & 15] * s1.f;
1198
1199     return dst;
1200 }
1201 #endif
1202
1203 #ifndef VMUL4S
1204 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1205                             unsigned sign, const float *scale)
1206 {
1207     unsigned nz = idx >> 12;
1208     union av_intfloat32 s = { .f = *scale };
1209     union av_intfloat32 t;
1210
1211     t.i = s.i ^ (sign & 1U<<31);
1212     *dst++ = v[idx    & 3] * t.f;
1213
1214     sign <<= nz & 1; nz >>= 1;
1215     t.i = s.i ^ (sign & 1U<<31);
1216     *dst++ = v[idx>>2 & 3] * t.f;
1217
1218     sign <<= nz & 1; nz >>= 1;
1219     t.i = s.i ^ (sign & 1U<<31);
1220     *dst++ = v[idx>>4 & 3] * t.f;
1221
1222     sign <<= nz & 1; nz >>= 1;
1223     t.i = s.i ^ (sign & 1U<<31);
1224     *dst++ = v[idx>>6 & 3] * t.f;
1225
1226     return dst;
1227 }
1228 #endif
1229
/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * The function works in three steps:
 *  1. every coefficient above the last transmitted band (max_sfb) is cleared
 *     in each window;
 *  2. for each window group and each band the coefficients are either
 *     cleared, filled with scaled pseudo-random noise, or read from the
 *     bitstream with the band's codebook and multiplied by the band's
 *     scalefactor;
 *  3. if pulse data is present, the pulse amplitudes are merged into the
 *     already dequantized coefficients.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                                       GetBitContext *gb, const float sf[120],
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
    int i, k, g, idx = 0;
    /* Number of coefficients per window. */
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
    /* coef is advanced per window group below; keep the start for pulses. */
    float *coef_base = coef;

    /* Zero everything from the end of band max_sfb-1 to the end of each window. */
    for (g = 0; g < ics->num_windows; g++)
        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));

    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            /* Unsigned on purpose: a band type of 0 wraps to UINT_MAX and so
             * also lands in the first branch below
             * (presumably that is ZERO_BT - confirm against the enum). */
            const unsigned cbt_m1 = band_type[idx] - 1;
            float *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                /* No spectral data is read for these band types: clear the
                 * band in every window of the group. */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(float));
                }
            } else if (cbt_m1 == NOISE_BT - 1) {
                /* Noise band: fill with lcg_random() output, then rescale so
                 * that the band's root energy equals sf[idx]. */
                for (group = 0; group < g_len; group++, cfo+=128) {
                    float scale;
                    float band_energy;

                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state;
                    }

                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
                    scale = sf[idx] / sqrtf(band_energy);
                    ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
                }
            } else {
                /* Regular spectral codebook: pick the value table, the packed
                 * index table and the VLC table belonging to this codebook. */
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                OPEN_READER(re, gb);

                /* Codebooks are handled in pairs that share a decode loop. */
                switch (cbt_m1 >> 1) {
                case 0:
                    /* Four values per codeword, no separate sign bits read. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 1:
                    /* Four values per codeword, followed by nnz sign bits
                     * (nnz is taken from bits 8..11 of the packed index). */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            bits = nnz ? GET_CACHE(re, gb) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                        } while (len -= 4);
                    }
                    break;

                case 2:
                    /* Two values per codeword, no separate sign bits read. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                case 3:
                case 4:
                    /* Two values per codeword, followed by nnz sign bits. The
                     * sign bits are pre-shifted by (cb_idx >> 12) before
                     * being handed to VMUL2S. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 8 & 15;
                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                        } while (len -= 2);
                    }
                    break;

                default:
                    /* Remaining codebook: two values per codeword, each of
                     * which may be followed by an escape sequence. Values are
                     * written as raw 32-bit patterns through icf and scaled
                     * in one pass after the band has been read. */
                    for (group = 0; group < g_len; group++, cfo+=128) {
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);

                            /* Code 0 stores a pair of zeros and reads nothing
                             * more; "continue" jumps to the loop condition. */
                            if (!code) {
                                *icf++ = 0;
                                *icf++ = 0;
                                continue;
                            }

                            cb_idx = cb_vector_idx[code];
                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
                            /* Left-align the nnz sign bits so the next sign is
                             * always in bit 31. */
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);

                            for (j = 0; j < 2; j++) {
                                /* Bit j of nzt selects the escape path for
                                 * value j. */
                                if (nzt & 1<<j) {
                                    uint32_t b;
                                    int n;
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    /* Presumably the count of leading one bits
                                     * in the cache (assumes av_log2 is
                                     * floor(log2) - confirm). */
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return -1;
                                    }

                                    /* Skip the prefix and its terminator, then
                                     * read a b+4 bit field; the value is that
                                     * field plus 2^(b+4). */
                                    SKIP_BITS(re, gb, b + 1);
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
                                    /* Table entry OR-ed with the sign bit. */
                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
                                    bits <<= 1;
                                } else {
                                    /* Table value read as a raw bit pattern;
                                     * a sign bit is consumed only when the
                                     * pattern is non-zero. */
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
                                    *icf++ = (bits & 1U<<31) | v;
                                    bits <<= !!v;
                                }
                                cb_idx >>= 4;
                            }
                        } while (len -= 2);

                        /* Apply the band's scalefactor to the whole band. */
                        ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                    }
                }

                CLOSE_READER(re, gb);
            }
        }
        /* Move on to the first window of the next group. */
        coef += g_len << 7;
    }

    if (pulse_present) {
        idx = 0;
        for (i = 0; i < pulse->num_pulse; i++) {
            float co = coef_base[ pulse->pos[i] ];
            /* Find the band that contains this pulse position. */
            while (offsets[idx + 1] <= pulse->pos[i])
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
                float ico = -pulse->amp[i];
                if (co) {
                    /* Undo the scalefactor, map back through |x|^(3/4) with
                     * the sign kept, and add the pulse amplitude with the
                     * same sign as the coefficient. */
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                /* Store sign(ico) * |ico|^(4/3), scaled by sf[idx]. */
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
            }
        }
    }
    return 0;
}
1455
1456 static av_always_inline float flt16_round(float pf)
1457 {
1458     union av_intfloat32 tmp;
1459     tmp.f = pf;
1460     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1461     return tmp.f;
1462 }
1463
1464 static av_always_inline float flt16_even(float pf)
1465 {
1466     union av_intfloat32 tmp;
1467     tmp.f = pf;
1468     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1469     return tmp.f;
1470 }
1471
1472 static av_always_inline float flt16_trunc(float pf)
1473 {
1474     union av_intfloat32 pun;
1475     pun.f = pf;
1476     pun.i &= 0xFFFF0000U;
1477     return pun.f;
1478 }
1479
1480 static av_always_inline void predict(PredictorState *ps, float *coef,
1481                                      int output_enable)
1482 {
1483     const float a     = 0.953125; // 61.0 / 64
1484     const float alpha = 0.90625;  // 29.0 / 32
1485     float e0, e1;
1486     float pv;
1487     float k1, k2;
1488     float   r0 = ps->r0,     r1 = ps->r1;
1489     float cor0 = ps->cor0, cor1 = ps->cor1;
1490     float var0 = ps->var0, var1 = ps->var1;
1491
1492     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1493     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1494
1495     pv = flt16_round(k1 * r0 + k2 * r1);
1496     if (output_enable)
1497         *coef += pv;
1498
1499     e0 = *coef;
1500     e1 = e0 - k1 * r0;
1501
1502     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1503     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1504     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1505     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1506
1507     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1508     ps->r0 = flt16_trunc(a * e0);
1509 }
1510
1511 /**
1512  * Apply AAC-Main style frequency domain prediction.
1513  */
1514 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
1515 {
1516     int sfb, k;
1517
1518     if (!sce->ics.predictor_initialized) {
1519         reset_all_predictors(sce->predictor_state);
1520         sce->ics.predictor_initialized = 1;
1521     }
1522
1523     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1524         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
1525             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1526                 predict(&sce->predictor_state[k], &sce->coeffs[k],
1527                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1528             }
1529         }
1530         if (sce->ics.predictor_reset_group)
1531             reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
1532     } else
1533         reset_all_predictors(sce->predictor_state);
1534 }
1535
1536 /**
1537  * Decode an individual_channel_stream payload; reference: table 4.44.
1538  *
1539  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
1540  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1541  *
1542  * @return  Returns error status. 0 - OK, !0 - error
1543  */
1544 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
1545                       GetBitContext *gb, int common_window, int scale_flag)
1546 {
1547     Pulse pulse;
1548     TemporalNoiseShaping    *tns = &sce->tns;
1549     IndividualChannelStream *ics = &sce->ics;
1550     float *out = sce->coeffs;
1551     int global_gain, pulse_present = 0;
1552
1553     /* This assignment is to silence a GCC warning about the variable being used
1554      * uninitialized when in fact it always is.
1555      */
1556     pulse.num_pulse = 0;
1557
1558     global_gain = get_bits(gb, 8);
1559
1560     if (!common_window && !scale_flag) {
1561         if (decode_ics_info(ac, ics, gb) < 0)
1562             return AVERROR_INVALIDDATA;
1563     }
1564
1565     if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1566         return -1;
1567     if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1568         return -1;
1569
1570     pulse_present = 0;
1571     if (!scale_flag) {
1572         if ((pulse_present = get_bits1(gb))) {
1573             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1574                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1575                 return -1;
1576             }
1577             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1578                 av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1579                 return -1;
1580             }
1581         }
1582         if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1583             return -1;
1584         if (get_bits1(gb)) {
1585             av_log_missing_feature(ac->avctx, "SSR", 1);
1586             return -1;
1587         }
1588     }
1589
1590     if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1591         return -1;
1592
1593     if (ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
1594         apply_prediction(ac, sce);
1595
1596     return 0;
1597 }
1598
1599 /**
1600  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1601  */
1602 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
1603 {
1604     const IndividualChannelStream *ics = &cpe->ch[0].ics;
1605     float *ch0 = cpe->ch[0].coeffs;
1606     float *ch1 = cpe->ch[1].coeffs;
1607     int g, i, group, idx = 0;
1608     const uint16_t *offsets = ics->swb_offset;
1609     for (g = 0; g < ics->num_window_groups; g++) {
1610         for (i = 0; i < ics->max_sfb; i++, idx++) {
1611             if (cpe->ms_mask[idx] &&
1612                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1613                 for (group = 0; group < ics->group_len[g]; group++) {
1614                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1615                                               ch1 + group * 128 + offsets[i],
1616                                               offsets[i+1] - offsets[i]);
1617                 }
1618             }
1619         }
1620         ch0 += ics->group_len[g] * 128;
1621         ch1 += ics->group_len[g] * 128;
1622     }
1623 }
1624
1625 /**
1626  * intensity stereo decoding; reference: 4.6.8.2.3
1627  *
1628  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1629  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1630  *                      [3] reserved for scalable AAC
1631  */
1632 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1633 {
1634     const IndividualChannelStream *ics = &cpe->ch[1].ics;
1635     SingleChannelElement         *sce1 = &cpe->ch[1];
1636     float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1637     const uint16_t *offsets = ics->swb_offset;
1638     int g, group, i, idx = 0;
1639     int c;
1640     float scale;
1641     for (g = 0; g < ics->num_window_groups; g++) {
1642         for (i = 0; i < ics->max_sfb;) {
1643             if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1644                 const int bt_run_end = sce1->band_type_run_end[idx];
1645                 for (; i < bt_run_end; i++, idx++) {
1646                     c = -1 + 2 * (sce1->band_type[idx] - 14);
1647                     if (ms_present)
1648                         c *= 1 - 2 * cpe->ms_mask[idx];
1649                     scale = c * sce1->sf[idx];
1650                     for (group = 0; group < ics->group_len[g]; group++)
1651                         ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1652                                                    coef0 + group * 128 + offsets[i],
1653                                                    scale,
1654                                                    offsets[i + 1] - offsets[i]);
1655                 }
1656             } else {
1657                 int bt_run_end = sce1->band_type_run_end[idx];
1658                 idx += bt_run_end - i;
1659                 i    = bt_run_end;
1660             }
1661         }
1662         coef0 += ics->group_len[g] * 128;
1663         coef1 += ics->group_len[g] * 128;
1664     }
1665 }
1666
1667 /**
1668  * Decode a channel_pair_element; reference: table 4.4.
1669  *
1670  * @return  Returns error status. 0 - OK, !0 - error
1671  */
1672 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
1673 {
1674     int i, ret, common_window, ms_present = 0;
1675
1676     common_window = get_bits1(gb);
1677     if (common_window) {
1678         if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1679             return AVERROR_INVALIDDATA;
1680         i = cpe->ch[1].ics.use_kb_window[0];
1681         cpe->ch[1].ics = cpe->ch[0].ics;
1682         cpe->ch[1].ics.use_kb_window[1] = i;
1683         if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN))
1684             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1685                 decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1686         ms_present = get_bits(gb, 2);
1687         if (ms_present == 3) {
1688             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1689             return -1;
1690         } else if (ms_present)
1691             decode_mid_side_stereo(cpe, gb, ms_present);
1692     }
1693     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1694         return ret;
1695     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1696         return ret;
1697
1698     if (common_window) {
1699         if (ms_present)
1700             apply_mid_side_stereo(ac, cpe);
1701         if (ac->m4ac.object_type == AOT_AAC_MAIN) {
1702             apply_prediction(ac, &cpe->ch[0]);
1703             apply_prediction(ac, &cpe->ch[1]);
1704         }
1705     }
1706
1707     apply_intensity_stereo(ac, cpe, ms_present);
1708     return 0;
1709 }
1710
/* Gain bases selectable by a 2-bit index: successive powers of 2^(1/8). */
static const float cce_scale[4] = {
    1.09050773266525765921, /* 2^(1/8) */
    1.18920711500272106672, /* 2^(1/4) */
    M_SQRT2,                /* 2^(1/2) */
    2.0,                    /* 2^1     */
};
1717
1718 /**
1719  * Decode coupling_channel_element; reference: table 4.8.
1720  *
1721  * @return  Returns error status. 0 - OK, !0 - error
1722  */
1723 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
1724 {
1725     int num_gain = 0;
1726     int c, g, sfb, ret;
1727     int sign;
1728     float scale;
1729     SingleChannelElement *sce = &che->ch[0];
1730     ChannelCoupling     *coup = &che->coup;
1731
1732     coup->coupling_point = 2 * get_bits1(gb);
1733     coup->num_coupled = get_bits(gb, 3);
1734     for (c = 0; c <= coup->num_coupled; c++) {
1735         num_gain++;
1736         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1737         coup->id_select[c] = get_bits(gb, 4);
1738         if (coup->type[c] == TYPE_CPE) {
1739             coup->ch_select[c] = get_bits(gb, 2);
1740             if (coup->ch_select[c] == 3)
1741                 num_gain++;
1742         } else
1743             coup->ch_select[c] = 2;
1744     }
1745     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1746
1747     sign  = get_bits(gb, 1);
1748     scale = cce_scale[get_bits(gb, 2)];
1749
1750     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1751         return ret;
1752
1753     for (c = 0; c < num_gain; c++) {
1754         int idx  = 0;
1755         int cge  = 1;
1756         int gain = 0;
1757         float gain_cache = 1.;
1758         if (c) {
1759             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1760             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1761             gain_cache = powf(scale, -gain);
1762         }
1763         if (coup->coupling_point == AFTER_IMDCT) {
1764             coup->gain[c][0] = gain_cache;
1765         } else {
1766             for (g = 0; g < sce->ics.num_window_groups; g++) {
1767                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1768                     if (sce->band_type[idx] != ZERO_BT) {
1769                         if (!cge) {
1770                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1771                             if (t) {
1772                                 int s = 1;
1773                                 t = gain += t;
1774                                 if (sign) {
1775                                     s  -= 2 * (t & 0x1);
1776                                     t >>= 1;
1777                                 }
1778                                 gain_cache = powf(scale, -t) * s;
1779                             }
1780                         }
1781                         coup->gain[c][idx] = gain_cache;
1782                     }
1783                 }
1784             }
1785         }
1786     }
1787     return 0;
1788 }
1789
1790 /**
1791  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1792  *
1793  * @return  Returns number of bytes consumed.
1794  */
1795 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
1796                                          GetBitContext *gb)
1797 {
1798     int i;
1799     int num_excl_chan = 0;
1800
1801     do {
1802         for (i = 0; i < 7; i++)
1803             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1804     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1805
1806     return num_excl_chan / 7;
1807 }
1808
1809 /**
1810  * Decode dynamic range information; reference: table 4.52.
1811  *
1812  * @param   cnt length of TYPE_FIL syntactic element in bytes
1813  *
1814  * @return  Returns number of bytes consumed.
1815  */
1816 static int decode_dynamic_range(DynamicRangeControl *che_drc,
1817                                 GetBitContext *gb, int cnt)
1818 {
1819     int n             = 1;
1820     int drc_num_bands = 1;
1821     int i;
1822
1823     /* pce_tag_present? */
1824     if (get_bits1(gb)) {
1825         che_drc->pce_instance_tag  = get_bits(gb, 4);
1826         skip_bits(gb, 4); // tag_reserved_bits
1827         n++;
1828     }
1829
1830     /* excluded_chns_present? */
1831     if (get_bits1(gb)) {
1832         n += decode_drc_channel_exclusions(che_drc, gb);
1833     }
1834
1835     /* drc_bands_present? */
1836     if (get_bits1(gb)) {
1837         che_drc->band_incr            = get_bits(gb, 4);
1838         che_drc->interpolation_scheme = get_bits(gb, 4);
1839         n++;
1840         drc_num_bands += che_drc->band_incr;
1841         for (i = 0; i < drc_num_bands; i++) {
1842             che_drc->band_top[i] = get_bits(gb, 8);
1843             n++;
1844         }
1845     }
1846
1847     /* prog_ref_level_present? */
1848     if (get_bits1(gb)) {
1849         che_drc->prog_ref_level = get_bits(gb, 7);
1850         skip_bits1(gb); // prog_ref_level_reserved_bits
1851         n++;
1852     }
1853
1854     for (i = 0; i < drc_num_bands; i++) {
1855         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1856         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1857         n++;
1858     }
1859
1860     return n;
1861 }
1862
1863 /**
1864  * Decode extension data (incomplete); reference: table 4.51.
1865  *
1866  * @param   cnt length of TYPE_FIL syntactic element in bytes
1867  *
1868  * @return Returns number of bytes consumed
1869  */
1870 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
1871                                     ChannelElement *che, enum RawDataBlockType elem_type)
1872 {
1873     int crc_flag = 0;
1874     int res = cnt;
1875     switch (get_bits(gb, 4)) { // extension type
1876     case EXT_SBR_DATA_CRC:
1877         crc_flag++;
1878     case EXT_SBR_DATA:
1879         if (!che) {
1880             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
1881             return res;
1882         } else if (!ac->m4ac.sbr) {
1883             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
1884             skip_bits_long(gb, 8 * cnt - 4);
1885             return res;
1886         } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
1887             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
1888             skip_bits_long(gb, 8 * cnt - 4);
1889             return res;
1890         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
1891             ac->m4ac.sbr = 1;
1892             ac->m4ac.ps = 1;
1893             output_configure(ac, ac->layout_map, ac->layout_map_tags,
1894                              ac->m4ac.chan_config, ac->output_configured);
1895         } else {
1896             ac->m4ac.sbr = 1;
1897         }
1898         res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
1899         break;
1900     case EXT_DYNAMIC_RANGE:
1901         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
1902         break;
1903     case EXT_FILL:
1904     case EXT_FILL_DATA:
1905     case EXT_DATA_ELEMENT:
1906     default:
1907         skip_bits_long(gb, 8 * cnt - 4);
1908         break;
1909     };
1910     return res;
1911 }
1912
1913 /**
1914  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
1915  *
1916  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
1917  * @param   coef    spectral coefficients
1918  */
1919 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
1920                       IndividualChannelStream *ics, int decode)
1921 {
1922     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
1923     int w, filt, m, i;
1924     int bottom, top, order, start, end, size, inc;
1925     float lpc[TNS_MAX_ORDER];
1926     float tmp[TNS_MAX_ORDER];
1927
1928     for (w = 0; w < ics->num_windows; w++) {
1929         bottom = ics->num_swb;
1930         for (filt = 0; filt < tns->n_filt[w]; filt++) {
1931             top    = bottom;
1932             bottom = FFMAX(0, top - tns->length[w][filt]);
1933             order  = tns->order[w][filt];
1934             if (order == 0)
1935                 continue;
1936
1937             // tns_decode_coef
1938             compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
1939
1940             start = ics->swb_offset[FFMIN(bottom, mmm)];
1941             end   = ics->swb_offset[FFMIN(   top, mmm)];
1942             if ((size = end - start) <= 0)
1943                 continue;
1944             if (tns->direction[w][filt]) {
1945                 inc = -1;
1946                 start = end - 1;
1947             } else {
1948                 inc = 1;
1949             }
1950             start += w * 128;
1951
1952             if (decode) {
1953                 // ar filter
1954                 for (m = 0; m < size; m++, start += inc)
1955                     for (i = 1; i <= FFMIN(m, order); i++)
1956                         coef[start] -= coef[start - i * inc] * lpc[i - 1];
1957             } else {
1958                 // ma filter
1959                 for (m = 0; m < size; m++, start += inc) {
1960                     tmp[0] = coef[start];
1961                     for (i = 1; i <= FFMIN(m, order); i++)
1962                         coef[start] += tmp[i] * lpc[i - 1];
1963                     for (i = order; i > 0; i--)
1964                         tmp[i] = tmp[i - 1];
1965                 }
1966             }
1967         }
1968     }
1969 }
1970
1971 /**
1972  *  Apply windowing and MDCT to obtain the spectral
1973  *  coefficient from the predicted sample by LTP.
1974  */
1975 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
1976                                    float *in, IndividualChannelStream *ics)
1977 {
1978     const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1979     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1980     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1981     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1982
1983     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
1984         ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
1985     } else {
1986         memset(in, 0, 448 * sizeof(float));
1987         ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
1988     }
1989     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
1990         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
1991     } else {
1992         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
1993         memset(in + 1024 + 576, 0, 448 * sizeof(float));
1994     }
1995     ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
1996 }
1997
1998 /**
1999  * Apply the long term prediction
2000  */
2001 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2002 {
2003     const LongTermPrediction *ltp = &sce->ics.ltp;
2004     const uint16_t *offsets = sce->ics.swb_offset;
2005     int i, sfb;
2006
2007     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2008         float *predTime = sce->ret;
2009         float *predFreq = ac->buf_mdct;
2010         int16_t num_samples = 2048;
2011
2012         if (ltp->lag < 1024)
2013             num_samples = ltp->lag + 1024;
2014         for (i = 0; i < num_samples; i++)
2015             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2016         memset(&predTime[i], 0, (2048 - i) * sizeof(float));
2017
2018         windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2019
2020         if (sce->tns.present)
2021             apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2022
2023         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2024             if (ltp->used[sfb])
2025                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2026                     sce->coeffs[i] += predFreq[i];
2027     }
2028 }
2029
2030 /**
2031  * Update the LTP buffer for next frame
2032  */
2033 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2034 {
2035     IndividualChannelStream *ics = &sce->ics;
2036     float *saved     = sce->saved;
2037     float *saved_ltp = sce->coeffs;
2038     const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2039     const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2040     int i;
2041
2042     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2043         memcpy(saved_ltp,       saved, 512 * sizeof(float));
2044         memset(saved_ltp + 576, 0,     448 * sizeof(float));
2045         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2046         for (i = 0; i < 64; i++)
2047             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2048     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2049         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
2050         memset(saved_ltp + 576, 0,                  448 * sizeof(float));
2051         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2052         for (i = 0; i < 64; i++)
2053             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
2054     } else { // LONG_STOP or ONLY_LONG
2055         ac->dsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
2056         for (i = 0; i < 512; i++)
2057             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
2058     }
2059
2060     memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2061     memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
2062     memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
2063 }
2064
2065 /**
2066  * Conduct IMDCT and windowing.
2067  */
2068 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2069 {
2070     IndividualChannelStream *ics = &sce->ics;
2071     float *in    = sce->coeffs;
2072     float *out   = sce->ret;
2073     float *saved = sce->saved;
2074     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2075     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2076     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2077     float *buf  = ac->buf_mdct;
2078     float *temp = ac->temp;
2079     int i;
2080
2081     // imdct
2082     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2083         for (i = 0; i < 1024; i += 128)
2084             ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2085     } else
2086         ac->mdct.imdct_half(&ac->mdct, buf, in);
2087
2088     /* window overlapping
2089      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2090      * and long to short transitions are considered to be short to short
2091      * transitions. This leaves just two cases (long to long and short to short)
2092      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2093      */
2094     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2095             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2096         ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
2097     } else {
2098         memcpy(                        out,               saved,            448 * sizeof(float));
2099
2100         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2101             ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
2102             ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
2103             ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
2104             ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
2105             ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
2106             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
2107         } else {
2108             ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
2109             memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
2110         }
2111     }
2112
2113     // buffer update
2114     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2115         memcpy(                    saved,       temp + 64,         64 * sizeof(float));
2116         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
2117         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2118         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2119         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2120     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2121         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
2122         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
2123     } else { // LONG_STOP or ONLY_LONG
2124         memcpy(                    saved,       buf + 512,        512 * sizeof(float));
2125     }
2126 }
2127
2128 /**
2129  * Apply dependent channel coupling (applied before IMDCT).
2130  *
2131  * @param   index   index into coupling gain array
2132  */
2133 static void apply_dependent_coupling(AACContext *ac,
2134                                      SingleChannelElement *target,
2135                                      ChannelElement *cce, int index)
2136 {
2137     IndividualChannelStream *ics = &cce->ch[0].ics;
2138     const uint16_t *offsets = ics->swb_offset;
2139     float *dest = target->coeffs;
2140     const float *src = cce->ch[0].coeffs;
2141     int g, i, group, k, idx = 0;
2142     if (ac->m4ac.object_type == AOT_AAC_LTP) {
2143         av_log(ac->avctx, AV_LOG_ERROR,
2144                "Dependent coupling is not supported together with LTP\n");
2145         return;
2146     }
2147     for (g = 0; g < ics->num_window_groups; g++) {
2148         for (i = 0; i < ics->max_sfb; i++, idx++) {
2149             if (cce->ch[0].band_type[idx] != ZERO_BT) {
2150                 const float gain = cce->coup.gain[index][idx];
2151                 for (group = 0; group < ics->group_len[g]; group++) {
2152                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
2153                         // XXX dsputil-ize
2154                         dest[group * 128 + k] += gain * src[group * 128 + k];
2155                     }
2156                 }
2157             }
2158         }
2159         dest += ics->group_len[g] * 128;
2160         src  += ics->group_len[g] * 128;
2161     }
2162 }
2163
2164 /**
2165  * Apply independent channel coupling (applied after IMDCT).
2166  *
2167  * @param   index   index into coupling gain array
2168  */
2169 static void apply_independent_coupling(AACContext *ac,
2170                                        SingleChannelElement *target,
2171                                        ChannelElement *cce, int index)
2172 {
2173     int i;
2174     const float gain = cce->coup.gain[index][0];
2175     const float *src = cce->ch[0].ret;
2176     float *dest = target->ret;
2177     const int len = 1024 << (ac->m4ac.sbr == 1);
2178
2179     for (i = 0; i < len; i++)
2180         dest[i] += gain * src[i];
2181 }
2182
2183 /**
2184  * channel coupling transformation interface
2185  *
2186  * @param   apply_coupling_method   pointer to (in)dependent coupling function
2187  */
2188 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
2189                                    enum RawDataBlockType type, int elem_id,
2190                                    enum CouplingPoint coupling_point,
2191                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2192 {
2193     int i, c;
2194
2195     for (i = 0; i < MAX_ELEM_ID; i++) {
2196         ChannelElement *cce = ac->che[TYPE_CCE][i];
2197         int index = 0;
2198
2199         if (cce && cce->coup.coupling_point == coupling_point) {
2200             ChannelCoupling *coup = &cce->coup;
2201
2202             for (c = 0; c <= coup->num_coupled; c++) {
2203                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2204                     if (coup->ch_select[c] != 1) {
2205                         apply_coupling_method(ac, &cc->ch[0], cce, index);
2206                         if (coup->ch_select[c] != 0)
2207                             index++;
2208                     }
2209                     if (coup->ch_select[c] != 2)
2210                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
2211                 } else
2212                     index += 1 + (coup->ch_select[c] == 3);
2213             }
2214         }
2215     }
2216 }
2217
2218 /**
2219  * Convert spectral data to float samples, applying all supported tools as appropriate.
2220  */
2221 static void spectral_to_sample(AACContext *ac)
2222 {
2223     int i, type;
2224     for (type = 3; type >= 0; type--) {
2225         for (i = 0; i < MAX_ELEM_ID; i++) {
2226             ChannelElement *che = ac->che[type][i];
2227             if (che) {
2228                 if (type <= TYPE_CPE)
2229                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
2230                 if (ac->m4ac.object_type == AOT_AAC_LTP) {
2231                     if (che->ch[0].ics.predictor_present) {
2232                         if (che->ch[0].ics.ltp.present)
2233                             apply_ltp(ac, &che->ch[0]);
2234                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2235                             apply_ltp(ac, &che->ch[1]);
2236                     }
2237                 }
2238                 if (che->ch[0].tns.present)
2239                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2240                 if (che->ch[1].tns.present)
2241                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2242                 if (type <= TYPE_CPE)
2243                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
2244                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
2245                     imdct_and_windowing(ac, &che->ch[0]);
2246                     if (ac->m4ac.object_type == AOT_AAC_LTP)
2247                         update_ltp(ac, &che->ch[0]);
2248                     if (type == TYPE_CPE) {
2249                         imdct_and_windowing(ac, &che->ch[1]);
2250                         if (ac->m4ac.object_type == AOT_AAC_LTP)
2251                             update_ltp(ac, &che->ch[1]);
2252                     }
2253                     if (ac->m4ac.sbr > 0) {
2254                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2255                     }
2256                 }
2257                 if (type <= TYPE_CCE)
2258                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
2259             }
2260         }
2261     }
2262 }
2263
2264 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
2265 {
2266     int size;
2267     AACADTSHeaderInfo hdr_info;
2268     uint8_t layout_map[MAX_ELEM_ID*4][3];
2269     int layout_map_tags;
2270
2271     size = avpriv_aac_parse_header(gb, &hdr_info);
2272     if (size > 0) {
2273         if (hdr_info.chan_config) {
2274             ac->m4ac.chan_config = hdr_info.chan_config;
2275             if (set_default_channel_config(ac->avctx, layout_map,
2276                     &layout_map_tags, hdr_info.chan_config))
2277                 return -7;
2278             if (output_configure(ac, layout_map, layout_map_tags,
2279                                  hdr_info.chan_config,
2280                                  FFMAX(ac->output_configured, OC_TRIAL_FRAME)))
2281                 return -7;
2282         } else if (ac->output_configured != OC_LOCKED) {
2283             ac->m4ac.chan_config = 0;
2284             ac->output_configured = OC_NONE;
2285         }
2286         if (ac->output_configured != OC_LOCKED) {
2287             ac->m4ac.sbr = -1;
2288             ac->m4ac.ps  = -1;
2289             ac->m4ac.sample_rate     = hdr_info.sample_rate;
2290             ac->m4ac.sampling_index  = hdr_info.sampling_index;
2291             ac->m4ac.object_type     = hdr_info.object_type;
2292         }
2293         if (!ac->avctx->sample_rate)
2294             ac->avctx->sample_rate = hdr_info.sample_rate;
2295         if (hdr_info.num_aac_frames == 1) {
2296             if (!hdr_info.crc_absent)
2297                 skip_bits(gb, 16);
2298         } else {
2299             av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0);
2300             return -1;
2301         }
2302     }
2303     return size;
2304 }
2305
2306 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2307                                 int *got_frame_ptr, GetBitContext *gb)
2308 {
2309     AACContext *ac = avctx->priv_data;
2310     ChannelElement *che = NULL, *che_prev = NULL;
2311     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2312     int err, elem_id;
2313     int samples = 0, multiplier, audio_found = 0;
2314
2315     if (show_bits(gb, 12) == 0xfff) {
2316         if (parse_adts_frame_header(ac, gb) < 0) {
2317             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2318             return -1;
2319         }
2320         if (ac->m4ac.sampling_index > 12) {
2321             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
2322             return -1;
2323         }
2324     }
2325
2326     ac->tags_mapped = 0;
2327     // parse
2328     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
2329         elem_id = get_bits(gb, 4);
2330
2331         if (elem_type < TYPE_DSE) {
2332             if (!(che=get_che(ac, elem_type, elem_id))) {
2333                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2334                        elem_type, elem_id);
2335                 return -1;
2336             }
2337             samples = 1024;
2338         }
2339
2340         switch (elem_type) {
2341
2342         case TYPE_SCE:
2343             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2344             audio_found = 1;
2345             break;
2346
2347         case TYPE_CPE:
2348             err = decode_cpe(ac, gb, che);
2349             audio_found = 1;
2350             break;
2351
2352         case TYPE_CCE:
2353             err = decode_cce(ac, gb, che);
2354             break;
2355
2356         case TYPE_LFE:
2357             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2358             audio_found = 1;
2359             break;
2360
2361         case TYPE_DSE:
2362             err = skip_data_stream_element(ac, gb);
2363             break;
2364
2365         case TYPE_PCE: {
2366             uint8_t layout_map[MAX_ELEM_ID*4][3];
2367             int tags;
2368             tags = decode_pce(avctx, &ac->m4ac, layout_map, gb);
2369             if (tags < 0) {
2370                 err = tags;
2371                 break;
2372             }
2373             if (ac->output_configured > OC_TRIAL_PCE)
2374                 av_log(avctx, AV_LOG_ERROR,
2375                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2376             else
2377                 err = output_configure(ac, layout_map, tags, 0, OC_TRIAL_PCE);
2378             break;
2379         }
2380
2381         case TYPE_FIL:
2382             if (elem_id == 15)
2383                 elem_id += get_bits(gb, 8) - 1;
2384             if (get_bits_left(gb) < 8 * elem_id) {
2385                     av_log(avctx, AV_LOG_ERROR, overread_err);
2386                     return -1;
2387             }
2388             while (elem_id > 0)
2389                 elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2390             err = 0; /* FIXME */
2391             break;
2392
2393         default:
2394             err = -1; /* should not happen, but keeps compiler happy */
2395             break;
2396         }
2397
2398         che_prev       = che;
2399         elem_type_prev = elem_type;
2400
2401         if (err)
2402             return err;
2403
2404         if (get_bits_left(gb) < 3) {
2405             av_log(avctx, AV_LOG_ERROR, overread_err);
2406             return -1;
2407         }
2408     }
2409
2410     spectral_to_sample(ac);
2411
2412     multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
2413     samples <<= multiplier;
2414     if (ac->output_configured < OC_LOCKED) {
2415         avctx->sample_rate = ac->m4ac.sample_rate << multiplier;
2416         avctx->frame_size = samples;
2417     }
2418
2419     if (samples) {
2420         /* get output buffer */
2421         ac->frame.nb_samples = samples;
2422         if ((err = avctx->get_buffer(avctx, &ac->frame)) < 0) {
2423             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2424             return err;
2425         }
2426
2427         if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
2428             ac->fmt_conv.float_interleave((float *)ac->frame.data[0],
2429                                           (const float **)ac->output_data,
2430                                           samples, avctx->channels);
2431         else
2432             ac->fmt_conv.float_to_int16_interleave((int16_t *)ac->frame.data[0],
2433                                                    (const float **)ac->output_data,
2434                                                    samples, avctx->channels);
2435
2436         *(AVFrame *)data = ac->frame;
2437     }
2438     *got_frame_ptr = !!samples;
2439
2440     if (ac->output_configured && audio_found)
2441         ac->output_configured = OC_LOCKED;
2442
2443     return 0;
2444 }
2445
2446 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2447                             int *got_frame_ptr, AVPacket *avpkt)
2448 {
2449     AACContext *ac = avctx->priv_data;
2450     const uint8_t *buf = avpkt->data;
2451     int buf_size = avpkt->size;
2452     GetBitContext gb;
2453     int buf_consumed;
2454     int buf_offset;
2455     int err;
2456     int new_extradata_size;
2457     const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2458                                        AV_PKT_DATA_NEW_EXTRADATA,
2459                                        &new_extradata_size);
2460
2461     if (new_extradata) {
2462         av_free(avctx->extradata);
2463         avctx->extradata = av_mallocz(new_extradata_size +
2464                                       FF_INPUT_BUFFER_PADDING_SIZE);
2465         if (!avctx->extradata)
2466             return AVERROR(ENOMEM);
2467         avctx->extradata_size = new_extradata_size;
2468         memcpy(avctx->extradata, new_extradata, new_extradata_size);
2469         if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac,
2470                                          avctx->extradata,
2471                                          avctx->extradata_size*8, 1) < 0)
2472             return AVERROR_INVALIDDATA;
2473     }
2474
2475     init_get_bits(&gb, buf, buf_size * 8);
2476
2477     if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb)) < 0)
2478         return err;
2479
2480     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
2481     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
2482         if (buf[buf_offset])
2483             break;
2484
2485     return buf_size > buf_offset ? buf_consumed : buf_size;
2486 }
2487
2488 static av_cold int aac_decode_close(AVCodecContext *avctx)
2489 {
2490     AACContext *ac = avctx->priv_data;
2491     int i, type;
2492
2493     for (i = 0; i < MAX_ELEM_ID; i++) {
2494         for (type = 0; type < 4; type++) {
2495             if (ac->che[type][i])
2496                 ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
2497             av_freep(&ac->che[type][i]);
2498         }
2499     }
2500
2501     ff_mdct_end(&ac->mdct);
2502     ff_mdct_end(&ac->mdct_small);
2503     ff_mdct_end(&ac->mdct_ltp);
2504     return 0;
2505 }
2506
2507
2508 #define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
2509
2510 struct LATMContext {
2511     AACContext      aac_ctx;             ///< containing AACContext
2512     int             initialized;         ///< initilized after a valid extradata was seen
2513
2514     // parser data
2515     int             audio_mux_version_A; ///< LATM syntax version
2516     int             frame_length_type;   ///< 0/1 variable/fixed frame length
2517     int             frame_length;        ///< frame length for fixed frame length
2518 };
2519
2520 static inline uint32_t latm_get_value(GetBitContext *b)
2521 {
2522     int length = get_bits(b, 2);
2523
2524     return get_bits_long(b, (length+1)*8);
2525 }
2526
2527 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
2528                                              GetBitContext *gb, int asclen)
2529 {
2530     AACContext *ac        = &latmctx->aac_ctx;
2531     AVCodecContext *avctx = ac->avctx;
2532     MPEG4AudioConfig m4ac = {0};
2533     int config_start_bit  = get_bits_count(gb);
2534     int sync_extension    = 0;
2535     int bits_consumed, esize;
2536
2537     if (asclen) {
2538         sync_extension = 1;
2539         asclen         = FFMIN(asclen, get_bits_left(gb));
2540     } else
2541         asclen         = get_bits_left(gb);
2542
2543     if (config_start_bit % 8) {
2544         av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
2545                                "config not byte aligned.\n", 1);
2546         return AVERROR_INVALIDDATA;
2547     }
2548     if (asclen <= 0)
2549         return AVERROR_INVALIDDATA;
2550     bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2551                                          gb->buffer + (config_start_bit / 8),
2552                                          asclen, sync_extension);
2553
2554     if (bits_consumed < 0)
2555         return AVERROR_INVALIDDATA;
2556
2557     if (ac->m4ac.sample_rate != m4ac.sample_rate ||
2558         ac->m4ac.chan_config != m4ac.chan_config) {
2559
2560         av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2561         latmctx->initialized = 0;
2562
2563         esize = (bits_consumed+7) / 8;
2564
2565         if (avctx->extradata_size < esize) {
2566             av_free(avctx->extradata);
2567             avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
2568             if (!avctx->extradata)
2569                 return AVERROR(ENOMEM);
2570         }
2571
2572         avctx->extradata_size = esize;
2573         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2574         memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2575     }
2576     skip_bits_long(gb, bits_consumed);
2577
2578     return bits_consumed;
2579 }
2580
2581 static int read_stream_mux_config(struct LATMContext *latmctx,
2582                                   GetBitContext *gb)
2583 {
2584     int ret, audio_mux_version = get_bits(gb, 1);
2585
2586     latmctx->audio_mux_version_A = 0;
2587     if (audio_mux_version)
2588         latmctx->audio_mux_version_A = get_bits(gb, 1);
2589
2590     if (!latmctx->audio_mux_version_A) {
2591
2592         if (audio_mux_version)
2593             latm_get_value(gb);                 // taraFullness
2594
2595         skip_bits(gb, 1);                       // allStreamSameTimeFraming
2596         skip_bits(gb, 6);                       // numSubFrames
2597         // numPrograms
2598         if (get_bits(gb, 4)) {                  // numPrograms
2599             av_log_missing_feature(latmctx->aac_ctx.avctx,
2600                                    "multiple programs are not supported\n", 1);
2601             return AVERROR_PATCHWELCOME;
2602         }
2603
2604         // for each program (which there is only on in DVB)
2605
2606         // for each layer (which there is only on in DVB)
2607         if (get_bits(gb, 3)) {                   // numLayer
2608             av_log_missing_feature(latmctx->aac_ctx.avctx,
2609                                    "multiple layers are not supported\n", 1);
2610             return AVERROR_PATCHWELCOME;
2611         }
2612
2613         // for all but first stream: use_same_config = get_bits(gb, 1);
2614         if (!audio_mux_version) {
2615             if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
2616                 return ret;
2617         } else {
2618             int ascLen = latm_get_value(gb);
2619             if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2620                 return ret;
2621             ascLen -= ret;
2622             skip_bits_long(gb, ascLen);
2623         }
2624
2625         latmctx->frame_length_type = get_bits(gb, 3);
2626         switch (latmctx->frame_length_type) {
2627         case 0:
2628             skip_bits(gb, 8);       // latmBufferFullness
2629             break;
2630         case 1:
2631             latmctx->frame_length = get_bits(gb, 9);
2632             break;
2633         case 3:
2634         case 4:
2635         case 5:
2636             skip_bits(gb, 6);       // CELP frame length table index
2637             break;
2638         case 6:
2639         case 7:
2640             skip_bits(gb, 1);       // HVXC frame length table index
2641             break;
2642         }
2643
2644         if (get_bits(gb, 1)) {                  // other data
2645             if (audio_mux_version) {
2646                 latm_get_value(gb);             // other_data_bits
2647             } else {
2648                 int esc;
2649                 do {
2650                     esc = get_bits(gb, 1);
2651                     skip_bits(gb, 8);
2652                 } while (esc);
2653             }
2654         }
2655
2656         if (get_bits(gb, 1))                     // crc present
2657             skip_bits(gb, 8);                    // config_crc
2658     }
2659
2660     return 0;
2661 }
2662
2663 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
2664 {
2665     uint8_t tmp;
2666
2667     if (ctx->frame_length_type == 0) {
2668         int mux_slot_length = 0;
2669         do {
2670             tmp = get_bits(gb, 8);
2671             mux_slot_length += tmp;
2672         } while (tmp == 255);
2673         return mux_slot_length;
2674     } else if (ctx->frame_length_type == 1) {
2675         return ctx->frame_length;
2676     } else if (ctx->frame_length_type == 3 ||
2677                ctx->frame_length_type == 5 ||
2678                ctx->frame_length_type == 7) {
2679         skip_bits(gb, 2);          // mux_slot_length_coded
2680     }
2681     return 0;
2682 }
2683
2684 static int read_audio_mux_element(struct LATMContext *latmctx,
2685                                   GetBitContext *gb)
2686 {
2687     int err;
2688     uint8_t use_same_mux = get_bits(gb, 1);
2689     if (!use_same_mux) {
2690         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2691             return err;
2692     } else if (!latmctx->aac_ctx.avctx->extradata) {
2693         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2694                "no decoder config found\n");
2695         return AVERROR(EAGAIN);
2696     }
2697     if (latmctx->audio_mux_version_A == 0) {
2698         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2699         if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
2700             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2701             return AVERROR_INVALIDDATA;
2702         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
2703             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2704                    "frame length mismatch %d << %d\n",
2705                    mux_slot_length_bytes * 8, get_bits_left(gb));
2706             return AVERROR_INVALIDDATA;
2707         }
2708     }
2709     return 0;
2710 }
2711
2712
2713 static int latm_decode_frame(AVCodecContext *avctx, void *out,
2714                              int *got_frame_ptr, AVPacket *avpkt)
2715 {
2716     struct LATMContext *latmctx = avctx->priv_data;
2717     int                 muxlength, err;
2718     GetBitContext       gb;
2719
2720     init_get_bits(&gb, avpkt->data, avpkt->size * 8);
2721
2722     // check for LOAS sync word
2723     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2724         return AVERROR_INVALIDDATA;
2725
2726     muxlength = get_bits(&gb, 13) + 3;
2727     // not enough data, the parser should have sorted this
2728     if (muxlength > avpkt->size)
2729         return AVERROR_INVALIDDATA;
2730
2731     if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2732         return err;
2733
2734     if (!latmctx->initialized) {
2735         if (!avctx->extradata) {
2736             *got_frame_ptr = 0;
2737             return avpkt->size;
2738         } else {
2739             if ((err = decode_audio_specific_config(
2740                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.m4ac,
2741                     avctx->extradata, avctx->extradata_size*8, 1)) < 0)
2742                 return err;
2743             latmctx->initialized = 1;
2744         }
2745     }
2746
2747     if (show_bits(&gb, 12) == 0xfff) {
2748         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2749                "ADTS header detected, probably as result of configuration "
2750                "misparsing\n");
2751         return AVERROR_INVALIDDATA;
2752     }
2753
2754     if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb)) < 0)
2755         return err;
2756
2757     return muxlength;
2758 }
2759
2760 av_cold static int latm_decode_init(AVCodecContext *avctx)
2761 {
2762     struct LATMContext *latmctx = avctx->priv_data;
2763     int ret = aac_decode_init(avctx);
2764
2765     if (avctx->extradata_size > 0)
2766         latmctx->initialized = !ret;
2767
2768     return ret;
2769 }
2770
2771
2772 AVCodec ff_aac_decoder = {
2773     .name           = "aac",
2774     .type           = AVMEDIA_TYPE_AUDIO,
2775     .id             = CODEC_ID_AAC,
2776     .priv_data_size = sizeof(AACContext),
2777     .init           = aac_decode_init,
2778     .close          = aac_decode_close,
2779     .decode         = aac_decode_frame,
2780     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
2781     .sample_fmts = (const enum AVSampleFormat[]) {
2782         AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2783     },
2784     .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
2785     .channel_layouts = aac_channel_layout,
2786 };
2787
2788 /*
2789     Note: This decoder filter is intended to decode LATM streams transferred
2790     in MPEG transport streams which only contain one program.
2791     To do a more complex LATM demuxing a separate LATM demuxer should be used.
2792 */
2793 AVCodec ff_aac_latm_decoder = {
2794     .name = "aac_latm",
2795     .type = AVMEDIA_TYPE_AUDIO,
2796     .id   = CODEC_ID_AAC_LATM,
2797     .priv_data_size = sizeof(struct LATMContext),
2798     .init   = latm_decode_init,
2799     .close  = aac_decode_close,
2800     .decode = latm_decode_frame,
2801     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
2802     .sample_fmts = (const enum AVSampleFormat[]) {
2803         AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
2804     },
2805     .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
2806     .channel_layouts = aac_channel_layout,
2807 };