psymodel: extend API to include PE and bit allocation.

[ffmpeg] / libavcodec / aacdec.c
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c

index 7c23bdf0cb4bab546a78966d6a1879cd2a4c34c7..3ce0dce4914d549e61586e399e663877d19d8446 100644 (file)
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -3,20 +3,24 @@
   * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   *
- * This file is part of FFmpeg.
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010      Janne Grunau <janne-ffmpeg@jannau.net>
   *
- * FFmpeg is free software; you can redistribute it and/or
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -38,7 +42,7 @@
   * Y                    filterbank - standard
   * N (code in SoC repo) filterbank - Scalable Sample Rate
   * Y                    Temporal Noise Shaping
- * N (code in SoC repo) Long Term Prediction
+ * Y                    Long Term Prediction
   * Y                    intensity stereo
   * Y                    channel coupling
   * Y                    frequency domain prediction
@@ -81,7 +85,10 @@
  #include "get_bits.h"
  #include "dsputil.h"
  #include "fft.h"
+#include "fmtconvert.h"
  #include "lpc.h"
+#include "kbdwin.h"
+#include "sinewin.h"
  
  #include "aac.h"
  #include "aactab.h"
@@ -163,7 +170,7 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
  /**
   * Check for the channel element in the current channel position configuration.
   * If it exists, make sure the appropriate element is allocated and map the
- * channel order to match the internal FFmpeg channel layout.
+ * channel order to match the internal Libav channel layout.
   *
   * @param   che_pos current channel position configuration
   * @param   type channel element type
@@ -452,7 +459,7 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
  static int decode_audio_specific_config(AACContext *ac,
                                          AVCodecContext *avctx,
                                          MPEG4AudioConfig *m4ac,
-                                        void *data, int data_size)
+                                        const uint8_t *data, int data_size)
  {
      GetBitContext gb;
      int i;
@@ -473,6 +480,7 @@ static int decode_audio_specific_config(AACContext *ac,
      switch (m4ac->object_type) {
      case AOT_AAC_MAIN:
      case AOT_AAC_LC:
+    case AOT_AAC_LTP:
          if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
              return -1;
          break;
@@ -541,7 +549,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
              return -1;
      }
  
-    avctx->sample_fmt = SAMPLE_FMT_S16;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  
      AAC_INIT_VLC_STATIC( 0, 304);
      AAC_INIT_VLC_STATIC( 1, 270);
@@ -558,22 +566,15 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
      ff_aac_sbr_init();
  
      dsputil_init(&ac->dsp, avctx);
+    ff_fmt_convert_init(&ac->fmt_conv, avctx);
  
      ac->random_state = 0x1f2e3d4c;
  
      // -1024 - Compensate wrong IMDCT method.
-    // 32768 - Required to scale values to the correct range for the bias method
-    //         for float to int16 conversion.
-
-    if (ac->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
-        ac->add_bias  = 385.0f;
-        ac->sf_scale  = 1. / (-1024. * 32768.);
-        ac->sf_offset = 0;
-    } else {
-        ac->add_bias  = 0.0f;
-        ac->sf_scale  = 1. / -1024.;
-        ac->sf_offset = 60;
-    }
+    // 60    - Required to scale values to the correct range [-32768,32767]
+    //         for float to int16 conversion. (1 << (60 / 4)) == 32768
+    ac->sf_scale  = 1. / -1024.;
+    ac->sf_offset = 60;
  
      ff_aac_tableinit();
  
@@ -582,8 +583,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                      ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                      352);
  
-    ff_mdct_init(&ac->mdct, 11, 1, 1.0);
-    ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
+    ff_mdct_init(&ac->mdct,       11, 1, 1.0);
+    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0);
+    ff_mdct_init(&ac->mdct_ltp,   11, 0, 1.0);
      // window initialization
      ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
      ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -632,6 +634,20 @@ static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
      return 0;
  }
  
+/**
+ * Decode Long Term Prediction data; reference: table 4.xx.
+ *
+ * Reads, in this order, an 11-bit lag, a 3-bit index into ltp_coef and
+ * one "used" flag per scalefactor band.
+ *
+ * @param   ac      decoder context; its sf_scale is folded into the stored
+ *                  prediction coefficient
+ * @param   ltp     receives the decoded lag, coefficient and per-band flags
+ * @param   gb      bitstream reader positioned at the LTP data
+ * @param   max_sfb number of scalefactor bands in use; flags are read for at
+ *                  most MAX_LTP_LONG_SFB of them
+ */
+static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
+                       GetBitContext *gb, uint8_t max_sfb)
+{
+    int sfb;
+
+    ltp->lag  = get_bits(gb, 11);
+    ltp->coef = ltp_coef[get_bits(gb, 3)] * ac->sf_scale; // table value pre-multiplied by the output scale
+    for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
+        ltp->used[sfb] = get_bits1(gb);
+}
+
  /**
   * Decode Individual Channel Stream info; reference: table 4.6.
   *
@@ -686,9 +702,8 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                  memset(ics, 0, sizeof(IndividualChannelStream));
                  return -1;
              } else {
-                av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
-                memset(ics, 0, sizeof(IndividualChannelStream));
-                return -1;
+                if ((ics->ltp.present = get_bits(gb, 1)))
+                    decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
              }
          }
      }
@@ -1058,9 +1073,6 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
  
                              UPDATE_CACHE(re, gb);
                              GET_VLC(code, re, gb, vlc_tab, 8, 2);
-#if MIN_CACHE_BITS < 20
-                            UPDATE_CACHE(re, gb);
-#endif
                              cb_idx = cb_vector_idx[code];
                              nnz = cb_idx >> 8 & 15;
                              bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
@@ -1153,12 +1165,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                                          return -1;
                                      }
  
-#if MIN_CACHE_BITS < 21
-                                    LAST_SKIP_BITS(re, gb, b + 1);
-                                    UPDATE_CACHE(re, gb);
-#else
                                      SKIP_BITS(re, gb, b + 1);
-#endif
                                      b += 4;
                                      n = (1 << b) + SHOW_UBITS(re, gb, b);
                                      LAST_SKIP_BITS(re, gb, b);
@@ -1381,13 +1388,13 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
   *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
   *                      [3] reserved for scalable AAC
   */
-static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
+static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
  {
      const IndividualChannelStream *ics = &cpe->ch[1].ics;
      SingleChannelElement         *sce1 = &cpe->ch[1];
      float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
      const uint16_t *offsets = ics->swb_offset;
-    int g, group, i, k, idx = 0;
+    int g, group, i, idx = 0;
      int c;
      float scale;
      for (g = 0; g < ics->num_window_groups; g++) {
@@ -1400,8 +1407,10 @@ static void apply_intensity_stereo(ChannelElement *cpe, int ms_present)
                          c *= 1 - 2 * cpe->ms_mask[idx];
                      scale = c * sce1->sf[idx];
                      for (group = 0; group < ics->group_len[g]; group++)
-                        for (k = offsets[i]; k < offsets[i + 1]; k++)
-                            coef1[group * 128 + k] = scale * coef0[group * 128 + k];
+                        ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
+                                                   coef0 + group * 128 + offsets[i],
+                                                   scale,
+                                                   offsets[i + 1] - offsets[i]);
                  }
              } else {
                  int bt_run_end = sce1->band_type_run_end[idx];
@@ -1430,6 +1439,9 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
          i = cpe->ch[1].ics.use_kb_window[0];
          cpe->ch[1].ics = cpe->ch[0].ics;
          cpe->ch[1].ics.use_kb_window[1] = i;
+        if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN))
+            if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
+                decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
          ms_present = get_bits(gb, 2);
          if (ms_present == 3) {
              av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
@@ -1451,7 +1463,7 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
          }
      }
  
-    apply_intensity_stereo(cpe, ms_present);
+    apply_intensity_stereo(ac, cpe, ms_present);
      return 0;
  }
  
@@ -1669,6 +1681,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
      int w, filt, m, i;
      int bottom, top, order, start, end, size, inc;
      float lpc[TNS_MAX_ORDER];
+    float tmp[TNS_MAX_ORDER];
  
      for (w = 0; w < ics->num_windows; w++) {
          bottom = ics->num_swb;
@@ -1694,18 +1707,125 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
              }
              start += w * 128;
  
-            // ar filter
-            for (m = 0; m < size; m++, start += inc)
-                for (i = 1; i <= FFMIN(m, order); i++)
-                    coef[start] -= coef[start - i * inc] * lpc[i - 1];
+            if (decode) {
+                // ar filter
+                for (m = 0; m < size; m++, start += inc)
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] -= coef[start - i * inc] * lpc[i - 1];
+            } else {
+                // ma filter
+                for (m = 0; m < size; m++, start += inc) {
+                    tmp[0] = coef[start];
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] += tmp[i] * lpc[i - 1];
+                    for (i = order; i > 0; i--)
+                        tmp[i] = tmp[i - 1];
+                }
+            }
          }
      }
  }
  
+/**
+ *  Apply windowing and MDCT to obtain the spectral
+ *  coefficient from the predicted sample by LTP.
+ *
+ *  The window halves are chosen from the current (use_kb_window[0]) and the
+ *  previous (use_kb_window[1]) window shape and from window_sequence[0].
+ *
+ *  @param ac  decoder context providing the DSP routines and mdct_ltp
+ *  @param out receives the output of ac->mdct_ltp.mdct_calc()
+ *  @param in  2048 predicted time-domain samples; windowed in place
+ *  @param ics channel stream info selecting window shapes and sequence
+ */
+static void windowing_and_mdct_ltp(AACContext *ac, float *out,
+                                   float *in, IndividualChannelStream *ics)
+{
+    const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
+
+    // first 1024 samples: shaped with the previous frame's window
+    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
+        ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
+    } else {
+        memset(in, 0, 448 * sizeof(float));
+        ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
+        // in[576..1023] is passed through unchanged; no copy is needed
+        // (a memcpy() onto itself would be an overlapping copy)
+    }
+    // last 1024 samples: shaped with the current frame's window, reversed
+    if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
+        ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
+    } else {
+        // in[1024..1471] is passed through unchanged
+        ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
+        memset(in + 1024 + 576, 0, 448 * sizeof(float));
+    }
+    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
+}
+
+/**
+ * Apply the long term prediction
+ *
+ * Builds a predicted time signal from sce->ltp_state delayed by the decoded
+ * lag and scaled by the LTP coefficient, transforms it with
+ * windowing_and_mdct_ltp() (plus TNS in non-decode mode when TNS is present)
+ * and adds the result to sce->coeffs in every scalefactor band flagged as
+ * used. Nothing is done for EIGHT_SHORT_SEQUENCE frames.
+ *
+ * sce->ret and ac->buf_mdct are used as scratch buffers.
+ */
+static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    const LongTermPrediction *ltp = &sce->ics.ltp;
+    const uint16_t *band_start    = sce->ics.swb_offset;
+    float *pred_time              = sce->ret;
+    float *pred_freq              = ac->buf_mdct;
+    int n, k, band, band_count, valid;
+
+    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+        return;
+
+    // with a lag below 1024 only lag + 1024 history samples are available
+    valid = ltp->lag < 1024 ? ltp->lag + 1024 : 2048;
+
+    for (n = 0; n < valid; n++)
+        pred_time[n] = sce->ltp_state[n + 2048 - ltp->lag] * ltp->coef;
+    // zero-pad the remainder of the 2048-sample block
+    memset(pred_time + valid, 0, (2048 - valid) * sizeof(float));
+
+    windowing_and_mdct_ltp(ac, pred_freq, pred_time, &sce->ics);
+
+    if (sce->tns.present)
+        apply_tns(pred_freq, &sce->tns, &sce->ics, 0);
+
+    band_count = FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB);
+    for (band = 0; band < band_count; band++) {
+        if (!ltp->used[band])
+            continue;
+        for (k = band_start[band]; k < band_start[band + 1]; k++)
+            sce->coeffs[k] += pred_freq[k];
+    }
+}
+
+/**
+ * Update the LTP buffer for next frame
+ *
+ * Builds 1024 samples in sce->coeffs from sce->saved and ac->buf_mdct,
+ * depending on the window sequence of the current frame. Then the LTP
+ * history sce->ltp_state is advanced: entries [1024, 2048) move to
+ * [0, 1024), sce->ret is stored at [1024, 2048) and the newly built
+ * samples at [2048, 3072), both converted to int16.
+ */
+static void update_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    float *saved     = sce->saved;
+    float *saved_ltp = sce->coeffs; // sce->coeffs is reused as a 1024-sample work buffer
+    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    int i;
+
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        memcpy(saved_ltp,       saved, 512 * sizeof(float)); // [448, 512) is overwritten below
+        memset(saved_ltp + 576, 0,     448 * sizeof(float));
+        ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
+        for (i = 0; i < 64; i++)
+            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
+        memset(saved_ltp + 576, 0,                  448 * sizeof(float));
+        ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
+        for (i = 0; i < 64; i++)
+            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
+    } else { // LONG_STOP or ONLY_LONG
+        ac->dsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
+        for (i = 0; i < 512; i++)
+            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
+    }
+
+    memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t)); // drop the oldest 1024 entries
+    ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret,  1024);
+    ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024);
+}
+
  /**
   * Conduct IMDCT and windowing.
   */
-static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
+static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
  {
      IndividualChannelStream *ics = &sce->ics;
      float *in    = sce->coeffs;
@@ -1721,9 +1841,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float
      // imdct
      if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
          for (i = 0; i < 1024; i += 128)
-            ff_imdct_half(&ac->mdct_small, buf + i, in + i);
+            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
      } else
-        ff_imdct_half(&ac->mdct, buf, in);
+        ac->mdct.imdct_half(&ac->mdct, buf, in);
  
      /* window overlapping
       * NOTE: To simplify the overlapping code, all 'meaningless' short to long
@@ -1733,32 +1853,29 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float
       */
      if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
              (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
-        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
+        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
      } else {
-        for (i = 0; i < 448; i++)
-            out[i] = saved[i] + bias;
+        memcpy(                        out,               saved,            448 * sizeof(float));
  
          if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
-            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
+            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
+            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
+            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
              memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
          } else {
-            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
-            for (i = 576; i < 1024; i++)
-                out[i] = buf[i-512] + bias;
+            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
+            memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
          }
      }
  
      // buffer update
      if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        for (i = 0; i < 64; i++)
-            saved[i] = temp[64 + i] - bias;
-        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
-        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
-        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
+        memcpy(                    saved,       temp + 64,         64 * sizeof(float));
+        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
+        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
+        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
          memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
      } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
          memcpy(                    saved,       buf + 512,        448 * sizeof(float));
@@ -1815,13 +1932,12 @@ static void apply_independent_coupling(AACContext *ac,
  {
      int i;
      const float gain = cce->coup.gain[index][0];
-    const float bias = ac->add_bias;
      const float *src = cce->ch[0].ret;
      float *dest = target->ret;
      const int len = 1024 << (ac->m4ac.sbr == 1);
  
      for (i = 0; i < len; i++)
-        dest[i] += gain * (src[i] - bias);
+        dest[i] += gain * src[i];
  }
  
  /**
@@ -1865,13 +1981,20 @@ static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
  static void spectral_to_sample(AACContext *ac)
  {
      int i, type;
-    float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
      for (type = 3; type >= 0; type--) {
          for (i = 0; i < MAX_ELEM_ID; i++) {
              ChannelElement *che = ac->che[type][i];
              if (che) {
                  if (type <= TYPE_CPE)
                      apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
+                if (ac->m4ac.object_type == AOT_AAC_LTP) {
+                    if (che->ch[0].ics.predictor_present) {
+                        if (che->ch[0].ics.ltp.present)
+                            apply_ltp(ac, &che->ch[0]);
+                        if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
+                            apply_ltp(ac, &che->ch[1]);
+                    }
+                }
                  if (che->ch[0].tns.present)
                      apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
                  if (che->ch[1].tns.present)
@@ -1879,9 +2002,13 @@ static void spectral_to_sample(AACContext *ac)
                  if (type <= TYPE_CPE)
                      apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
                  if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
-                    imdct_and_windowing(ac, &che->ch[0], imdct_bias);
+                    imdct_and_windowing(ac, &che->ch[0]);
+                    if (ac->m4ac.object_type == AOT_AAC_LTP)
+                        update_ltp(ac, &che->ch[0]);
                      if (type == TYPE_CPE) {
-                        imdct_and_windowing(ac, &che->ch[1], imdct_bias);
+                        imdct_and_windowing(ac, &che->ch[1]);
+                        if (ac->m4ac.object_type == AOT_AAC_LTP)
+                            update_ltp(ac, &che->ch[1]);
                      }
                      if (ac->m4ac.sbr > 0) {
                          ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
@@ -2049,7 +2176,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
      *data_size = data_size_tmp;
  
      if (samples)
-        ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+        ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
  
      if (ac->output_configured)
          ac->output_configured = OC_LOCKED;
@@ -2095,10 +2222,268 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
  
      ff_mdct_end(&ac->mdct);
      ff_mdct_end(&ac->mdct_small);
+    ff_mdct_end(&ac->mdct_ltp);
+    return 0;
+}
+
+
+#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
+
+struct LATMContext { // private data of the aac_latm decoder (see priv_data_size)
+    AACContext      aac_ctx;             ///< containing AACContext
+    int             initialized;         ///< initialized after a valid extradata was seen
+
+    // parser data
+    int             audio_mux_version_A; ///< LATM syntax version
+    int             frame_length_type;   ///< 0/1 variable/fixed frame length
+    int             frame_length;        ///< frame length for fixed frame length
+};
+
+/**
+ * Read a variable-length LATM value: a 2-bit field gives the number of
+ * value bytes minus one, then that many bytes (1 to 4) are read and returned.
+ */
+static inline uint32_t latm_get_value(GetBitContext *b)
+{
+    const int bytes_minus_one = get_bits(b, 2);
+    const int value_bits      = (bytes_minus_one + 1) * 8;
+
+    return get_bits_long(b, value_bits);
+}
+
+/**
+ * Parse the audio specific config found at the current bitstream position
+ * and store a copy of its bytes as the codec extradata.
+ *
+ * The config must start on a byte boundary. On success the reader is
+ * advanced past the config.
+ *
+ * @param latmctx LATM decoder context
+ * @param gb      bitstream reader positioned at the start of the config
+ * @return number of bits occupied by the config, AVERROR_INVALIDDATA if the
+ *         config is unaligned or cannot be parsed, AVERROR(ENOMEM) if the
+ *         extradata buffer cannot be allocated
+ */
+static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
+                                             GetBitContext *gb)
+{
+    AVCodecContext *avctx = latmctx->aac_ctx.avctx;
+    MPEG4AudioConfig m4ac;
+    int  config_start_bit = get_bits_count(gb);
+    int     bits_consumed, esize;
+
+    if (config_start_bit % 8) {
+        av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
+                               "config not byte aligned.\n", 1);
+        return AVERROR_INVALIDDATA;
+    } else {
+        // parse only (no AACContext is passed) to learn the config length
+        bits_consumed =
+            decode_audio_specific_config(NULL, avctx, &m4ac,
+                                         gb->buffer + (config_start_bit / 8),
+                                         get_bits_left(gb) / 8);
+
+        if (bits_consumed < 0)
+            return AVERROR_INVALIDDATA;
+
+        esize = (bits_consumed+7) / 8;
+
+        if (avctx->extradata_size <= esize) {
+            av_free(avctx->extradata);
+            avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!avctx->extradata) {
+                // the old buffer is already freed: do not leave a stale,
+                // non-zero size next to a NULL pointer
+                avctx->extradata_size = 0;
+                return AVERROR(ENOMEM);
+            }
+        }
+
+        avctx->extradata_size = esize;
+        memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
+        memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+
+        skip_bits_long(gb, bits_consumed);
+    }
+
+    return bits_consumed;
+}
+
+/**
+ * Parse a LATM stream mux config.
+ *
+ * Stores audio_mux_version_A, frame_length_type and (for type 1)
+ * frame_length in latmctx; the embedded audio specific config is handled by
+ * latm_decode_audio_specific_config(). Nothing beyond the version bits is
+ * parsed when audio_mux_version_A is non-zero.
+ *
+ * @return 0 on success, AVERROR_PATCHWELCOME for multiple programs or
+ *         layers, or the negative error of the audio specific config parser
+ */
+static int read_stream_mux_config(struct LATMContext *latmctx,
+                                  GetBitContext *gb)
+{
+    const int audio_mux_version = get_bits(gb, 1);
+    int consumed;
+
+    latmctx->audio_mux_version_A = audio_mux_version ? get_bits(gb, 1) : 0;
+
+    if (latmctx->audio_mux_version_A)
+        return 0;
+
+    if (audio_mux_version)
+        latm_get_value(gb);                     // taraFullness
+
+    skip_bits(gb, 1);                           // allStreamSameTimeFraming
+    skip_bits(gb, 6);                           // numSubFrames
+    if (get_bits(gb, 4)) {                      // numPrograms
+        av_log_missing_feature(latmctx->aac_ctx.avctx,
+                               "multiple programs are not supported\n", 1);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // for each program (of which there is only one in DVB)
+
+    // for each layer (of which there is only one in DVB)
+    if (get_bits(gb, 3)) {                      // numLayer
+        av_log_missing_feature(latmctx->aac_ctx.avctx,
+                               "multiple layers are not supported\n", 1);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // for all but first stream: use_same_config = get_bits(gb, 1);
+    if (audio_mux_version) {
+        int asc_len = latm_get_value(gb);
+
+        consumed = latm_decode_audio_specific_config(latmctx, gb);
+        if (consumed < 0)
+            return consumed;
+        // skip whatever is left of the declared config length
+        // NOTE(review): asc_len - consumed is not checked for being negative
+        skip_bits_long(gb, asc_len - consumed);
+    } else {
+        consumed = latm_decode_audio_specific_config(latmctx, gb);
+        if (consumed < 0)
+            return consumed;
+    }
+
+    latmctx->frame_length_type = get_bits(gb, 3);
+    switch (latmctx->frame_length_type) {
+    case 0:
+        skip_bits(gb, 8);                       // latmBufferFullness
+        break;
+    case 1:
+        latmctx->frame_length = get_bits(gb, 9);
+        break;
+    case 3:
+    case 4:
+    case 5:
+        skip_bits(gb, 6);                       // CELP frame length table index
+        break;
+    case 6:
+    case 7:
+        skip_bits(gb, 1);                       // HVXC frame length table index
+        break;
+    default:                                    // type 2: nothing to read
+        break;
+    }
+
+    if (get_bits(gb, 1)) {                      // other data
+        if (audio_mux_version) {
+            latm_get_value(gb);                 // other_data_bits
+        } else {
+            // escape-coded length: one escape bit plus 8 bits, repeated
+            // while the escape bit is set
+            for (;;) {
+                const int esc = get_bits(gb, 1);
+                skip_bits(gb, 8);
+                if (!esc)
+                    break;
+            }
+        }
+    }
+
+    if (get_bits(gb, 1))                        // crc present
+        skip_bits(gb, 8);                       // config_crc
+
+    return 0;
+}
+
+/**
+ * Read the payload length info for the current frame_length_type.
+ *
+ * Type 0 sums 8-bit length bytes until one differs from 255; type 1 uses the
+ * fixed frame_length stored in the context; types 3, 5 and 7 skip a 2-bit
+ * field and report 0, as do all remaining types.
+ *
+ * @return payload length (0 when it is not signalled this way)
+ */
+static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
+{
+    int total = 0;
+    int chunk;
+
+    switch (ctx->frame_length_type) {
+    case 0:
+        do {
+            chunk  = get_bits(gb, 8);
+            total += chunk;
+        } while (chunk == 255);
+        return total;
+    case 1:
+        return ctx->frame_length;
+    case 3:
+    case 5:
+    case 7:
+        skip_bits(gb, 2);          // mux_slot_length_coded
+        break;
+    default:
+        break;
+    }
      return 0;
  }
  
-AVCodec aac_decoder = {
+/**
+ * Parse the start of an audio mux element: an optional stream mux config
+ * followed, for audio_mux_version_A == 0, by the payload length info, which
+ * is checked against the number of bits left in the reader.
+ *
+ * @return 0 on success, AVERROR(EAGAIN) if the previous config should be
+ *         reused but no extradata is set yet, AVERROR_INVALIDDATA if the
+ *         signalled length does not fit the remaining data, or the negative
+ *         error of read_stream_mux_config()
+ */
+static int read_audio_mux_element(struct LATMContext *latmctx,
+                                  GetBitContext *gb)
+{
+    int payload_bits;
+
+    if (get_bits(gb, 1)) {                      // use_same_mux
+        if (!latmctx->aac_ctx.avctx->extradata) {
+            av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
+                   "no decoder config found\n");
+            return AVERROR(EAGAIN);
+        }
+    } else {
+        int err = read_stream_mux_config(latmctx, gb);
+        if (err < 0)
+            return err;
+    }
+
+    if (latmctx->audio_mux_version_A != 0)
+        return 0;
+
+    payload_bits = read_payload_length_info(latmctx, gb) * 8;
+
+    if (payload_bits > get_bits_left(gb)) {
+        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+    // tolerate up to 256 surplus bits after the payload
+    if (payload_bits + 256 < get_bits_left(gb)) {
+        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
+               "frame length mismatch %d << %d\n",
+               payload_bits, get_bits_left(gb));
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Decode one packet that starts with a LOAS sync word.
+ *
+ * After the sync word and length field, the audio mux element header is
+ * parsed, the AAC decoder is initialized on first use once extradata is
+ * available, and the payload is passed to aac_decode_frame_int().
+ *
+ * @return the mux length taken from the header on success, avpkt->size (with
+ *         *out_size set to 0) while no extradata is available, 0 for an
+ *         empty packet, or a negative error code
+ */
+static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
+                             AVPacket *avpkt)
+{
+    struct LATMContext *latmctx = avctx->priv_data;
+    GetBitContext       gb;
+    int                 mux_length, ret;
+
+    if (!avpkt->size)
+        return 0;
+
+    init_get_bits(&gb, avpkt->data, avpkt->size * 8);
+
+    // check for LOAS sync word
+    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
+        return AVERROR_INVALIDDATA;
+
+    // 13-bit length field; the added 3 presumably accounts for the
+    // 24 bits of sync word and length field themselves
+    mux_length = get_bits(&gb, 13) + 3;
+    // not enough data, the parser should have sorted this
+    if (mux_length > avpkt->size)
+        return AVERROR_INVALIDDATA;
+
+    ret = read_audio_mux_element(latmctx, &gb);
+    if (ret < 0)
+        return ret;
+
+    if (!latmctx->initialized) {
+        if (!avctx->extradata) {
+            // no config seen yet: consume the packet without output
+            *out_size = 0;
+            return avpkt->size;
+        }
+        ret = aac_decode_init(avctx);
+        if (ret < 0)
+            return ret;
+        latmctx->initialized = 1;
+    }
+
+    if (show_bits(&gb, 12) == 0xfff) {
+        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
+               "ADTS header detected, probably as result of configuration "
+               "misparsing\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ret = aac_decode_frame_int(avctx, out, out_size, &gb);
+    if (ret < 0)
+        return ret;
+
+    return mux_length;
+}
+
+/**
+ * Initialize the LATM decoder by running the plain AAC decoder init.
+ *
+ * The context is marked as initialized only when extradata was supplied
+ * and aac_decode_init() returned 0.
+ *
+ * @return the return value of aac_decode_init()
+ */
+av_cold static int latm_decode_init(AVCodecContext *avctx)
+{
+    struct LATMContext *latmctx = avctx->priv_data;
+    const int ret = aac_decode_init(avctx);
+
+    latmctx->initialized = avctx->extradata_size > 0 && !ret;
+
+    return ret;
+}
+
+
+AVCodec ff_aac_decoder = {
      "aac",
      AVMEDIA_TYPE_AUDIO,
      CODEC_ID_AAC,
@@ -2108,8 +2493,28 @@ AVCodec aac_decoder = {
      aac_decode_close,
      aac_decode_frame,
      .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
-    .sample_fmts = (const enum SampleFormat[]) {
-        SAMPLE_FMT_S16,SAMPLE_FMT_NONE
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+    },
+    .channel_layouts = aac_channel_layout,
+};
+
+/*
+    Note: This decoder filter is intended to decode LATM streams transferred
+    in MPEG transport streams which only contain one program.
+    For more complex LATM demuxing, a separate LATM demuxer should be used.
+*/
+AVCodec ff_aac_latm_decoder = {
+    .name = "aac_latm",
+    .type = AVMEDIA_TYPE_AUDIO,
+    .id   = CODEC_ID_AAC_LATM,
+    .priv_data_size = sizeof(struct LATMContext),
+    .init   = latm_decode_init,
+    .close  = aac_decode_close, // same close function as ff_aac_decoder
+    .decode = latm_decode_frame,
+    .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
      },
      .channel_layouts = aac_channel_layout,
  };