git.sesse.net Git - ffmpeg/blobdiff - libavcodec/aac.c
Add function to export EDGE_WIDTH from libavcodec.
[ffmpeg] / libavcodec / aac.c
index 1341258a7f287acec92750c0cdd1c41b4c97bca2..c4636015a7ead4d7dbc9d3452857899af7073a57 100644 (file)
@@ -62,7 +62,7 @@
  * N                    MIDI
  * N                    Harmonic and Individual Lines plus Noise
  * N                    Text-To-Speech Interface
- * N (in progress)      Spectral Band Replication
+ * Y                    Spectral Band Replication
  * Y (not in this code) Layer-1
  * Y (not in this code) Layer-2
  * Y (not in this code) Layer-3
 #include "internal.h"
 #include "get_bits.h"
 #include "dsputil.h"
+#include "fft.h"
 #include "lpc.h"
 
 #include "aac.h"
 #include "aactab.h"
 #include "aacdectab.h"
+#include "cbrt_tablegen.h"
+#include "sbr.h"
+#include "aacsbr.h"
 #include "mpeg4audio.h"
 #include "aac_parser.h"
 
 #include <math.h>
 #include <string.h>
 
+#if ARCH_ARM
+#   include "arm/aac.h"
+#endif
+
 union float754 {
     float f;
     uint32_t i;
@@ -101,7 +109,7 @@ union float754 {
 static VLC vlc_scalefactors;
 static VLC vlc_spectral[11];
 
-static uint32_t cbrt_tab[1<<13];
+static const char overread_err[] = "Input buffer exhausted before END element found\n";
 
 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
 {
@@ -165,7 +173,7 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
  *
  * @return  Returns error status. 0 - OK, !0 - error
  */
-static int che_configure(AACContext *ac,
+static av_cold int che_configure(AACContext *ac,
                          enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                          int type, int id,
                          int *channels)
@@ -173,14 +181,18 @@ static int che_configure(AACContext *ac,
     if (che_pos[type][id]) {
         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
             return AVERROR(ENOMEM);
+        ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
         if (type != TYPE_CCE) {
             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
             if (type == TYPE_CPE) {
                 ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
             }
         }
-    } else
+    } else {
+        if (ac->che[type][id])
+            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
         av_freep(&ac->che[type][id]);
+    }
     return 0;
 }
 
@@ -192,7 +204,7 @@ static int che_configure(AACContext *ac,
  *
  * @return  Returns error status. 0 - OK, !0 - error
  */
-static int output_configure(AACContext *ac,
+static av_cold int output_configure(AACContext *ac,
                             enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                             enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                             int channel_config, enum OCStatus oc_type)
@@ -274,6 +286,7 @@ static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_EL
                       GetBitContext *gb)
 {
     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
+    int comment_len;
 
     skip_bits(gb, 2);  // object_type
 
@@ -308,7 +321,12 @@ static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_EL
     align_get_bits(gb);
 
     /* comment field, first byte is length */
-    skip_bits_long(gb, 8 * get_bits(gb, 8));
+    comment_len = get_bits(gb, 8) * 8;
+    if (get_bits_left(gb) < comment_len) {
+        av_log(ac->avccontext, AV_LOG_ERROR, overread_err);
+        return -1;
+    }
+    skip_bits_long(gb, comment_len);
     return 0;
 }
 
@@ -320,7 +338,7 @@ static int decode_pce(AACContext *ac, enum ChannelPosition new_che_pos[4][MAX_EL
  *
  * @return  Returns error status. 0 - OK, !0 - error
  */
-static int set_default_channel_config(AACContext *ac,
+static av_cold int set_default_channel_config(AACContext *ac,
                                       enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                                       int channel_config)
 {
@@ -466,7 +484,7 @@ static av_always_inline int lcg_random(int previous_val)
     return previous_val * 1664525 + 1013904223;
 }
 
-static void reset_predict_state(PredictorState *ps)
+static av_always_inline void reset_predict_state(PredictorState *ps)
 {
     ps->r0   = 0.0f;
     ps->r1   = 0.0f;
@@ -496,29 +514,28 @@ static av_cold int aac_decode_init(AVCodecContext *avccontext)
     int i;
 
     ac->avccontext = avccontext;
+    ac->m4ac.sample_rate = avccontext->sample_rate;
 
     if (avccontext->extradata_size > 0) {
         if (decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))
             return -1;
-        avccontext->sample_rate = ac->m4ac.sample_rate;
-    } else if (avccontext->channels > 0) {
-        ac->m4ac.sample_rate = avccontext->sample_rate;
     }
 
     avccontext->sample_fmt = SAMPLE_FMT_S16;
-    avccontext->frame_size = 1024;
-
-    AAC_INIT_VLC_STATIC( 0, 144);
-    AAC_INIT_VLC_STATIC( 1, 114);
-    AAC_INIT_VLC_STATIC( 2, 188);
-    AAC_INIT_VLC_STATIC( 3, 180);
-    AAC_INIT_VLC_STATIC( 4, 172);
-    AAC_INIT_VLC_STATIC( 5, 140);
-    AAC_INIT_VLC_STATIC( 6, 168);
-    AAC_INIT_VLC_STATIC( 7, 114);
-    AAC_INIT_VLC_STATIC( 8, 262);
-    AAC_INIT_VLC_STATIC( 9, 248);
-    AAC_INIT_VLC_STATIC(10, 384);
+
+    AAC_INIT_VLC_STATIC( 0, 304);
+    AAC_INIT_VLC_STATIC( 1, 270);
+    AAC_INIT_VLC_STATIC( 2, 550);
+    AAC_INIT_VLC_STATIC( 3, 300);
+    AAC_INIT_VLC_STATIC( 4, 328);
+    AAC_INIT_VLC_STATIC( 5, 294);
+    AAC_INIT_VLC_STATIC( 6, 306);
+    AAC_INIT_VLC_STATIC( 7, 268);
+    AAC_INIT_VLC_STATIC( 8, 510);
+    AAC_INIT_VLC_STATIC( 9, 366);
+    AAC_INIT_VLC_STATIC(10, 462);
+
+    ff_aac_sbr_init();
 
     dsputil_init(&ac->dsp, avccontext);
 
@@ -556,13 +573,7 @@ static av_cold int aac_decode_init(AVCodecContext *avccontext)
     ff_init_ff_sine_windows(10);
     ff_init_ff_sine_windows( 7);
 
-    if (!cbrt_tab[(1<<13) - 1]) {
-        for (i = 0; i < 1<<13; i++) {
-            union float754 f;
-            f.f = cbrtf(i) * i;
-            cbrt_tab[i] = f.i;
-        }
-    }
+    cbrt_tableinit();
 
     return 0;
 }
@@ -570,7 +581,7 @@ static av_cold int aac_decode_init(AVCodecContext *avccontext)
 /**
  * Skip data_stream_element; reference: table 4.10.
  */
-static void skip_data_stream_element(GetBitContext *gb)
+static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
 {
     int byte_align = get_bits1(gb);
     int count = get_bits(gb, 8);
@@ -578,7 +589,13 @@ static void skip_data_stream_element(GetBitContext *gb)
         count += get_bits(gb, 8);
     if (byte_align)
         align_get_bits(gb);
+
+    if (get_bits_left(gb) < 8 * count) {
+        av_log(ac->avccontext, AV_LOG_ERROR, overread_err);
+        return -1;
+    }
     skip_bits_long(gb, 8 * count);
+    return 0;
 }
 
 static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
@@ -697,6 +714,10 @@ static int decode_band_types(AACContext *ac, enum BandType band_type[120],
             while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1)
                 sect_end += sect_len_incr;
             sect_end += sect_len_incr;
+            if (get_bits_left(gb) < 0) {
+                av_log(ac->avccontext, AV_LOG_ERROR, overread_err);
+                return -1;
+            }
             if (sect_end > ics->max_sfb) {
                 av_log(ac->avccontext, AV_LOG_ERROR,
                        "Number of bands (%d) exceeds limit (%d).\n",
@@ -823,7 +844,7 @@ static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 
                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
-                    av_log(ac->avccontext, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.",
+                    av_log(ac->avccontext, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
                            tns->order[w][filt], tns_max_order);
                     tns->order[w][filt] = 0;
                     return -1;
@@ -862,6 +883,7 @@ static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
     }
 }
 
+#ifndef VMUL2
 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
@@ -870,7 +892,9 @@ static inline float *VMUL2(float *dst, const float *v, unsigned idx,
     *dst++ = v[idx>>4 & 15] * s;
     return dst;
 }
+#endif
 
+#ifndef VMUL4
 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
@@ -881,7 +905,9 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx,
     *dst++ = v[idx>>6 & 3] * s;
     return dst;
 }
+#endif
 
+#ifndef VMUL2S
 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
@@ -896,7 +922,9 @@ static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 
     return dst;
 }
+#endif
 
+#ifndef VMUL4S
 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
@@ -921,6 +949,7 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 
     return dst;
 }
+#endif
 
 /**
  * Decode spectral data; reference: table 4.50.
@@ -981,6 +1010,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
+                OPEN_READER(re, gb);
 
                 switch (cbt_m1 >> 1) {
                 case 0:
@@ -989,15 +1019,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 6, 3);
+                            int code;
                             unsigned cb_idx;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
                         } while (len -= 4);
                     }
@@ -1009,19 +1042,26 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 6, 3);
+                            int code;
                             unsigned nnz;
                             unsigned cb_idx;
                             uint32_t bits;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+#if MIN_CACHE_BITS < 20
+                            UPDATE_CACHE(re, gb);
+#endif
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 8 & 15;
-                            bits = get_bits(gb, nnz) << (32-nnz);
+                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
+                            LAST_SKIP_BITS(re, gb, nnz);
                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                         } while (len -= 4);
                     }
@@ -1033,15 +1073,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 6, 3);
+                            int code;
                             unsigned cb_idx;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
                         } while (len -= 2);
                     }
@@ -1054,19 +1097,23 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 6, 3);
+                            int code;
                             unsigned nnz;
                             unsigned cb_idx;
                             unsigned sign;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 8 & 15;
-                            sign = get_bits(gb, nnz) << (cb_idx >> 12);
+                            sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
+                            LAST_SKIP_BITS(re, gb, nnz);
                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                         } while (len -= 2);
                     }
@@ -1079,39 +1126,56 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 6, 3);
+                            int code;
                             unsigned nzt, nnz;
                             unsigned cb_idx;
                             uint32_t bits;
                             int j;
 
-                            if (!index) {
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (!code) {
                                 *icf++ = 0;
                                 *icf++ = 0;
                                 continue;
                             }
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 12;
                             nzt = cb_idx >> 8;
-                            bits = get_bits(gb, nnz) << (32-nnz);
+                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
+                            LAST_SKIP_BITS(re, gb, nnz);
 
                             for (j = 0; j < 2; j++) {
                                 if (nzt & 1<<j) {
-                                    int n = 4;
+                                    uint32_t b;
+                                    int n;
                                     /* The total length of escape_sequence must be < 22 bits according
                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
-                                    while (get_bits1(gb) && n < 13) n++;
-                                    if (n == 13) {
+                                    UPDATE_CACHE(re, gb);
+                                    b = GET_CACHE(re, gb);
+                                    b = 31 - av_log2(~b);
+
+                                    if (b > 8) {
                                         av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                         return -1;
                                     }
-                                    n = (1 << n) + get_bits(gb, n);
+
+#if MIN_CACHE_BITS < 21
+                                    LAST_SKIP_BITS(re, gb, b + 1);
+                                    UPDATE_CACHE(re, gb);
+#else
+                                    SKIP_BITS(re, gb, b + 1);
+#endif
+                                    b += 4;
+                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
+                                    LAST_SKIP_BITS(re, gb, b);
                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
                                     bits <<= 1;
                                 } else {
@@ -1126,6 +1190,8 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                     }
                 }
+
+                CLOSE_READER(re, gb);
             }
         }
         coef += g_len << 7;
@@ -1180,7 +1246,7 @@ static av_always_inline float flt16_trunc(float pf)
     return pun.f;
 }
 
-static void predict(AACContext *ac, PredictorState *ps, float *coef,
+static av_always_inline void predict(AACContext *ac, PredictorState *ps, float *coef,
                     int output_enable)
 {
     const float a     = 0.953125; // 61.0 / 64
@@ -1479,23 +1545,6 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
     return 0;
 }
 
-/**
- * Decode Spectral Band Replication extension data; reference: table 4.55.
- *
- * @param   crc flag indicating the presence of CRC checksum
- * @param   cnt length of TYPE_FIL syntactic element in bytes
- *
- * @return  Returns number of bytes consumed from the TYPE_FIL element.
- */
-static int decode_sbr_extension(AACContext *ac, GetBitContext *gb,
-                                int crc, int cnt)
-{
-    // TODO : sbr_extension implementation
-    av_log_missing_feature(ac->avccontext, "SBR", 0);
-    skip_bits_long(gb, 8 * cnt - 4); // -4 due to reading extension type
-    return cnt;
-}
-
 /**
  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
  *
@@ -1576,7 +1625,8 @@ static int decode_dynamic_range(DynamicRangeControl *che_drc,
  *
  * @return Returns number of bytes consumed
  */
-static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
+static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
+                                    ChannelElement *che, enum RawDataBlockType elem_type)
 {
     int crc_flag = 0;
     int res = cnt;
@@ -1584,7 +1634,21 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
     case EXT_SBR_DATA_CRC:
         crc_flag++;
     case EXT_SBR_DATA:
-        res = decode_sbr_extension(ac, gb, crc_flag, cnt);
+        if (!che) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
+            return res;
+        } else if (!ac->m4ac.sbr) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
+            skip_bits_long(gb, 8 * cnt - 4);
+            return res;
+        } else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
+            skip_bits_long(gb, 8 * cnt - 4);
+            return res;
+        } else {
+            ac->m4ac.sbr = 1;
+        }
+        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
         break;
     case EXT_DYNAMIC_RANGE:
         res = decode_dynamic_range(&ac->che_drc, gb, cnt);
@@ -1648,7 +1712,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
 /**
  * Conduct IMDCT and windowing.
  */
-static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
+static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce, float bias)
 {
     IndividualChannelStream *ics = &sce->ics;
     float *in    = sce->coeffs;
@@ -1680,29 +1744,29 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
      */
     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
-        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, ac->add_bias, 512);
+        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, bias, 512);
     } else {
         for (i = 0; i < 448; i++)
-            out[i] = saved[i] + ac->add_bias;
+            out[i] = saved[i] + bias;
 
         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, ac->add_bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      ac->add_bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      ac->add_bias, 64);
-            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      ac->add_bias, 64);
-            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      ac->add_bias, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, bias, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      bias, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      bias, 64);
+            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      bias, 64);
+            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      bias, 64);
             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
         } else {
-            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, ac->add_bias, 64);
+            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, bias, 64);
             for (i = 576; i < 1024; i++)
-                out[i] = buf[i-512] + ac->add_bias;
+                out[i] = buf[i-512] + bias;
         }
     }
 
     // buffer update
     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
         for (i = 0; i < 64; i++)
-            saved[i] = temp[64 + i] - ac->add_bias;
+            saved[i] = temp[64 + i] - bias;
         ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
         ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
         ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
@@ -1765,8 +1829,9 @@ static void apply_independent_coupling(AACContext *ac,
     const float bias = ac->add_bias;
     const float *src = cce->ch[0].ret;
     float *dest = target->ret;
+    const int len = 1024 << (ac->m4ac.sbr == 1);
 
-    for (i = 0; i < 1024; i++)
+    for (i = 0; i < len; i++)
         dest[i] += gain * (src[i] - bias);
 }
 
@@ -1812,6 +1877,7 @@ static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
 static void spectral_to_sample(AACContext *ac)
 {
     int i, type;
+    float imdct_bias = (ac->m4ac.sbr <= 0) ? ac->add_bias : 0.0f;
     for (type = 3; type >= 0; type--) {
         for (i = 0; i < MAX_ELEM_ID; i++) {
             ChannelElement *che = ac->che[type][i];
@@ -1824,10 +1890,18 @@ static void spectral_to_sample(AACContext *ac)
                     apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
                 if (type <= TYPE_CPE)
                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
-                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT)
-                    imdct_and_windowing(ac, &che->ch[0]);
-                if (type == TYPE_CPE)
-                    imdct_and_windowing(ac, &che->ch[1]);
+                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
+                    imdct_and_windowing(ac, &che->ch[0], imdct_bias);
+                    if (ac->m4ac.sbr > 0) {
+                        ff_sbr_dequant(ac, &che->sbr, type == TYPE_CPE ? TYPE_CPE : TYPE_SCE);
+                        ff_sbr_apply(ac, &che->sbr, 0, che->ch[0].ret, che->ch[0].ret);
+                    }
+                }
+                if (type == TYPE_CPE) {
+                    imdct_and_windowing(ac, &che->ch[1], imdct_bias);
+                    if (ac->m4ac.sbr > 0)
+                        ff_sbr_apply(ac, &che->sbr, 1, che->ch[1].ret, che->ch[1].ret);
+                }
                 if (type <= TYPE_CCE)
                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
             }
@@ -1877,10 +1951,12 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     AACContext *ac = avccontext->priv_data;
-    ChannelElement *che = NULL;
+    ChannelElement *che = NULL, *che_prev = NULL;
     GetBitContext gb;
-    enum RawDataBlockType elem_type;
+    enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
     int err, elem_id, data_size_tmp;
+    int buf_consumed;
+    int samples = 1024, multiplier;
 
     init_get_bits(&gb, buf, buf_size * 8);
 
@@ -1923,8 +1999,7 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
             break;
 
         case TYPE_DSE:
-            skip_data_stream_element(&gb);
-            err = 0;
+            err = skip_data_stream_element(ac, &gb);
             break;
 
         case TYPE_PCE: {
@@ -1943,8 +2018,12 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
         case TYPE_FIL:
             if (elem_id == 15)
                 elem_id += get_bits(&gb, 8) - 1;
+            if (get_bits_left(&gb) < 8 * elem_id) {
+                    av_log(avccontext, AV_LOG_ERROR, overread_err);
+                    return -1;
+            }
             while (elem_id > 0)
-                elem_id -= decode_extension_payload(ac, &gb, elem_id);
+                elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
             err = 0; /* FIXME */
             break;
 
@@ -1953,19 +2032,28 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
             break;
         }
 
+        che_prev       = che;
+        elem_type_prev = elem_type;
+
         if (err)
             return err;
+
+        if (get_bits_left(&gb) < 3) {
+            av_log(avccontext, AV_LOG_ERROR, overread_err);
+            return -1;
+        }
     }
 
     spectral_to_sample(ac);
 
-    if (!ac->is_saved) {
-        ac->is_saved = 1;
-        *data_size = 0;
-        return buf_size;
+    multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
+    samples <<= multiplier;
+    if (ac->output_configured < OC_LOCKED) {
+        avccontext->sample_rate = ac->m4ac.sample_rate << multiplier;
+        avccontext->frame_size = samples;
     }
 
-    data_size_tmp = 1024 * avccontext->channels * sizeof(int16_t);
+    data_size_tmp = samples * avccontext->channels * sizeof(int16_t);
     if (*data_size < data_size_tmp) {
         av_log(avccontext, AV_LOG_ERROR,
                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -1974,12 +2062,13 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
     }
     *data_size = data_size_tmp;
 
-    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
+    ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avccontext->channels);
 
     if (ac->output_configured)
         ac->output_configured = OC_LOCKED;
 
-    return buf_size;
+    buf_consumed = (get_bits_count(&gb) + 7) >> 3;
+    return buf_size > buf_consumed ? buf_consumed : buf_size;
 }
 
 static av_cold int aac_decode_close(AVCodecContext *avccontext)
@@ -1988,8 +2077,11 @@ static av_cold int aac_decode_close(AVCodecContext *avccontext)
     int i, type;
 
     for (i = 0; i < MAX_ELEM_ID; i++) {
-        for (type = 0; type < 4; type++)
+        for (type = 0; type < 4; type++) {
+            if (ac->che[type][i])
+                ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
             av_freep(&ac->che[type][i]);
+        }
     }
 
     ff_mdct_end(&ac->mdct);
@@ -1999,7 +2091,7 @@ static av_cold int aac_decode_close(AVCodecContext *avccontext)
 
 AVCodec aac_decoder = {
     "aac",
-    CODEC_TYPE_AUDIO,
+    AVMEDIA_TYPE_AUDIO,
     CODEC_ID_AAC,
     sizeof(AACContext),
     aac_decode_init,