h264_parser: Fix POC parsing for the case where MMCO_RESET is present.

[ffmpeg] / libavcodec / aacdec.c
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c

index 8eaee60d20e8d25a4043becf458cb5a560b1f8a5..824839a7720f7354492e285c519490a6a020f388 100644 (file)
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -706,7 +706,7 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
                                       MPEG4AudioConfig *m4ac,
                                       int channel_config)
  {
-    int extension_flag, ret;
+    int extension_flag, ret, ep_config, res_flags;
      uint8_t layout_map[MAX_ELEM_ID*4][3];
      int tags = 0;
  
@@ -752,14 +752,30 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
          case AOT_ER_AAC_LTP:
          case AOT_ER_AAC_SCALABLE:
          case AOT_ER_AAC_LD:
-            skip_bits(gb, 3);      /* aacSectionDataResilienceFlag
-                                    * aacScalefactorDataResilienceFlag
-                                    * aacSpectralDataResilienceFlag
-                                    */
+            res_flags = get_bits(gb, 3);
+            if (res_flags) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "AAC data resilience not supported (flags %x)\n",
+                       res_flags);
+                return AVERROR_PATCHWELCOME;
+            }
              break;
          }
          skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
      }
+    switch (m4ac->object_type) {
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LTP:
+    case AOT_ER_AAC_SCALABLE:
+    case AOT_ER_AAC_LD:
+        ep_config = get_bits(gb, 2);
+        if (ep_config) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "epConfig %d is not supported.\n",
+                   ep_config);
+            return AVERROR_PATCHWELCOME;
+        }
+    }
      return 0;
  }
  
@@ -801,6 +817,13 @@ static int decode_audio_specific_config(AACContext *ac,
                 m4ac->sampling_index);
          return AVERROR_INVALIDDATA;
      }
+    if (m4ac->object_type == AOT_ER_AAC_LD &&
+        (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "invalid low delay sampling rate index %d\n",
+               m4ac->sampling_index);
+        return AVERROR_INVALIDDATA;
+    }
  
      skip_bits_long(&gb, i);
  
@@ -808,6 +831,8 @@ static int decode_audio_specific_config(AACContext *ac,
      case AOT_AAC_MAIN:
      case AOT_AAC_LC:
      case AOT_AAC_LTP:
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LD:
          if ((ret = decode_ga_specific_config(ac, avctx, &gb,
                                              m4ac, m4ac->chan_config)) < 0)
              return ret;
@@ -968,12 +993,15 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                      352);
  
      ff_mdct_init(&ac->mdct,       11, 1, 1.0 / (32768.0 * 1024.0));
+    ff_mdct_init(&ac->mdct_ld,    10, 1, 1.0 / (32768.0 * 512.0));
      ff_mdct_init(&ac->mdct_small,  8, 1, 1.0 / (32768.0 * 128.0));
      ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0 * 32768.0);
      // window initialization
      ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
+    ff_kbd_window_init(ff_aac_kbd_long_512,  4.0, 512);
      ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
      ff_init_ff_sine_windows(10);
+    ff_init_ff_sine_windows( 9);
      ff_init_ff_sine_windows( 7);
  
      cbrt_tableinit();
@@ -1046,6 +1074,14 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
      }
      ics->window_sequence[1] = ics->window_sequence[0];
      ics->window_sequence[0] = get_bits(gb, 2);
+    if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD &&
+        ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
+               "window sequence %d found.\n", ics->window_sequence[0]);
+        ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
+        return AVERROR_INVALIDDATA;
+    }
      ics->use_kb_window[1]   = ics->use_kb_window[0];
      ics->use_kb_window[0]   = get_bits1(gb);
      ics->num_window_groups  = 1;
@@ -1069,8 +1105,15 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
      } else {
          ics->max_sfb               = get_bits(gb, 6);
          ics->num_windows           = 1;
-        ics->swb_offset            =    ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
-        ics->num_swb               =   ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
+        if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) {
+            ics->swb_offset        =     ff_swb_offset_512[ac->oc[1].m4ac.sampling_index];
+            ics->num_swb           =    ff_aac_num_swb_512[ac->oc[1].m4ac.sampling_index];
+            if (!ics->num_swb || !ics->swb_offset)
+                return AVERROR_BUG;
+        } else {
+            ics->swb_offset        =    ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index];
+            ics->num_swb           =   ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index];
+        }
          ics->tns_max_bands         = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index];
          ics->predictor_present     = get_bits1(gb);
          ics->predictor_reset_group = 0;
@@ -1079,11 +1122,17 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                  if (decode_prediction(ac, ics, gb)) {
                      return AVERROR_INVALIDDATA;
                  }
-            } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
+            } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC ||
+                       ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC) {
                  av_log(ac->avctx, AV_LOG_ERROR,
                         "Prediction is not allowed in AAC-LC.\n");
                  return AVERROR_INVALIDDATA;
              } else {
+                if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) {
+                    av_log(ac->avctx, AV_LOG_ERROR,
+                           "LTP in ER AAC LD not yet implemented.\n");
+                    return AVERROR_PATCHWELCOME;
+                }
                  if ((ics->ltp.present = get_bits(gb, 1)))
                      decode_ltp(&ics->ltp, gb, ics->max_sfb);
              }
@@ -1701,7 +1750,7 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce,
      TemporalNoiseShaping    *tns = &sce->tns;
      IndividualChannelStream *ics = &sce->ics;
      float *out = sce->coeffs;
-    int global_gain, pulse_present = 0;
+    int global_gain, er_syntax, pulse_present = 0;
      int ret;
  
      /* This assignment is to silence a GCC warning about the variable being used
@@ -1724,6 +1773,9 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce,
          return ret;
  
      pulse_present = 0;
+    er_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
+                ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
+                ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD;
      if (!scale_flag) {
          if ((pulse_present = get_bits1(gb))) {
              if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
@@ -1737,12 +1789,19 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce,
                  return AVERROR_INVALIDDATA;
              }
          }
-        if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
-            return AVERROR_INVALIDDATA;
+        tns->present = get_bits1(gb);
+        if (tns->present && !er_syntax)
+            if (decode_tns(ac, tns, gb, ics) < 0)
+                return AVERROR_INVALIDDATA;
          if (get_bits1(gb)) {
              avpriv_request_sample(ac->avctx, "SSR");
              return AVERROR_PATCHWELCOME;
          }
+        // I see no textual basis in the spec for this occuring after SSR gain
+        // control, but this is what both reference and real implmentations do
+        if (tns->present && er_syntax)
+            if (decode_tns(ac, tns, gb, ics) < 0)
+                return AVERROR_INVALIDDATA;
      }
  
      if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
@@ -2286,6 +2345,25 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
      }
  }
  
+static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    float *in    = sce->coeffs;
+    float *out   = sce->ret;
+    float *saved = sce->saved;
+    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_512 : ff_sine_512;
+    float *buf  = ac->buf_mdct;
+
+    // imdct
+    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
+
+    // window overlapping
+    ac->fdsp.vector_fmul_window(out, saved, buf, lwindow_prev, 256);
+
+    // buffer update
+    memcpy(saved, buf + 256, 256 * sizeof(float));
+}
+
  /**
   * Apply dependent channel coupling (applied before IMDCT).
   *
@@ -2382,6 +2460,11 @@ static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
  static void spectral_to_sample(AACContext *ac)
  {
      int i, type;
+    void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
+    if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD)
+        imdct_and_window = imdct_and_windowing_ld;
+    else
+        imdct_and_window = imdct_and_windowing;
      for (type = 3; type >= 0; type--) {
          for (i = 0; i < MAX_ELEM_ID; i++) {
              ChannelElement *che = ac->che[type][i];
@@ -2403,11 +2486,11 @@ static void spectral_to_sample(AACContext *ac)
                  if (type <= TYPE_CPE)
                      apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
                  if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
-                    imdct_and_windowing(ac, &che->ch[0]);
+                    imdct_and_window(ac, &che->ch[0]);
                      if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
                          update_ltp(ac, &che->ch[0]);
                      if (type == TYPE_CPE) {
-                        imdct_and_windowing(ac, &che->ch[1]);
+                        imdct_and_window(ac, &che->ch[1]);
                          if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
                              update_ltp(ac, &che->ch[1]);
                      }
@@ -2427,7 +2510,7 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
      int size;
      AACADTSHeaderInfo hdr_info;
      uint8_t layout_map[MAX_ELEM_ID*4][3];
-    int layout_map_tags;
+    int layout_map_tags, ret;
  
      size = avpriv_aac_parse_header(gb, &hdr_info);
      if (size > 0) {
@@ -2439,12 +2522,15 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
          push_output_configuration(ac);
          if (hdr_info.chan_config) {
              ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
-            if (set_default_channel_config(ac->avctx, layout_map,
-                    &layout_map_tags, hdr_info.chan_config))
-                return -7;
-            if (output_configure(ac, layout_map, layout_map_tags,
-                                 FFMAX(ac->oc[1].status, OC_TRIAL_FRAME), 0))
-                return -7;
+            if ((ret = set_default_channel_config(ac->avctx,
+                                                  layout_map,
+                                                  &layout_map_tags,
+                                                  hdr_info.chan_config)) < 0)
+                return ret;
+            if ((ret = output_configure(ac, layout_map, layout_map_tags,
+                                        FFMAX(ac->oc[1].status,
+                                              OC_TRIAL_FRAME), 0)) < 0)
+                return ret;
          } else {
              ac->oc[1].m4ac.chan_config = 0;
          }
@@ -2463,6 +2549,64 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
      return size;
  }
  
+static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
+                               int *got_frame_ptr, GetBitContext *gb)
+{
+    AACContext *ac = avctx->priv_data;
+    ChannelElement *che;
+    int err, i;
+    int samples = 1024;
+    int chan_config = ac->oc[1].m4ac.chan_config;
+
+    if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD)
+        samples >>= 1;
+
+    ac->frame = data;
+
+    if ((err = frame_configure_elements(avctx)) < 0)
+        return err;
+
+    ac->tags_mapped = 0;
+
+    if (chan_config < 0 || chan_config >= 8) {
+        avpriv_request_sample(avctx, "Unknown ER channel configuration %d",
+                              ac->oc[1].m4ac.chan_config);
+        return AVERROR_INVALIDDATA;
+    }
+    for (i = 0; i < tags_per_config[chan_config]; i++) {
+        const int elem_type = aac_channel_layout_map[chan_config-1][i][0];
+        const int elem_id   = aac_channel_layout_map[chan_config-1][i][1];
+        if (!(che=get_che(ac, elem_type, elem_id))) {
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "channel element %d.%d is not allocated\n",
+                   elem_type, elem_id);
+            return AVERROR_INVALIDDATA;
+        }
+        skip_bits(gb, 4);
+        switch (elem_type) {
+        case TYPE_SCE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            break;
+        case TYPE_CPE:
+            err = decode_cpe(ac, gb, che);
+            break;
+        case TYPE_LFE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            break;
+        }
+        if (err < 0)
+            return err;
+    }
+
+    spectral_to_sample(ac);
+
+    ac->frame->nb_samples = samples;
+    *got_frame_ptr = 1;
+
+    skip_bits_long(gb, get_bits_left(gb));
+    return 0;
+}
+
  static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
                                  int *got_frame_ptr, GetBitContext *gb)
  {
@@ -2475,22 +2619,19 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
      ac->frame = data;
  
      if (show_bits(gb, 12) == 0xfff) {
-        if (parse_adts_frame_header(ac, gb) < 0) {
+        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
              av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
-            err = -1;
              goto fail;
          }
          if (ac->oc[1].m4ac.sampling_index > 12) {
              av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
-            err = -1;
+            err = AVERROR_INVALIDDATA;
              goto fail;
          }
      }
  
-    if (frame_configure_elements(avctx) < 0) {
-        err = -1;
+    if ((err = frame_configure_elements(avctx)) < 0)
          goto fail;
-    }
  
      ac->tags_mapped = 0;
      // parse
@@ -2501,7 +2642,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
              if (!(che=get_che(ac, elem_type, elem_id))) {
                  av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
                         elem_type, elem_id);
-                err = -1;
+                err = AVERROR_INVALIDDATA;
                  goto fail;
              }
              samples = 1024;
@@ -2557,7 +2698,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
                  elem_id += get_bits(gb, 8) - 1;
              if (get_bits_left(gb) < 8 * elem_id) {
                      av_log(avctx, AV_LOG_ERROR, overread_err);
-                    err = -1;
+                    err = AVERROR_INVALIDDATA;
                      goto fail;
              }
              while (elem_id > 0)
@@ -2566,7 +2707,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
              break;
  
          default:
-            err = -1; /* should not happen, but keeps compiler happy */
+            err = AVERROR_BUG; /* should not happen, but keeps compiler happy */
              break;
          }
  
@@ -2578,7 +2719,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
  
          if (get_bits_left(gb) < 3) {
              av_log(avctx, AV_LOG_ERROR, overread_err);
-            err = -1;
+            err = AVERROR_INVALIDDATA;
              goto fail;
          }
      }
@@ -2639,7 +2780,16 @@ static int aac_decode_frame(AVCodecContext *avctx, void *data,
      if ((err = init_get_bits(&gb, buf, buf_size * 8)) < 0)
          return err;
  
-    if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb)) < 0)
+    switch (ac->oc[1].m4ac.object_type) {
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LTP:
+    case AOT_ER_AAC_LD:
+        err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb);
+        break;
+    default:
+        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb);
+    }
+    if (err < 0)
          return err;
  
      buf_consumed = (get_bits_count(&gb) + 7) >> 3;
@@ -2665,6 +2815,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
  
      ff_mdct_end(&ac->mdct);
      ff_mdct_end(&ac->mdct_small);
+    ff_mdct_end(&ac->mdct_ld);
      ff_mdct_end(&ac->mdct_ltp);
      return 0;
  }
@@ -2938,13 +3089,13 @@ static av_cold int latm_decode_init(AVCodecContext *avctx)
  
  AVCodec ff_aac_decoder = {
      .name            = "aac",
+    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
      .type            = AVMEDIA_TYPE_AUDIO,
      .id              = AV_CODEC_ID_AAC,
      .priv_data_size  = sizeof(AACContext),
      .init            = aac_decode_init,
      .close           = aac_decode_close,
      .decode          = aac_decode_frame,
-    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
      .sample_fmts     = (const enum AVSampleFormat[]) {
          AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
      },
@@ -2959,13 +3110,13 @@ AVCodec ff_aac_decoder = {
  */
  AVCodec ff_aac_latm_decoder = {
      .name            = "aac_latm",
+    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
      .type            = AVMEDIA_TYPE_AUDIO,
      .id              = AV_CODEC_ID_AAC_LATM,
      .priv_data_size  = sizeof(struct LATMContext),
      .init            = latm_decode_init,
      .close           = aac_decode_close,
      .decode          = latm_decode_frame,
-    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
      .sample_fmts     = (const enum AVSampleFormat[]) {
          AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
      },