Merge commit '3e853ff7acc56b180950ab39e7282f1b938c8784'

[ffmpeg] / libavcodec / aacenc.c
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c

index a3c31de684c740553152c18dacdd037fbfa2fffd..0d84a82001bb23ef022a56bcdc608b46b9a6dc25 100644 (file)
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -472,12 +472,32 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
                                                     sce->ics.swb_sizes[i],
                                                     sce->sf_idx[w*16 + i],
                                                     sce->band_type[w*16 + i],
-                                                   s->lambda);
+                                                   s->lambda, sce->ics.window_clipping[w]);
              start += sce->ics.swb_sizes[i];
          }
      }
  }
  
+/**
+ * Downscale spectral coefficients of a channel with near-clipping windows to avoid artifacts.
+ * Every window is scaled by ics.clip_avoidance_factor (no per-window check here); 's' is unused. */
+static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
+{
+    int start, i, j, w;
+
+    if (sce->ics.clip_avoidance_factor < 1.0f) { /* a factor of 1.0f or more leaves the coefficients untouched */
+        for (w = 0; w < sce->ics.num_windows; w++) {
+            start = 0; /* band offsets restart at the beginning of each window */
+            for (i = 0; i < sce->ics.max_sfb; i++) { /* only the first max_sfb bands are scaled */
+                float *swb_coeffs = sce->coeffs + start + w*128; /* window w begins at coefficient index w*128 */
+                for (j = 0; j < sce->ics.swb_sizes[i]; j++)
+                    swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
+                start += sce->ics.swb_sizes[i]; /* advance to the first coefficient of the next band */
+            }
+        }
+    }
+}
+
  /**
   * Encode one channel of audio data.
   */
@@ -578,6 +598,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
          for (ch = 0; ch < chans; ch++) {
              IndividualChannelStream *ics = &cpe->ch[ch].ics;
              int cur_channel = start_ch + ch;
+            float clip_avoidance_factor;
              overlap  = &samples[cur_channel][0];
              samples2 = overlap + 1024;
              la       = samples2 + (448+64);
@@ -605,25 +626,40 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
              ics->num_windows        = wi[ch].num_windows;
              ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
              ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
+            clip_avoidance_factor = 0.0f;
              for (w = 0; w < ics->num_windows; w++)
                  ics->group_len[w] = wi[ch].grouping[w];
+            for (w = 0; w < ics->num_windows; w++) {
+                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
+                    ics->window_clipping[w] = 1;
+                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
+                } else {
+                    ics->window_clipping[w] = 0;
+                }
+            }
+            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
+                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
+            } else {
+                ics->clip_avoidance_factor = 1.0f;
+            }
  
              apply_window_and_mdct(s, &cpe->ch[ch], overlap);
              if (isnan(cpe->ch->coeffs[0])) {
                  av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
                  return AVERROR(EINVAL);
              }
+            avoid_clipping(s, &cpe->ch[ch]);
          }
          start_ch += chans;
      }
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
          return ret;
      do {
          int frame_bits;
  
          init_put_bits(&s->pb, avpkt->data, avpkt->size);
  
-        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
+        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
              put_bitstream_info(s, LIBAVCODEC_IDENT);
          start_ch = 0;
          memset(chan_el_counter, 0, sizeof(chan_el_counter));
@@ -660,7 +696,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
              if (s->options.pns && s->coder->search_for_pns) {
                  for (ch = 0; ch < chans; ch++) {
                      s->cur_channel = start_ch + ch;
-                    s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
+                    s->coder->search_for_pns(s, avctx, &cpe->ch[ch]);
                  }
              }
              s->cur_channel = start_ch;
@@ -671,11 +707,11 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                          for (g = 0;  g < ics->num_swb; g++)
                              cpe->ms_mask[w*16+g] = 1;
                  } else if (s->coder->search_for_ms) {
-                    s->coder->search_for_ms(s, cpe, s->lambda);
+                    s->coder->search_for_ms(s, cpe);
                  }
              }
              if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
-                s->coder->search_for_is(s, avctx, cpe, s->lambda);
+                s->coder->search_for_is(s, avctx, cpe);
                  if (cpe->is_mode) is_mode = 1;
              }
              if (s->coder->set_special_band_scalefactors)
@@ -721,7 +757,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
      avctx->frame_bits = put_bits_count(&s->pb);
  
      // rate control stuff
-    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+    if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
          float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
          s->lambda *= ratio;
          s->lambda = FFMIN(s->lambda, 65536.f);
@@ -758,7 +794,7 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
  {
      int ret = 0;
  
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
      if (!s->fdsp)
          return AVERROR(ENOMEM);
  
@@ -781,7 +817,7 @@ static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
      int ch;
      FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
      FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
-    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
+    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
  
      for(ch = 0; ch < s->channels; ch++)
          s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
@@ -869,7 +905,7 @@ static const AVOption aacenc_options[] = {
          {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
          {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
          {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-    {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
+    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
          {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
          {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
          {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
@@ -907,8 +943,8 @@ AVCodec ff_aac_encoder = {
      .encode2        = aac_encode_frame,
      .close          = aac_encode_end,
      .supported_samplerates = mpeg4audio_sample_rates,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
-                      CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_EXPERIMENTAL,
      .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
      .priv_class     = &aacenc_class,