opus_pvq_search: only use rsqrtps approximation on CPUs with avx

[ffmpeg] / libavcodec / opusenc.c
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c

index 5f5700ea50165c9480de704a3edaa85cd0dc1412..8f2da4a7ba3cc260f6d202279e98d8fb1e5a3a88 100644 (file)
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -55,6 +55,7 @@ typedef struct OpusEncContext {
      AudioFrameQueue afq;
      AVFloatDSPContext *dsp;
      MDCT15Context *mdct[CELT_BLOCK_NB];
+    CeltPVQ *pvq;
      struct FFBufQueue bufqueue;
  
      enum OpusMode mode;
@@ -206,20 +207,18 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
  /* Create the window and do the mdct */
  static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
  {
-    int i, t, ch;
-    float *win = s->scratch;
+    int t, ch;
+    float *win = s->scratch, *temp = s->scratch + 1920;
  
-    /* I think I can use s->dsp->vector_fmul_window for transients at least */
      if (f->transient) {
          for (ch = 0; ch < f->channels; ch++) {
              CeltBlock *b = &f->block[ch];
              float *src1 = b->overlap;
              for (t = 0; t < f->blocks; t++) {
                  float *src2 = &b->samples[CELT_OVERLAP*t];
-                for (i = 0; i < CELT_OVERLAP; i++) {
-                    win[               i] = src1[i]*ff_celt_window[i];
-                    win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1];
-                }
+                s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
+                s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
+                                            ff_celt_window - 8, 128);
                  src1 = src2;
                  s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
              }
@@ -227,21 +226,21 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
      } else {
          int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
          int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
+        memset(win, 0, wlen*sizeof(float));
          for (ch = 0; ch < f->channels; ch++) {
              CeltBlock *b = &f->block[ch];
  
-            memset(win, 0, wlen*sizeof(float));
+            /* Overlap */
+            s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
+            memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
  
+            /* Samples, flat top window */
              memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
  
-            /* Alignment fucks me over */
-            //s->dsp->vector_fmul(&dst[lap_dst], b->overlap, ff_celt_window, CELT_OVERLAP);
-            //s->dsp->vector_fmul_reverse(&dst[lap_dst + blk_len - CELT_OVERLAP], b->samples, ff_celt_window, CELT_OVERLAP);
-
-            for (i = 0; i < CELT_OVERLAP; i++) {
-                win[lap_dst           + i] = b->overlap[i]       *ff_celt_window[i];
-                win[lap_dst + blk_len + i] = b->samples[rwin + i]*ff_celt_window[CELT_OVERLAP - i - 1];
-            }
+            /* Samples, windowed */
+            s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
+                                        ff_celt_window - 8, 128);
+            memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
  
              s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
          }
@@ -645,10 +644,10 @@ static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  
      if (intra) {
          alpha = 0.0f;
-        beta  = 1.0f - 4915.0f/32768.0f;
+        beta  = 1.0f - (4915.0f/32768.0f);
      } else {
          alpha = ff_celt_alpha_coef[f->size];
-        beta  = 1.0f - ff_celt_beta_coef[f->size];
+        beta  = ff_celt_beta_coef[f->size];
      }
  
      for (i = f->start_band; i < f->end_band; i++) {
@@ -797,15 +796,15 @@ static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f)
          }
  
          if (f->dual_stereo) {
-            cm[0] = ff_celt_encode_band(f, rc, i, X, NULL, band_size, b / 2, f->blocks,
+            cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks,
                                          effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                          norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
  
-            cm[1] = ff_celt_encode_band(f, rc, i, Y, NULL, band_size, b / 2, f->blocks,
+            cm[1] = f->pvq->encode_band(f->pvq, f, rc, i, Y, NULL, band_size, b / 2, f->blocks,
                                          effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
                                          norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
          } else {
-            cm[0] = ff_celt_encode_band(f, rc, i, X, Y, band_size, b, f->blocks,
+            cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks,
                                          effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                          norm + band_offset, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
              cm[1] = cm[0];
@@ -883,6 +882,7 @@ static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int in
  
      f->avctx = s->avctx;
      f->dsp = s->dsp;
+    f->pvq = s->pvq;
      f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0;
      f->end_band = ff_celt_band_end[s->bandwidth];
      f->channels = s->channels;
@@ -1019,6 +1019,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
      for (i = 0; i < CELT_BLOCK_NB; i++)
          ff_mdct15_uninit(&s->mdct[i]);
  
+    ff_celt_pvq_uninit(&s->pvq);
      av_freep(&s->dsp);
      av_freep(&s->frame);
      av_freep(&s->rc);
@@ -1046,8 +1047,6 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
      /* Initial padding will change if SILK is ever supported */
      avctx->initial_padding = 120;
  
-    avctx->cutoff = !avctx->cutoff ? 20000 : avctx->cutoff;
-
      if (!avctx->bit_rate) {
          int coupled = ff_opus_default_coupled_streams[s->channels - 1];
          avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
@@ -1075,6 +1074,9 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
  
      ff_af_queue_init(avctx, &s->afq);
  
+    if ((ret = ff_celt_pvq_init(&s->pvq)) < 0)
+        return ret;
+
      if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
          return AVERROR(ENOMEM);
  
@@ -1083,8 +1085,10 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
          if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
              return AVERROR(ENOMEM);
  
-    for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++)
+    for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) {
          s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
+        s->frame[i].seed = 0;
+    }
  
      /* Zero out previous energy (matters for inter first frame) */
      for (ch = 0; ch < s->channels; ch++)