]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/opusenc.c
opus_pvq_search: only use rsqrtps approximation on CPUs with avx
[ffmpeg] / libavcodec / opusenc.c
index 303f11f7e7d0414c032850aef5117a95d935dc84..8f2da4a7ba3cc260f6d202279e98d8fb1e5a3a88 100644 (file)
@@ -207,20 +207,18 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
 /* Create the window and do the mdct */
 static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
 {
-    int i, t, ch;
-    float *win = s->scratch;
+    int t, ch;
+    float *win = s->scratch, *temp = s->scratch + 1920;
 
-    /* I think I can use s->dsp->vector_fmul_window for transients at least */
     if (f->transient) {
         for (ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
             float *src1 = b->overlap;
             for (t = 0; t < f->blocks; t++) {
                 float *src2 = &b->samples[CELT_OVERLAP*t];
-                for (i = 0; i < CELT_OVERLAP; i++) {
-                    win[               i] = src1[i]*ff_celt_window[i];
-                    win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1];
-                }
+                s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
+                s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
+                                            ff_celt_window - 8, 128);
                 src1 = src2;
                 s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
             }
@@ -228,21 +226,21 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
     } else {
         int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
         int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
+        memset(win, 0, wlen*sizeof(float));
         for (ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
 
-            memset(win, 0, wlen*sizeof(float));
+            /* Overlap */
+            s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
+            memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
 
+            /* Samples, flat top window */
             memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
 
-            /* Alignment fucks me over */
-            //s->dsp->vector_fmul(&dst[lap_dst], b->overlap, ff_celt_window, CELT_OVERLAP);
-            //s->dsp->vector_fmul_reverse(&dst[lap_dst + blk_len - CELT_OVERLAP], b->samples, ff_celt_window, CELT_OVERLAP);
-
-            for (i = 0; i < CELT_OVERLAP; i++) {
-                win[lap_dst           + i] = b->overlap[i]       *ff_celt_window[i];
-                win[lap_dst + blk_len + i] = b->samples[rwin + i]*ff_celt_window[CELT_OVERLAP - i - 1];
-            }
+            /* Samples, windowed */
+            s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
+                                        ff_celt_window - 8, 128);
+            memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
 
             s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
         }
@@ -646,10 +644,10 @@ static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
 
     if (intra) {
         alpha = 0.0f;
-        beta  = 1.0f - 4915.0f/32768.0f;
+        beta  = 1.0f - (4915.0f/32768.0f);
     } else {
         alpha = ff_celt_alpha_coef[f->size];
-        beta  = 1.0f - ff_celt_beta_coef[f->size];
+        beta  = ff_celt_beta_coef[f->size];
     }
 
     for (i = f->start_band; i < f->end_band; i++) {
@@ -1049,8 +1047,6 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
     /* Initial padding will change if SILK is ever supported */
     avctx->initial_padding = 120;
 
-    avctx->cutoff = !avctx->cutoff ? 20000 : avctx->cutoff;
-
     if (!avctx->bit_rate) {
         int coupled = ff_opus_default_coupled_streams[s->channels - 1];
         avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);