H.264: disable 2tap qpel with CODEC_FLAG2_FAST and >8-bit

[ffmpeg] / libavcodec / aacsbr.c
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index 27c379e15b4aa151d69f611491b85cd049b01429..81b0b4c0013bbea80a518f0cfec49dd866e188e7 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -3,20 +3,20 @@
   * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
   * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
   *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
   *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -31,6 +31,8 @@
  #include "aacsbr.h"
  #include "aacsbrdata.h"
  #include "fft.h"
+#include "aacps.h"
+#include "libavutil/libm.h"
  
  #include <stdint.h>
  #include <float.h>
@@ -71,9 +73,6 @@ enum {
  static VLC vlc_sbr[10];
  static const int8_t vlc_sbr_lav[10] =
      { 60, 60, 24, 24, 31, 31, 12, 12, 31, 12 };
-static DECLARE_ALIGNED(16, float, analysis_cos_pre)[64];
-static DECLARE_ALIGNED(16, float, analysis_sin_pre)[64];
-static DECLARE_ALIGNED(16, float, analysis_cossin_post)[32][2];
  static const DECLARE_ALIGNED(16, float, zero64)[64];
  
  #define SBR_INIT_VLC_STATIC(num, size) \
@@ -87,7 +86,7 @@ static const DECLARE_ALIGNED(16, float, zero64)[64];
  
  av_cold void ff_aac_sbr_init(void)
  {
-    int n, k;
+    int n;
      static const struct {
          const void *sbr_codes, *sbr_bits;
          const unsigned int table_size, elem_size;
@@ -116,16 +115,6 @@ av_cold void ff_aac_sbr_init(void)
      SBR_INIT_VLC_STATIC(8, 592);
      SBR_INIT_VLC_STATIC(9, 512);
  
-    for (n = 0; n < 64; n++) {
-        float pre = M_PI * n / 64;
-        analysis_cos_pre[n] = cosf(pre);
-        analysis_sin_pre[n] = sinf(pre);
-    }
-    for (k = 0; k < 32; k++) {
-        float post = M_PI * (k + 0.5) / 128;
-        analysis_cossin_post[k][0] =  4.0 * cosf(post);
-        analysis_cossin_post[k][1] = -4.0 * sinf(post);
-    }
      for (n = 1; n < 320; n++)
          sbr_qmf_window_us[320 + n] = sbr_qmf_window_us[320 - n];
      sbr_qmf_window_us[384] = -sbr_qmf_window_us[384];
@@ -133,22 +122,30 @@ av_cold void ff_aac_sbr_init(void)
  
      for (n = 0; n < 320; n++)
          sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
+
+    ff_ps_init();
  }
  
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr)
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
  {
+    float mdct_scale;
      sbr->kx[0] = sbr->kx[1] = 32; //Typo in spec, kx' inits to 32
      sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
      sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
      sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
-    ff_mdct_init(&sbr->mdct, 7, 1, 1.0/64);
-    ff_rdft_init(&sbr->rdft, 6, IDFT_R2C);
+    /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
+     * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
+     * and scale back down at synthesis. */
+    mdct_scale = ac->avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32768.0f : 1.0f;
+    ff_mdct_init(&sbr->mdct,     7, 1, 1.0 / (64 * mdct_scale));
+    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale);
+    ff_ps_ctx_init(&sbr->ps);
  }
  
  av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
  {
      ff_mdct_end(&sbr->mdct);
-    ff_rdft_end(&sbr->rdft);
+    ff_mdct_end(&sbr->mdct_ana);
  }
  
  static int qsort_comparison_function_int16(const void *a, const void *b)
@@ -388,8 +385,7 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
      }
  
      if (!spectrum->bs_freq_scale) {
-        unsigned int dk;
-        int k2diff;
+        int dk, k2diff;
  
          dk = spectrum->bs_alter_scale + 1;
          sbr->n_master = ((sbr->k[2] - sbr->k[0] + (dk&2)) >> dk) << 1;
@@ -402,7 +398,7 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
          k2diff = sbr->k[2] - sbr->k[0] - sbr->n_master * dk;
          if (k2diff < 0) {
              sbr->f_master[1]--;
-            sbr->f_master[2]-= (k2diff < 1);
+            sbr->f_master[2]-= (k2diff < -1);
          } else if (k2diff) {
              sbr->f_master[sbr->n_master]++;
          }
@@ -903,7 +899,6 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
                                 GetBitContext *gb,
                                 int bs_extension_id, int *num_bits_left)
  {
-//TODO - implement ps_data for parametric stereo parsing
      switch (bs_extension_id) {
      case EXTENSION_ID_PS:
          if (!ac->m4ac.ps) {
@@ -911,8 +906,8 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
              skip_bits_long(gb, *num_bits_left); // bs_fill_bits
              *num_bits_left = 0;
          } else {
-#if 0
-            *num_bits_left -= ff_ps_data(gb, ps);
+#if 1
+            *num_bits_left -= ff_ps_read_data(ac->avctx, gb, &sbr->ps, *num_bits_left);
  #else
              av_log_missing_feature(ac->avctx, "Parametric Stereo is", 0);
              skip_bits_long(gb, *num_bits_left); // bs_fill_bits
@@ -1021,6 +1016,11 @@ static unsigned int read_sbr_data(AACContext *ac, SpectralBandReplication *sbr,
              num_bits_left -= 2;
              read_sbr_extension(ac, sbr, gb, get_bits(gb, 2), &num_bits_left); // bs_extension_id
          }
+        if (num_bits_left < 0) {
+            av_log(ac->avctx, AV_LOG_ERROR, "SBR Extension over read.\n");
+        }
+        if (num_bits_left > 0)
+            skip_bits(gb, num_bits_left);
      }
  
      return get_bits_count(gb) - cnt;
@@ -1139,36 +1139,32 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
   * @param   x       pointer to the beginning of the first sample window
   * @param   W       array of complex-valued samples split into subbands
   */
-static void sbr_qmf_analysis(DSPContext *dsp, RDFTContext *rdft, const float *in, float *x,
-                             float z[320], float W[2][32][32][2],
-                             float scale)
+static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct, const float *in, float *x,
+                             float z[320], float W[2][32][32][2])
  {
      int i, k;
      memcpy(W[0], W[1], sizeof(W[0]));
      memcpy(x    , x+1024, (320-32)*sizeof(x[0]));
-    if (scale != 1.0f)
-        dsp->vector_fmul_scalar(x+288, in, scale, 1024);
-    else
-        memcpy(x+288, in, 1024*sizeof(*x));
+    memcpy(x+288, in,         1024*sizeof(x[0]));
      for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
                                 // are not supported
-        float re, im;
          dsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
          for (k = 0; k < 64; k++) {
              float f = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256];
-            z[k] = f * analysis_cos_pre[k];
-            z[k+64] = f;
+            z[k] = f;
          }
-        ff_rdft_calc(rdft, z);
-        re = z[0] * 0.5f;
-        im = 0.5f * dsp->scalarproduct_float(z+64, analysis_sin_pre, 64);
-        W[1][i][0][0] = re * analysis_cossin_post[0][0] - im * analysis_cossin_post[0][1];
-        W[1][i][0][1] = re * analysis_cossin_post[0][1] + im * analysis_cossin_post[0][0];
+        //Shuffle to IMDCT
+        z[64] = z[0];
          for (k = 1; k < 32; k++) {
-            re = z[2*k  ] - re;
-            im = z[2*k+1] - im;
-            W[1][i][k][0] = re * analysis_cossin_post[k][0] - im * analysis_cossin_post[k][1];
-            W[1][i][k][1] = re * analysis_cossin_post[k][1] + im * analysis_cossin_post[k][0];
+            z[64+2*k-1] =  z[   k];
+            z[64+2*k  ] = -z[64-k];
+        }
+        z[64+63] = z[32];
+
+        mdct->imdct_half(mdct, z, z+64);
+        for (k = 0; k < 32; k++) {
+            W[1][i][k][0] = -z[63-k];
+            W[1][i][k][1] = z[k];
          }
          x += 32;
      }
@@ -1179,14 +1175,12 @@ static void sbr_qmf_analysis(DSPContext *dsp, RDFTContext *rdft, const float *in
   * (14496-3 sp04 p206)
   */
  static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
-                              float *out, float X[2][32][64],
+                              float *out, float X[2][38][64],
                                float mdct_buf[2][64],
-                              float *v0, int *v_off, const unsigned int div,
-                              float bias, float scale)
+                              float *v0, int *v_off, const unsigned int div)
  {
      int i, n;
      const float *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
-    int scale_and_bias = scale != 1.0f || bias != 0.0f;
      float *v;
      for (i = 0; i < 32; i++) {
          if (*v_off == 0) {
@@ -1197,21 +1191,22 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
              *v_off -= 128 >> div;
          }
          v = v0 + *v_off;
-        for (n = 1; n < 64 >> div; n+=2) {
-            X[1][i][n] = -X[1][i][n];
-        }
-        if (div) {
-            memset(X[0][i]+32, 0, 32*sizeof(float));
-            memset(X[1][i]+32, 0, 32*sizeof(float));
-        }
-        ff_imdct_half(mdct, mdct_buf[0], X[0][i]);
-        ff_imdct_half(mdct, mdct_buf[1], X[1][i]);
          if (div) {
              for (n = 0; n < 32; n++) {
-                v[      n] = -mdct_buf[0][63 - 2*n] + mdct_buf[1][2*n    ];
-                v[ 63 - n] =  mdct_buf[0][62 - 2*n] + mdct_buf[1][2*n + 1];
+                X[0][i][   n] = -X[0][i][n];
+                X[0][i][32+n] =  X[1][i][31-n];
+            }
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            for (n = 0; n < 32; n++) {
+                v[     n] =  mdct_buf[0][63 - 2*n];
+                v[63 - n] = -mdct_buf[0][62 - 2*n];
              }
          } else {
+            for (n = 1; n < 64; n+=2) {
+                X[1][i][n] = -X[1][i][n];
+            }
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
              for (n = 0; n < 64; n++) {
                  v[      n] = -mdct_buf[0][63 -   n] + mdct_buf[1][  n    ];
                  v[127 - n] =  mdct_buf[0][63 -   n] + mdct_buf[1][  n    ];
@@ -1227,9 +1222,6 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
          dsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out   , 64 >> div);
          dsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out   , 64 >> div);
          dsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out   , 64 >> div);
-        if (scale_and_bias)
-            for (n = 0; n < 64 >> div; n++)
-                out[n] = out[n] * scale + bias;
          out += 64 >> div;
      }
  }
@@ -1414,7 +1406,7 @@ static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr,
  }
  
  /// Generate the subband filtered lowband
-static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][32][64],
+static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][38][64],
                       const float X_low[32][40][2], const float Y[2][38][64][2],
                       int ch)
  {
@@ -1436,7 +1428,7 @@ static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][32][64],
      }
  
      for (k = 0; k < sbr->kx[1]; k++) {
-        for (i = i_Temp; i < i_f; i++) {
+        for (i = i_Temp; i < 38; i++) {
              X[0][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][0];
              X[1][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][1];
          }
@@ -1585,7 +1577,7 @@ static void sbr_gain_calc(AACContext *ac, SpectralBandReplication *sbr,
                  sum[1] += sbr->e_curr[e][m];
              }
              gain_max = limgain[sbr->bs_limiter_gains] * sqrtf((FLT_EPSILON + sum[0]) / (FLT_EPSILON + sum[1]));
-            gain_max = FFMIN(100000, gain_max);
+            gain_max = FFMIN(100000.f, gain_max);
              for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
                  float q_m_max   = sbr->q_m[e][m] * gain_max / sbr->gain[e][m];
                  sbr->q_m[e][m]  = FFMIN(sbr->q_m[e][m], q_m_max);
@@ -1599,7 +1591,7 @@ static void sbr_gain_calc(AACContext *ac, SpectralBandReplication *sbr,
                            + (delta && !sbr->s_m[e][m]) * sbr->q_m[e][m] * sbr->q_m[e][m];
              }
              gain_boost = sqrtf((FLT_EPSILON + sum[0]) / (FLT_EPSILON + sum[1]));
-            gain_boost = FFMIN(1.584893192, gain_boost);
+            gain_boost = FFMIN(1.584893192f, gain_boost);
              for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
                  sbr->gain[e][m] *= gain_boost;
                  sbr->q_m[e][m]  *= gain_boost;
@@ -1730,9 +1722,9 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
      }
      for (ch = 0; ch < nch; ch++) {
          /* decode channel */
-        sbr_qmf_analysis(&ac->dsp, &sbr->rdft, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
+        sbr_qmf_analysis(&ac->dsp, &sbr->mdct_ana, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
                           (float*)sbr->qmf_filter_scratch,
-                         sbr->data[ch].W, 1/(-1024 * ac->sf_scale));
+                         sbr->data[ch].W);
          sbr_lf_gen(ac, sbr, sbr->X_low, sbr->data[ch].W);
          if (sbr->start) {
              sbr_hf_inverse_filter(sbr->alpha0, sbr->alpha1, sbr->X_low, sbr->k[0]);
@@ -1752,15 +1744,23 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
          /* synthesis */
          sbr_x_gen(sbr, sbr->X[ch], sbr->X_low, sbr->data[ch].Y, ch);
      }
+
+    if (ac->m4ac.ps == 1) {
+        if (sbr->ps.start) {
+            ff_ps_apply(ac->avctx, &sbr->ps, sbr->X[0], sbr->X[1], sbr->kx[1] + sbr->m[1]);
+        } else {
+            memcpy(sbr->X[1], sbr->X[0], sizeof(sbr->X[0]));
+        }
+        nch = 2;
+    }
+
      sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, L, sbr->X[0], sbr->qmf_filter_scratch,
                        sbr->data[0].synthesis_filterbank_samples,
                        &sbr->data[0].synthesis_filterbank_samples_offset,
-                      downsampled,
-                      ac->add_bias, -1024 * ac->sf_scale);
+                      downsampled);
      if (nch == 2)
          sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, R, sbr->X[1], sbr->qmf_filter_scratch,
                            sbr->data[1].synthesis_filterbank_samples,
                            &sbr->data[1].synthesis_filterbank_samples_offset,
-                          downsampled,
-                          ac->add_bias, -1024 * ac->sf_scale);
+                          downsampled);
  }