vertically aligning as per Michael's suggestion

[ffmpeg] / libavcodec / dca.c
diff --git a/libavcodec/dca.c b/libavcodec/dca.c

index 98fa1f66f9337713e9fdf31cb72d95096ec76d03..a04e29ddf16cafed2335092ab975a30276c31150 100644 (file)
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -69,7 +69,7 @@ enum DCAMode {
  #define HEADER_SIZE 14
  #define CONVERT_BIAS 384
  
-#define DCA_MAX_FRAME_SIZE 16383
+#define DCA_MAX_FRAME_SIZE 16384
  
  /** Bit allocation */
  typedef struct {
@@ -84,9 +84,6 @@ static BitAlloc dca_tmode;             ///< transition mode VLCs
  static BitAlloc dca_scalefactor;       ///< scalefactor VLCs
  static BitAlloc dca_smpl_bitalloc[11]; ///< samples VLCs
  
-/** Pre-calculated cosine modulation coefs for the QMF */
-static float cos_mod[544];
-
  static av_always_inline int get_bitalloc(GetBitContext *gb, BitAlloc *ba, int idx)
  {
      return get_vlc2(gb, ba->vlc[idx].table, ba->vlc[idx].bits, ba->wrap) + ba->offset;
@@ -157,14 +154,15 @@ typedef struct {
  
      /* Subband samples history (for ADPCM) */
      float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
-    float subband_fir_hist[DCA_PRIM_CHANNELS_MAX][512];
-    float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][64];
+    DECLARE_ALIGNED_16(float, subband_fir_hist[DCA_PRIM_CHANNELS_MAX][512]);
+    float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][32];
+    int hist_index[DCA_PRIM_CHANNELS_MAX];
  
      int output;                 ///< type of output
      int bias;                   ///< output bias
  
      DECLARE_ALIGNED_16(float, samples[1536]);  /* 6 * 256 = 1536, might only need 5 */
-    DECLARE_ALIGNED_16(int16_t, tsamples[1536]);
+    const float *samples_chanptr[6];
  
      uint8_t dca_buffer[DCA_MAX_FRAME_SIZE];
      int dca_buffer_size;        ///< how much data is in the dca_buffer
@@ -176,6 +174,7 @@ typedef struct {
  
      int debug_flag;             ///< used for suppressing repeated error messages output
      DSPContext dsp;
+    MDCTContext imdct;
  } DCAContext;
  
  static av_cold void dca_init_vlcs(void)
@@ -655,15 +654,15 @@ static void qmf_32_subbands(DCAContext * s, int chans,
                              float scale, float bias)
  {
      const float *prCoeff;
-    int i, j, k;
-    float praXin[33], *raXin = &praXin[1];
+    int i, j;
+    DECLARE_ALIGNED_16(float, raXin[32]);
  
-    float *subband_fir_hist = s->subband_fir_hist[chans];
+    int hist_index= s->hist_index[chans];
      float *subband_fir_hist2 = s->subband_fir_noidea[chans];
  
-    int chindex = 0, subindex;
+    int subindex;
  
-    praXin[0] = 0.0;
+    scale *= sqrt(1/8.0);
  
      /* Select filter */
      if (!s->multirate_inter)    /* Non-perfect reconstruction */
@@ -673,50 +672,45 @@ static void qmf_32_subbands(DCAContext * s, int chans,
  
      /* Reconstructed channel sample index */
      for (subindex = 0; subindex < 8; subindex++) {
-        float t1, t2, sum[16], diff[16];
-
+        float *subband_fir_hist = s->subband_fir_hist[chans] + hist_index;
          /* Load in one sample from each subband and clear inactive subbands */
-        for (i = 0; i < s->subband_activity[chans]; i++)
-            raXin[i] = samples_in[i][subindex];
+        for (i = 0; i < s->subband_activity[chans]; i++){
+            if((i-1)&2) raXin[i] = -samples_in[i][subindex];
+            else        raXin[i] =  samples_in[i][subindex];
+        }
          for (; i < 32; i++)
              raXin[i] = 0.0;
  
-        /* Multiply by cosine modulation coefficients and
-         * create temporary arrays SUM and DIFF */
-        for (j = 0, k = 0; k < 16; k++) {
-            t1 = 0.0;
-            t2 = 0.0;
-            for (i = 0; i < 16; i++, j++){
-                t1 += (raXin[2 * i] + raXin[2 * i + 1]) * cos_mod[j];
-                t2 += (raXin[2 * i] + raXin[2 * i - 1]) * cos_mod[j + 256];
-            }
-            sum[k] = t1 + t2;
-            diff[k] = t1 - t2;
-        }
-
-        j = 512;
-        /* Store history */
-        for (k = 0; k < 16; k++)
-            subband_fir_hist[k] = cos_mod[j++] * sum[k];
-        for (k = 0; k < 16; k++)
-            subband_fir_hist[32-k-1] = cos_mod[j++] * diff[k];
+        ff_imdct_half(&s->imdct, subband_fir_hist, raXin);
  
          /* Multiply by filter coefficients */
-        for (k = 31, i = 0; i < 32; i++, k--)
-            for (j = 0; j < 512; j += 64){
-                subband_fir_hist2[i]    += prCoeff[i+j]  * ( subband_fir_hist[i+j] - subband_fir_hist[j+k]);
-                subband_fir_hist2[i+32] += prCoeff[i+j+32]*(-subband_fir_hist[i+j] - subband_fir_hist[j+k]);
+        for (i = 0; i < 16; i++){
+            float a= subband_fir_hist2[i   ];
+            float b= subband_fir_hist2[i+16];
+            float c= 0;
+            float d= 0;
+            for (j = 0; j < 512-hist_index; j += 64){
+                a += prCoeff[i+j   ]*(-subband_fir_hist[15-i+j]);
+                b += prCoeff[i+j+16]*( subband_fir_hist[   i+j]);
+                c += prCoeff[i+j+32]*( subband_fir_hist[16+i+j]);
+                d += prCoeff[i+j+48]*( subband_fir_hist[31-i+j]);
              }
+            for (     ; j < 512; j += 64){
+                a += prCoeff[i+j   ]*(-subband_fir_hist[15-i+j-512]);
+                b += prCoeff[i+j+16]*( subband_fir_hist[   i+j-512]);
+                c += prCoeff[i+j+32]*( subband_fir_hist[16+i+j-512]);
+                d += prCoeff[i+j+48]*( subband_fir_hist[31-i+j-512]);
+            }
+            samples_out[i   ] = a * scale + bias;
+            samples_out[i+16] = b * scale + bias;
+            subband_fir_hist2[i   ] = c;
+            subband_fir_hist2[i+16] = d;
+        }
+        samples_out+= 32;
  
-        /* Create 32 PCM output samples */
-        for (i = 0; i < 32; i++)
-            samples_out[chindex++] = subband_fir_hist2[i] * scale + bias;
-
-        /* Update working arrays */
-        memmove(&subband_fir_hist[32], &subband_fir_hist[0], (512 - 32) * sizeof(float));
-        memmove(&subband_fir_hist2[0], &subband_fir_hist2[32], 32 * sizeof(float));
-        memset(&subband_fir_hist2[32], 0, 32 * sizeof(float));
+        hist_index = (hist_index-32)&511;
      }
+    s->hist_index[chans]= hist_index;
  }
  
  static void lfe_interpolation_fir(int decimation_select,
@@ -990,7 +984,7 @@ static int dca_subsubframe(DCAContext * s)
  /*        static float pcm_to_double[8] =
              {32768.0, 32768.0, 524288.0, 524288.0, 0, 8388608.0, 8388608.0};*/
           qmf_32_subbands(s, k, subband_samples[k], &s->samples[256 * k],
-                            2.0 / 3 /*pcm_to_double[s->source_pcm_res] */ ,
+                            M_SQRT1_2 /*pcm_to_double[s->source_pcm_res] */ ,
                              0 /*s->bias */ );
      }
  
@@ -1116,12 +1110,12 @@ static int dca_convert_bitstream(const uint8_t * src, int src_size, uint8_t * ds
      mrk = AV_RB32(src);
      switch (mrk) {
      case DCA_MARKER_RAW_BE:
-        memcpy(dst, src, FFMIN(src_size, max_size));
-        return FFMIN(src_size, max_size);
+        memcpy(dst, src, src_size);
+        return src_size;
      case DCA_MARKER_RAW_LE:
-        for (i = 0; i < (FFMIN(src_size, max_size) + 1) >> 1; i++)
+        for (i = 0; i < (src_size + 1) >> 1; i++)
              *sdst++ = bswap_16(*ssrc++);
-        return FFMIN(src_size, max_size);
+        return src_size;
      case DCA_MARKER_14B_BE:
      case DCA_MARKER_14B_LE:
          init_put_bits(&pb, dst, max_size);
@@ -1145,7 +1139,7 @@ static int dca_decode_frame(AVCodecContext * avctx,
                              const uint8_t * buf, int buf_size)
  {
  
-    int i, j, k;
+    int i;
      int16_t *samples = data;
      DCAContext *s = avctx->priv_data;
      int channels;
@@ -1173,20 +1167,21 @@ static int dca_decode_frame(AVCodecContext * avctx,
          s->output = DCA_STEREO;
      }
  
-    avctx->channels = channels;
+    /* There is nothing that prevents a DTS frame from changing its channel
+       configuration, but FFmpeg does not support that, so only set the channels
+       if they were previously unset. Ideally, during the first probe for channels,
+       the CRC should be checked and avctx->channels set only when the CRC is ok.
+       Right now the decoder could set the channels based on a broken first frame. */
+    if (!avctx->channels)
+        avctx->channels = channels;
+
      if(*data_size < (s->sample_blocks / 8) * 256 * sizeof(int16_t) * channels)
          return -1;
-    *data_size = 0;
+    *data_size = 256 / 8 * s->sample_blocks * sizeof(int16_t) * channels;
      for (i = 0; i < (s->sample_blocks / 8); i++) {
          dca_decode_block(s);
-        s->dsp.float_to_int16(s->tsamples, s->samples, 256 * channels);
-        /* interleave samples */
-        for (j = 0; j < 256; j++) {
-            for (k = 0; k < channels; k++)
-                samples[k] = s->tsamples[j + k * 256];
-            samples += channels;
-        }
-        *data_size += 256 * sizeof(int16_t) * channels;
+        s->dsp.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
+        samples += 256 * channels;
      }
  
      return buf_size;
@@ -1194,36 +1189,6 @@ static int dca_decode_frame(AVCodecContext * avctx,
  
  
  
-/**
- * Build the cosine modulation tables for the QMF
- *
- * @param s     pointer to the DCAContext
- */
-
-static av_cold void pre_calc_cosmod(DCAContext * s)
-{
-    int i, j, k;
-    static int cosmod_initialized = 0;
-
-    if(cosmod_initialized) return;
-    for (j = 0, k = 0; k < 16; k++)
-        for (i = 0; i < 16; i++)
-            cos_mod[j++] = cos((2 * i + 1) * (2 * k + 1) * M_PI / 64);
-
-    for (k = 0; k < 16; k++)
-        for (i = 0; i < 16; i++)
-            cos_mod[j++] = cos((i) * (2 * k + 1) * M_PI / 32);
-
-    for (k = 0; k < 16; k++)
-        cos_mod[j++] = 0.25 / (2 * cos((2 * k + 1) * M_PI / 128));
-
-    for (k = 0; k < 16; k++)
-        cos_mod[j++] = -0.25 / (2.0 * sin((2 * k + 1) * M_PI / 128));
-
-    cosmod_initialized = 1;
-}
-
-
  /**
   * DCA initialization
   *
@@ -1233,22 +1198,31 @@ static av_cold void pre_calc_cosmod(DCAContext * s)
  static av_cold int dca_decode_init(AVCodecContext * avctx)
  {
      DCAContext *s = avctx->priv_data;
+    int i;
  
      s->avctx = avctx;
      dca_init_vlcs();
-    pre_calc_cosmod(s);
  
      dsputil_init(&s->dsp, avctx);
+    ff_mdct_init(&s->imdct, 6, 1);
  
      /* allow downmixing to stereo */
      if (avctx->channels > 0 && avctx->request_channels < avctx->channels &&
              avctx->request_channels == 2) {
          avctx->channels = avctx->request_channels;
      }
-
+    for(i = 0; i < 6; i++)
+        s->samples_chanptr[i] = s->samples + i * 256;
+    avctx->sample_fmt = SAMPLE_FMT_S16;
      return 0;
  }
  
+static av_cold int dca_decode_end(AVCodecContext * avctx) /* installed as the decoder's .close callback */
+{
+    DCAContext *s = avctx->priv_data;
+    ff_mdct_end(&s->imdct); /* counterpart of the ff_mdct_init() call in dca_decode_init() */
+    return 0; /* unconditionally returns 0 */
+}
  
  AVCodec dca_decoder = {
      .name = "dca",
@@ -1257,4 +1231,6 @@ AVCodec dca_decoder = {
      .priv_data_size = sizeof(DCAContext),
      .init = dca_decode_init,
      .decode = dca_decode_frame,
+    .close = dca_decode_end,
+    .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
  };