Merge commit 'bd71c300f9ba7f9875bb5df17ce522e9128bae10'

[ffmpeg] / libavcodec / wmalosslessdec.c
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c

index 99139eba78edab17e5e362f6be0db6b4f28fd4d1..ca62dc1cf98b8572e42936755ed074e39ae4c52e 100644 (file)
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -31,6 +31,7 @@
  #include "internal.h"
  #include "get_bits.h"
  #include "put_bits.h"
+#include "lossless_audiodsp.h"
  #include "wma.h"
  #include "wma_common.h"
  
@@ -46,6 +47,7 @@
  #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS)    ///< maximum block size
  #define WMALL_BLOCK_SIZES    (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes
  
+#define WMALL_COEFF_PAD_SIZE   16                       ///< pad coef buffers with 0 for use with SIMD
  
  /**
   * @brief frame-specific decoder context for a single channel
@@ -69,6 +71,7 @@ typedef struct WmallDecodeCtx {
      /* generic decoder variables */
      AVCodecContext  *avctx;
      AVFrame         *frame;
+    LLAudDSPContext dsp;                           ///< accelerated DSP functions
      uint8_t         frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE];  ///< compressed frame data
      PutBitContext   pb;                             ///< context for filling the frame_data buffer
  
@@ -143,9 +146,9 @@ typedef struct WmallDecodeCtx {
          int scaling;
          int coefsend;
          int bitsend;
-        int16_t coefs[MAX_ORDER];
-        int16_t lms_prevvalues[MAX_ORDER * 2];
-        int16_t lms_updates[MAX_ORDER * 2];
+        DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2];
+        DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2];
          int recent;
      } cdlms[WMALL_MAX_CHANNELS][9];
  
@@ -186,6 +189,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
      }
  
      s->avctx = avctx;
+    ff_llauddsp_init(&s->dsp);
      init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
  
      if (avctx->extradata_size >= 18) {
@@ -459,6 +463,13 @@ static int decode_cdlms(WmallDecodeCtx *s)
                  s->cdlms[0][0].order = 0;
                  return AVERROR_INVALIDDATA;
              }
+            if(s->cdlms[c][i].order & 8) {
+                static int warned;
+                if(!warned)
+                    avpriv_request_sample(s->avctx, "CDLMS of order %d",
+                                          s->cdlms[c][i].order);
+                warned = 1;
+            }
          }
  
          for (i = 0; i < s->cdlms_ttl[c]; i++)
@@ -484,6 +495,10 @@ static int decode_cdlms(WmallDecodeCtx *s)
                          (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
              }
          }
+
+        for (i = 0; i < s->cdlms_ttl[c]; i++)
+            memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
+                   0, WMALL_COEFF_PAD_SIZE);
      }
  
      return 0;
@@ -693,35 +708,11 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size)
      }
  }
  
-static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
-{
-    int pred = 0, icoef;
-    int recent = s->cdlms[ich][ilms].recent;
-
-    for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-        pred += s->cdlms[ich][ilms].coefs[icoef] *
-                s->cdlms[ich][ilms].lms_prevvalues[icoef + recent];
-
-    return pred;
-}
-
-static void lms_update(WmallDecodeCtx *s, int ich, int ilms,
-                       int input, int residue)
+static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
  {
-    int icoef;
      int recent = s->cdlms[ich][ilms].recent;
      int range  = 1 << s->bits_per_sample - 1;
  
-    if (residue < 0) {
-        for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-            s->cdlms[ich][ilms].coefs[icoef] -=
-                s->cdlms[ich][ilms].lms_updates[icoef + recent];
-    } else if (residue > 0) {
-        for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-            s->cdlms[ich][ilms].coefs[icoef] +=
-                s->cdlms[ich][ilms].lms_updates[icoef + recent];
-    }
-
      if (recent)
          recent--;
      else {
@@ -782,6 +773,9 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
      s->update_speed[ich] = 8;
  }
  
+/** Get sign of integer (1 for positive, -1 for negative and 0 for zero) */
+#define WMASIGN(x) ((x > 0) - (x < 0))
+
  static void revert_cdlms(WmallDecodeCtx *s, int ch,
                           int coef_begin, int coef_end)
  {
@@ -792,9 +786,15 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
          for (icoef = coef_begin; icoef < coef_end; icoef++) {
              pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
              residue = s->channel_residues[ch][icoef];
-            pred += lms_predict(s, ch, ilms);
+            pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs,
+                                                        s->cdlms[ch][ilms].lms_prevvalues
+                                                            + s->cdlms[ch][ilms].recent,
+                                                        s->cdlms[ch][ilms].lms_updates
+                                                            + s->cdlms[ch][ilms].recent,
+                                                        s->cdlms[ch][ilms].order,
+                                                        WMASIGN(residue));
              input = residue + (pred >> s->cdlms[ch][ilms].scaling);
-            lms_update(s, ch, ilms, input, residue);
+            lms_update(s, ch, ilms, input);
              s->channel_residues[ch][icoef] = input;
          }
      }