Implement a SIMD version of emulated_edge_mc() for x86.

[ffmpeg] / libavcodec / iirfilter.c
diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c

index 6133a5405519126f53c4b311822627123354512d..bc63c3991a6f820d2a3f6aa9c0396fe31f610c5f 100644 (file)
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -137,15 +137,15 @@ static int biquad_init_coeffs(void *avc, struct FFIIRFilterCoeffs *c,
  
      if (filt_mode == FF_FILTER_MODE_HIGHPASS) {
          c->gain  =  ((1.0 + cos_w0) / 2.0)  / a0;
-        x0       = (-(1.0 + cos_w0))        / a0;
-        x1       =  ((1.0 + cos_w0) / 2.0)  / a0;
+        x0       =  ((1.0 + cos_w0) / 2.0)  / a0;
+        x1       = (-(1.0 + cos_w0))        / a0;
      } else { // FF_FILTER_MODE_LOWPASS
          c->gain  =  ((1.0 - cos_w0) / 2.0)  / a0;
-        x0       =   (1.0 - cos_w0)         / a0;
-        x1       =  ((1.0 - cos_w0) / 2.0)  / a0;
+        x0       =  ((1.0 - cos_w0) / 2.0)  / a0;
+        x1       =   (1.0 - cos_w0)         / a0;
      }
-    c->cy[0] =  (2.0 *  cos_w0)        / a0;
-    c->cy[1] = (-1.0 + (sin_w0 / 2.0)) / a0;
+    c->cy[0] = (-1.0 + (sin_w0 / 2.0)) / a0;
+    c->cy[1] =  (2.0 *  cos_w0)        / a0;
  
      // divide by gain to make the x coeffs integers.
      // during filtering, the delay state will include the gain multiplication
@@ -256,11 +256,29 @@ av_cold struct FFIIRFilterState* ff_iir_filter_init_state(int order)
      }                                                                       \
  }
  
+#define FILTER_O2(type, fmt) { /* second-order filter over size samples */ \
+    int i; /* uses c, s, size, src, dst, sstep, dstep from scope */         \
+    const type *src0 = src; /* read cursor, advanced by sstep */            \
+    type       *dst0 = dst; /* write cursor, advanced by dstep */           \
+    for (i = 0; i < size; i++) { /* filter, emit, shift state */            \
+        float in = *src0   * c->gain  + /* input scaled by gain */          \
+                   s->x[0] * c->cy[0] + /* feedback from older state */     \
+                   s->x[1] * c->cy[1];  /* feedback from newer state */     \
+        CONV_##fmt(*dst0, s->x[0] + in + s->x[1] * c->cx[1]) /* output */   \
+        s->x[0] = s->x[1]; /* age the state: newer becomes older */         \
+        s->x[1] = in; /* in becomes the newer state */                      \
+        src0 += sstep;                                                      \
+        dst0 += dstep;                                                      \
+    }                                                                       \
+}
+
  void ff_iir_filter(const struct FFIIRFilterCoeffs *c,
                     struct FFIIRFilterState *s, int size,
                     const int16_t *src, int sstep, int16_t *dst, int dstep)
  {
-    if (c->order == 4) {
+    if (c->order == 2) {
+        FILTER_O2(int16_t, S16)
+    } else if (c->order == 4) {
          FILTER_BW_O4(int16_t, S16)
      } else {
          FILTER_DIRECT_FORM_II(int16_t, S16)
@@ -269,9 +287,11 @@ void ff_iir_filter(const struct FFIIRFilterCoeffs *c,
  
  void ff_iir_filter_flt(const struct FFIIRFilterCoeffs *c,
                         struct FFIIRFilterState *s, int size,
-                       const float *src, int sstep, void *dst, int dstep)
+                       const float *src, int sstep, float *dst, int dstep)
  {
-    if (c->order == 4) {
+    if (c->order == 2) {
+        FILTER_O2(float, FLT)
+    } else if (c->order == 4) {
          FILTER_BW_O4(float, FLT)
      } else {
          FILTER_DIRECT_FORM_II(float, FLT)