dsputil: Move RV40-specific bits into rv40dsp

[ffmpeg] / libavcodec / vp8dsp.c
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c

index 86dc42ed37871b486ae3786166065921e61bbdcb..ad7c60309c934496aae685045e00f49132843f43 100644 (file)
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -24,11 +24,12 @@
   * VP8 compatible video decoder
   */
  
-#include "dsputil.h"
+#include "mathops.h"
  #include "vp8dsp.h"
+#include "libavutil/common.h"
  
  // TODO: Maybe add dequant
-static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
+static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
  {
      int i, t0, t1, t2, t3;
  
@@ -61,7 +62,7 @@ static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
      }
  }
  
-static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
+static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
  {
      int i, val = (dc[0] + 3) >> 3;
      dc[0] = 0;
@@ -77,11 +78,10 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
  #define MUL_20091(a) ((((a)*20091) >> 16) + (a))
  #define MUL_35468(a)  (((a)*35468) >> 16)
  
-static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
+static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
  {
      int i, t0, t1, t2, t3;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-    DCTELEM tmp[16];
+    int16_t tmp[16];
  
      for (i = 0; i < 4; i++) {
          t0 = block[0*4+i] + block[2*4+i];
@@ -105,30 +105,29 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
          t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
          t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
  
-        dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
-        dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
-        dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
-        dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
+        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
+        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
+        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
+        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
          dst += stride;
      }
  }
  
-static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
+static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
  {
      int i, dc = (block[0] + 4) >> 3;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
      block[0] = 0;
  
      for (i = 0; i < 4; i++) {
-        dst[0] = cm[dst[0]];
-        dst[1] = cm[dst[1]];
-        dst[2] = cm[dst[2]];
-        dst[3] = cm[dst[3]];
+        dst[0] = av_clip_uint8(dst[0] + dc);
+        dst[1] = av_clip_uint8(dst[1] + dc);
+        dst[2] = av_clip_uint8(dst[2] + dc);
+        dst[3] = av_clip_uint8(dst[3] + dc);
          dst += stride;
      }
  }
  
-static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride)
+static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
  {
      vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride);
      vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride);
@@ -136,7 +135,7 @@ static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t s
      vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride);
  }
  
-static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride)
+static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
  {
      vp8_idct_dc_add_c(dst+ 0, block[0], stride);
      vp8_idct_dc_add_c(dst+ 4, block[1], stride);
@@ -161,7 +160,7 @@ static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4
  {
      LOAD_PIXELS
      int a, f1, f2;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
  
      a = 3*(q0 - p0);
  
@@ -216,7 +215,7 @@ static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
  static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
  {
      int a0, a1, a2, w;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
  
      LOAD_PIXELS
  
@@ -338,7 +337,7 @@ PUT_PIXELS(4)
  static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
  { \
      const uint8_t *filter = subpel_filters[mx-1]; \
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      int x, y; \
  \
      for (y = 0; y < h; y++) { \
@@ -352,7 +351,7 @@ static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dst
  static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
  { \
      const uint8_t *filter = subpel_filters[my-1]; \
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      int x, y; \
  \
      for (y = 0; y < h; y++) { \
@@ -366,7 +365,7 @@ static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dst
  static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
  { \
      const uint8_t *filter = subpel_filters[mx-1]; \
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      int x, y; \
      uint8_t tmp_array[(2*SIZE+VTAPS-1)*SIZE]; \
      uint8_t *tmp = tmp_array; \
@@ -416,7 +415,7 @@ VP8_EPEL_HV(8,  6, 6)
  VP8_EPEL_HV(4,  6, 6)
  
  #define VP8_BILINEAR(SIZE) \
-static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \
+static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
  { \
      int a = 8-mx, b = mx; \
      int x, y; \
@@ -424,24 +423,24 @@ static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t stride, uin
      for (y = 0; y < h; y++) { \
          for (x = 0; x < SIZE; x++) \
              dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
-        dst += stride; \
-        src += stride; \
+        dst += dstride; \
+        src += sstride; \
      } \
  } \
-static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \
+static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
  { \
      int c = 8-my, d = my; \
      int x, y; \
  \
      for (y = 0; y < h; y++) { \
          for (x = 0; x < SIZE; x++) \
-            dst[x] = (c*src[x] + d*src[x+stride] + 4) >> 3; \
-        dst += stride; \
-        src += stride; \
+            dst[x] = (c*src[x] + d*src[x+sstride] + 4) >> 3; \
+        dst += dstride; \
+        src += sstride; \
      } \
  } \
  \
-static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \
+static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
  { \
      int a = 8-mx, b = mx; \
      int c = 8-my, d = my; \
@@ -453,7 +452,7 @@ static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t stride, ui
          for (x = 0; x < SIZE; x++) \
              tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
          tmp += SIZE; \
-        src += stride; \
+        src += sstride; \
      } \
  \
      tmp = tmp_array; \
@@ -461,7 +460,7 @@ static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t stride, ui
      for (y = 0; y < h; y++) { \
          for (x = 0; x < SIZE; x++) \
              dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
-        dst += stride; \
+        dst += dstride; \
          tmp += SIZE; \
      } \
  }
@@ -522,10 +521,10 @@ av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
      VP8_BILINEAR_MC_FUNC(1, 8);
      VP8_BILINEAR_MC_FUNC(2, 4);
  
-    if (HAVE_MMX)
-        ff_vp8dsp_init_x86(dsp);
-    if (HAVE_ALTIVEC)
-        ff_vp8dsp_init_altivec(dsp);
      if (ARCH_ARM)
          ff_vp8dsp_init_arm(dsp);
+    if (ARCH_PPC)
+        ff_vp8dsp_init_ppc(dsp);
+    if (ARCH_X86)
+        ff_vp8dsp_init_x86(dsp);
  }