Merge commit 'ed9625eb62be1e1c44cecdd73ea0d80077a15d48'

[ffmpeg] / libavcodec / vc1dsp.c
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c

index e47bd5aff2c826911d69130dfd20afce7ea06a58..6e4a796d405cb02691243c0a2f477334288b9e08 100644 (file)
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -2,20 +2,20 @@
   * VC-1 and WMV3 decoder - DSP functions
   * Copyright (c) 2006 Konstantin Shishkov
   *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
   *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -25,8 +25,11 @@
   *
   */
  
+#include "libavutil/avassert.h"
  #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
  #include "h264chroma.h"
+#include "rnd_avg.h"
  #include "vc1dsp.h"
  
  /* Apply overlap transform to horizontal edge */
@@ -580,10 +583,10 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
  }
  
  /* Function used to do motion compensation with bicubic interpolation */
-#define VC1_MSPEL_MC(OP, OPNAME)                                              \
+#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
  static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                      const uint8_t *src,       \
-                                                    int stride,               \
+                                                    ptrdiff_t stride,         \
                                                      int hmode,                \
                                                      int vmode,                \
                                                      int rnd)                  \
@@ -638,13 +641,24 @@ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
          dst += stride;                                                        \
          src += stride;                                                        \
      }                                                                         \
+}\
+static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
+    int i;\
+    for(i=0; i<8; i++){\
+        OP4(*(uint32_t*)(block  ), AV_RN32(pixels  ));\
+        OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
  }
  
  #define op_put(a, b) a = av_clip_uint8(b)
  #define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
+#define op4_avg(a, b) a = rnd_avg32(a, b)
+#define op4_put(a, b) a = b
  
-VC1_MSPEL_MC(op_put, put_)
-VC1_MSPEL_MC(op_avg, avg_)
+VC1_MSPEL_MC(op_put, op4_put, put_)
+VC1_MSPEL_MC(op_avg, op4_avg, avg_)
  
  /* pixel functions - really are entry points to vc1_mspel_mc */
  
@@ -694,7 +708,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
      const int D =     (x) *     (y);
      int i;
  
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
  
      for (i = 0; i < h; i++) {
          dst[0] = chroma_mc(0);
@@ -719,7 +733,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
      const int D =     (x) *     (y);
      int i;
  
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
  
      for (i = 0; i < h; i++) {
          dst[0] = chroma_mc(0);
@@ -742,7 +756,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
      const int D =     (x) *     (y);
      int i;
  
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
  
      for (i = 0; i < h; i++) {
          dst[0] = avg2(dst[0], chroma_mc(0));
@@ -768,7 +782,7 @@ static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
      const int D = (    x) * (    y);
      int i;
  
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
  
      for (i = 0; i < h; i++) {
          dst[0] = avg2(dst[0], chroma_mc(0));
@@ -887,7 +901,7 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
      dsp->vc1_v_loop_filter16  = vc1_v_loop_filter16_c;
      dsp->vc1_h_loop_filter16  = vc1_h_loop_filter16_c;
  
-    dsp->put_vc1_mspel_pixels_tab[0]  = ff_put_pixels8x8_c;
+    dsp->put_vc1_mspel_pixels_tab[0]  = put_pixels8x8_c;
      dsp->put_vc1_mspel_pixels_tab[1]  = put_vc1_mspel_mc10_c;
      dsp->put_vc1_mspel_pixels_tab[2]  = put_vc1_mspel_mc20_c;
      dsp->put_vc1_mspel_pixels_tab[3]  = put_vc1_mspel_mc30_c;
@@ -904,7 +918,7 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
      dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
      dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
  
-    dsp->avg_vc1_mspel_pixels_tab[0]  = ff_avg_pixels8x8_c;
+    dsp->avg_vc1_mspel_pixels_tab[0]  = avg_pixels8x8_c;
      dsp->avg_vc1_mspel_pixels_tab[1]  = avg_vc1_mspel_mc10_c;
      dsp->avg_vc1_mspel_pixels_tab[2]  = avg_vc1_mspel_mc20_c;
      dsp->avg_vc1_mspel_pixels_tab[3]  = avg_vc1_mspel_mc30_c;