r1548 broke subme < 3 + p8x8/b8x8

author Loren Merritt <pengvado@akuvian.org>

Thu, 29 Apr 2010 17:35:25 +0000 (17:35 +0000)

committer Fiona Glaser <fiona@x264.com>

Thu, 29 Apr 2010 17:52:58 +0000 (10:52 -0700)
author Loren Merritt <pengvado@akuvian.org>
Thu, 29 Apr 2010 17:35:25 +0000 (17:35 +0000)
committer Fiona Glaser <fiona@x264.com>
Thu, 29 Apr 2010 17:52:58 +0000 (10:52 -0700)
diff --git a/common/common.h b/common/common.h

index a3b5d5a95a6d487e305abe1dac4c3715609d5a27..2f35244d0e8f5a75c75ff60a89a8273959914c91 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -188,14 +188,14 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
      return amvd0 + (amvd1<<8);
  }
  
-static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
  {
      for( int i = 0; i < i_mvc; i++ )
      {
          int mx = (mvc[i][0] + 2) >> 2;
          int my = (mvc[i][1] + 2) >> 2;
-        mvc[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
-        mvc[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
+        dst[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
+        dst[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
      }
  }
  
diff --git a/common/x86/util.h b/common/x86/util.h

index 3721fab69eb70a556d8840d6fffdbdbfcaa4e4b1..03050da0592d86766b50498eb28199f205d970d0 100644 (file)
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -109,7 +109,7 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_
  }
  
  #define x264_predictor_roundclip x264_predictor_roundclip_mmxext
-static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
  {
      uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min );
      uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max );
@@ -123,7 +123,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
          "punpckldq %%mm6, %%mm6  \n"
          "test $1, %0             \n"
          "jz 1f                   \n"
-        "movd -4(%5,%0,4), %%mm0 \n"
+        "movd -4(%6,%0,4), %%mm0 \n"
          "paddw %%mm7, %%mm0      \n"
          "psraw $2, %%mm0         \n"
          "pmaxsw %%mm5, %%mm0     \n"
@@ -132,7 +132,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
          "dec %0                  \n"
          "jz 2f                   \n"
          "1:                      \n"
-        "movq -8(%5,%0,4), %%mm0 \n"
+        "movq -8(%6,%0,4), %%mm0 \n"
          "paddw %%mm7, %%mm0      \n"
          "psraw $2, %%mm0         \n"
          "pmaxsw %%mm5, %%mm0     \n"
@@ -141,8 +141,8 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
          "sub $2, %0              \n"
          "jnz 1b                  \n"
          "2:                      \n"
-        :"+r"(i), "+m"(M64( mvc ))
-        :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(mvc)
+        :"+r"(i), "=m"(M64( dst ))
+        :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(dst), "r"(mvc), "m"(M64( mvc ))
      );
  }
  
diff --git a/encoder/me.c b/encoder/me.c

index 3b72d5052ebde6220befcc49265590d518a56a1b..84923f3d3a673eeef4fb68790943fab367e0bdf6 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -245,14 +245,15 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
          pmv = pack16to32_mask( bmx, bmy );
          if( i_mvc > 0 )
          {
-            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
+            x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
              bcost <<= 4;
              for( int i = 1; i <= i_mvc; i++ )
              {
-                if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+                if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
                  {
-                    int mx = mvc[i-1][0];
-                    int my = mvc[i-1][1];
+                    int mx = mvc_fpel[i-1][0];
+                    int my = mvc_fpel[i-1][1];
                      int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
                      cost = (cost << 4) + i;
                      COPY1_IF_LT( bcost, cost );
@@ -260,8 +261,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
              }
              if( bcost&15 )
              {
-                bmx = mvc[(bcost&15)-1][0];
-                bmy = mvc[(bcost&15)-1][1];
+                bmx = mvc_fpel[(bcost&15)-1][0];
+                bmy = mvc_fpel[(bcost&15)-1][1];
              }
              bcost >>= 4;
          }
author	Loren Merritt <pengvado@akuvian.org>
	Thu, 29 Apr 2010 17:35:25 +0000 (17:35 +0000)
committer	Fiona Glaser <fiona@x264.com>
	Thu, 29 Apr 2010 17:52:58 +0000 (10:52 -0700)
common/common.h		patch | blob | history
common/x86/util.h		patch | blob | history
encoder/me.c		patch | blob | history