return amvd0 + (amvd1<<8);
}
+/* Round and clip a list of two-component vectors in place.
+ *
+ * For each of the i_mvc entries in mvc, both components are divided by 4 with
+ * rounding, (v + 2) >> 2, and the results are clipped with x264_clip3:
+ * component 0 against mv_x_min/mv_x_max, component 1 against
+ * mv_y_min/mv_y_max.  The clipped values are written back to the same entry.
+ * An i_mvc of 0 (or less) leaves mvc untouched. */
+static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+{
+    for( int i = 0; i < i_mvc; i++ )
+    {
+        int mx = (mvc[i][0] + 2) >> 2;
+        int my = (mvc[i][1] + 2) >> 2;
+        mvc[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
+        /* The y result must land in component 1 so that it does not
+         * overwrite the x result stored just above. */
+        mvc[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
+    }
+}
+
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
extern const float x264_log2_lz_lut[32];
:"m"(M32( a )), "m"(M32( b )), "m"(M32( c ))
);
}
+
#define x264_predictor_difference x264_predictor_difference_mmxext
static ALWAYS_INLINE int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t i_mvc )
{
);
return sum;
}
+
#define x264_cabac_mvd_sum x264_cabac_mvd_sum_mmxext
static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_t *mvdtop)
{
);
return amvd;
}
+/* MMX inline-asm variant of x264_predictor_roundclip; the #define below makes
+ * uses of the name x264_predictor_roundclip resolve to this function.
+ *
+ * For each of the i_mvc entries of mvc, in place: adds 2 to both 16-bit
+ * components, arithmetic-shifts them right by 2, then clamps them with
+ * pmaxsw/pminsw against the packed minimum and maximum values.
+ *
+ * mvc:   array of vectors, two int16_t per entry; modified in place.
+ * i_mvc: number of entries.  Must be nonzero: with 0 the low-bit test sends
+ *        control straight into the pair loop, which then accesses memory
+ *        before mvc.  (The call site visible in this patch checks i_mvc
+ *        before calling.)
+ * mv_x_min, mv_x_max, mv_y_min, mv_y_max: clamp bounds, combined into 32-bit
+ *        values by pack16to32_mask (presumably x in the low and y in the high
+ *        16 bits, matching the mvc[i][0]/mvc[i][1] layout -- confirm against
+ *        its definition).
+ *
+ * NOTE(review): the "+m" output operand names only the first 8 bytes of mvc,
+ * and no clobber list is given for the MMX registers or the flags the code
+ * uses -- confirm this is sufficient for the targeted compilers. */
+#define x264_predictor_roundclip x264_predictor_roundclip_mmxext
+static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+{
+ uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min );
+ uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max );
+ static const uint64_t pw_2 = 0x0002000200020002ULL; /* the value 2 in each of the four 16-bit lanes */
+ intptr_t i = i_mvc; /* remaining entry count; the asm counts it down to zero */
+ asm(
+ "movd %2, %%mm5 \n" /* mm5 low dword = mv_min */
+ "movd %3, %%mm6 \n" /* mm6 low dword = mv_max */
+ "movq %4, %%mm7 \n" /* mm7 = pw_2 (rounding term) */
+ "punpckldq %%mm5, %%mm5 \n" /* duplicate mv_min into both dwords */
+ "punpckldq %%mm6, %%mm6 \n" /* duplicate mv_max into both dwords */
+ "test $1, %0 \n" /* is the count odd? */
+ "jz 1f \n" /* even: go straight to the pair loop */
+ "movd -4(%5,%0,4), %%mm0 \n" /* odd: load the last entry, mvc[i-1], alone */
+ "paddw %%mm7, %%mm0 \n" /* + 2 */
+ "psraw $2, %%mm0 \n" /* >> 2 (arithmetic) */
+ "pmaxsw %%mm5, %%mm0 \n" /* lower clamp */
+ "pminsw %%mm6, %%mm0 \n" /* upper clamp */
+ "movd %%mm0, -4(%5,%0,4) \n" /* store the entry back */
+ "dec %0 \n"
+ "jz 2f \n" /* that was the only entry: done */
+ "1: \n" /* pair loop: handles entries i-2 and i-1 per iteration */
+ "movq -8(%5,%0,4), %%mm0 \n"
+ "paddw %%mm7, %%mm0 \n"
+ "psraw $2, %%mm0 \n"
+ "pmaxsw %%mm5, %%mm0 \n"
+ "pminsw %%mm6, %%mm0 \n"
+ "movq %%mm0, -8(%5,%0,4) \n"
+ "sub $2, %0 \n"
+ "jnz 1b \n" /* repeat until the count reaches zero */
+ "2: \n"
+ :"+r"(i), "+m"(M64( mvc )) /* %0 = counter, %1 = memory at mvc */
+ :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(mvc) /* %2, %3, %4, %5 */
+ );
+}
+
#undef M128_ZERO
#define M128_ZERO ((__m128){0,0,0,0})
#define x264_union128_t x264_union128_sse_t
* sensible to omit the cost of the MV from the rounded MVP to avoid unfairly
* biasing against use of the predicted motion vector. */
bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
+ uint32_t bmv = pack16to32_mask( bmx, bmy );
+ if( i_mvc )
+ x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
for( int i = 0; i < i_mvc; i++ )
{
- int mx = (mvc[i][0] + 2) >> 2;
- int my = (mvc[i][1] + 2) >> 2;
- if( (mx | my) && ((mx-bmx) | (my-bmy)) )
+ if( M32( mvc[i] ) && (bmv - M32( mvc[i] )) )
{
- mx = x264_clip3( mx, mv_x_min, mv_x_max );
- my = x264_clip3( my, mv_y_min, mv_y_max );
+ int mx = mvc[i][0];
+ int my = mvc[i][1];
COST_MV( mx, my );
}
}