return sum;
}
+static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
+{ /* For each of the two components, add the absolute values of the
+   * mvdleft and mvdtop entries and classify the sum as 0, 1 or 2 via
+   * (sum > 2) + (sum > 32).  Component 0's class is returned in the low
+   * 16 bits of the result, component 1's class shifted left by 16. */
+ int amvd0 = abs(mvdleft[0]) + abs(mvdtop[0]);
+ int amvd1 = abs(mvdleft[1]) + abs(mvdtop[1]);
+ amvd0 = (amvd0 > 2) + (amvd0 > 32); /* 0, 1 or 2 */
+ amvd1 = (amvd1 > 2) + (amvd1 > 32); /* 0, 1 or 2 */
+ return amvd0 + (amvd1<<16); /* pack both classes into one 32-bit value */
+}
+
/****************************************************************************
*
****************************************************************************/
}
else return array_non_zero_int_c( v, i_count );
}
+#define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext /* uses of x264_cabac_amvd_sum after this point resolve to the MMXEXT version */
+static ALWAYS_INLINE uint32_t x264_cabac_amvd_sum_mmxext(int16_t *mvdleft, int16_t *mvdtop)
+{ /* Inline-asm variant of the amvd classification.  Reads two int16_t
+   * from each of mvdleft and mvdtop and, per 16-bit lane, computes
+   * MIN(((|left| + |top| + 28) * 2184) >> 16, 2).  The two lane results
+   * are returned in the low and high 16 bits of the result.
+   * NOTE(review): the asm statement lists no MMX registers as clobbered
+   * and executes no emms -- confirm this matches how the surrounding
+   * code manages MMX/x87 state. */
+ static const uint64_t pw_2 = 0x0002000200020002ULL; /* 2 in every 16-bit lane: upper clamp */
+ static const uint64_t pw_28 = 0x001C001C001C001CULL; /* 28 in every 16-bit lane: bias added before the multiply */
+ static const uint64_t pw_2184 = 0x0888088808880888ULL; /* 2184 in every 16-bit lane: fixed-point multiplier */
+ /* MIN(((x+28)*2184)>>16,2) = (x>2) + (x>32)
+  * with x = |left| + |top|.  Boundary check:
+  *   x = 2  -> 30*2184 =  65520 <  65536 -> 0
+  *   x = 3  -> 31*2184 =  67704          -> 1
+  *   x = 32 -> 60*2184 = 131040 < 131072 -> 1
+  *   x = 33 -> 61*2184 = 133224          -> 2 */
+ /* 2184 = fix16(1/30), i.e. 65536/30 rounded down */
+ uint32_t amvd;
+ asm(
+ "movd %1, %%mm0 \n" /* mm0 = 32 bits read at mvdleft (two 16-bit values) */
+ "movd %2, %%mm1 \n" /* mm1 = 32 bits read at mvdtop (two 16-bit values) */
+ "pxor %%mm2, %%mm2 \n" /* mm2 = 0 */
+ "pxor %%mm3, %%mm3 \n" /* mm3 = 0 */
+ "psubw %%mm0, %%mm2 \n" /* mm2 = -mm0 per word */
+ "psubw %%mm1, %%mm3 \n" /* mm3 = -mm1 per word */
+ "pmaxsw %%mm2, %%mm0 \n" /* mm0 = max(mm0, -mm0) = |left| per word */
+ "pmaxsw %%mm3, %%mm1 \n" /* mm1 = max(mm1, -mm1) = |top| per word */
+ "paddw %3, %%mm0 \n" /* mm0 += 28 (pw_28) */
+ "paddw %%mm1, %%mm0 \n" /* mm0 = |left| + |top| + 28 */
+ "pmulhuw %4, %%mm0 \n" /* mm0 = high 16 bits of unsigned mm0 * 2184 (pw_2184) */
+ "pminsw %5, %%mm0 \n" /* mm0 = min(mm0, 2) (pw_2) */
+ "movd %%mm0, %0 \n" /* low 32 bits of mm0 -> amvd */
+ :"=r"(amvd) /* %0 */
+ :"m"(*(uint32_t*)mvdleft),"m"(*(uint32_t*)mvdtop), /* %1, %2: each pair of int16_t accessed as one 32-bit memory operand */
+ "m"(pw_28),"m"(pw_2184),"m"(pw_2) /* %3, %4, %5 */
+ );
+ return amvd;
+}
#endif
#endif
x264_cabac_encode_decision( cb, 54 + ctx, 0 );
}
-static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd )
+static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
{
static const uint8_t ctxes[9] = { 0,3,4,5,6,6,6,6,6 };
- const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) +
- abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] );
const int i_abs = abs( mvd );
const int ctxbase = l ? 47 : 40;
- int ctx = (amvd>2) + (amvd>32);
int i;
if( i_abs == 0 )
static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height )
{
DECLARE_ALIGNED_4( int16_t mvp[2] );
+ uint32_t amvd; /* packed result of x264_cabac_amvd_sum: one value per mvd component */
int mdx, mdy;
/* Calculate mvd */
x264_mb_predict_mv( h, i_list, idx, width, mvp );
mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
+ amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
+ h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]); /* cached mvd at scan8 offsets -1 and -8 are passed as mvdleft and mvdtop */
/* encode */
- x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx );
- x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy );
+ x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF ); /* low 16 bits: ctx for component 0 */
+ x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 ); /* high 16 bits: ctx for component 1 */
return pack16to32_mask(mdx,mdy);
}