+#endif // !HIGH_BIT_DEPTH
+
+#define INTRA_MBCMP_8x8( mbcmp, cpu, cpu2 ) /* emits x264_intra_<mbcmp>_x3_8x8<cpu>(): one <mbcmp> score per 8x8 prediction (v, h, dc) */\
+void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] ) /* cpu suffixes this function and the metric; cpu2 suffixes the predictors */\
+{\
+ ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] ); /* pix: buffer handed to every predictor and metric call below */\
+ x264_predict_8x8_v##cpu2( pix, edge ); /* v prediction; presumably fills pix from edge -- confirm against the predictor */\
+ res[0] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[0]: metric of the v prediction vs fenc */\
+ x264_predict_8x8_h##cpu2( pix, edge ); /* h prediction, same pix buffer */\
+ res[1] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[1]: metric of the h prediction vs fenc */\
+ x264_predict_8x8_dc##cpu2( pix, edge ); /* dc prediction, same pix buffer */\
+ res[2] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[2]: metric of the dc prediction vs fenc */\
+}
+
+INTRA_MBCMP_8x8( sad,, _c ) // x264_intra_sad_x3_8x8: empty cpu suffix, _c predictors
+INTRA_MBCMP_8x8(sa8d,, _c ) // x264_intra_sa8d_x3_8x8: empty cpu suffix, _c predictors
+#if HIGH_BIT_DEPTH && HAVE_MMX // extra variants, built only when both macros are nonzero
+#define x264_predict_8x8_v_sse2 x264_predict_8x8_v_sse // the _sse2 expansion below pastes the _sse2 name; redirect it to the _sse predictor
+INTRA_MBCMP_8x8( sad, _mmx2, _c ) // x264_intra_sad_x3_8x8_mmx2: _mmx2 sad metric, _c predictors
+INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) // x264_intra_sa8d_x3_8x8_sse2: _sse2 metric and predictors (v through the alias above)
+#endif // HIGH_BIT_DEPTH && HAVE_MMX
+
+#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 ) /* emits x264_intra_<mbcmp>_x3_<size><chroma><cpu>(): one <mbcmp> score per listed prediction */\
+void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] ) /* predictors: x264_predict_<size><chroma>_<predN><cpu2>; metric: x264_pixel_<mbcmp>_<size><cpu> (no chroma tag) */\
+{\
+ x264_predict_##size##chroma##_##pred1##cpu2( fdec ); /* pred1 is applied to fdec itself; no separate scratch buffer is used here */\
+ res[0] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[0]: metric of fdec after pred1 vs fenc */\
+ x264_predict_##size##chroma##_##pred2##cpu2( fdec ); /* pred2 on the same fdec */\
+ res[1] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[1]: metric of fdec after pred2 vs fenc */\
+ x264_predict_##size##chroma##_##pred3##cpu2( fdec ); /* pred3 on the same fdec */\
+ res[2] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE ); /* res[2]: metric of fdec after pred3 vs fenc */\
+}
+
+INTRA_MBCMP( sad, 4x4, v, h, dc, ,, _c ) // x264_intra_sad_x3_4x4: empty chroma tag and cpu suffix; res order v, h, dc
+INTRA_MBCMP(satd, 4x4, v, h, dc, ,, _c ) // x264_intra_satd_x3_4x4
+INTRA_MBCMP( sad, 8x8, dc, h, v, c,, _c ) // x264_intra_sad_x3_8x8c: the chroma ("c") variants use res order dc, h, v
+INTRA_MBCMP(satd, 8x8, dc, h, v, c,, _c ) // x264_intra_satd_x3_8x8c
+INTRA_MBCMP( sad, 8x16, dc, h, v, c,, _c ) // x264_intra_sad_x3_8x16c
+INTRA_MBCMP(satd, 8x16, dc, h, v, c,, _c ) // x264_intra_satd_x3_8x16c
+INTRA_MBCMP( sad, 16x16, v, h, dc, ,, _c ) // x264_intra_sad_x3_16x16: res order v, h, dc again
+INTRA_MBCMP(satd, 16x16, v, h, dc, ,, _c ) // x264_intra_satd_x3_16x16
+
+#if HAVE_MMX // cpu-suffixed variants, built only when HAVE_MMX is nonzero
+#if HIGH_BIT_DEPTH // variant set for high-bit-depth builds
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx // aliases: names pasted by the expansions below, redirected to another predictor
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c // note: this "_mmx2" name resolves to the _c predictor
+#define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse // _sse2 name -> _sse predictor
+#define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse // _sse2 name -> _sse predictor
+#define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse // _sse2 name -> _sse predictor
+INTRA_MBCMP( sad, 4x4, v, h, dc, , _mmx2, _c ) // x264_intra_sad_x3_4x4_mmx2: _mmx2 metric, _c predictors
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _mmx2 ) // _mmx2 metric and predictors (v through the aliases above)
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 ) // _sse2 metric and predictors (v through the aliases above)
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 ) // from here on: newer metric suffix paired with the _sse2 predictors
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _avx, _sse2 )
+#else // !HIGH_BIT_DEPTH: only 8x16 chroma variants are generated in this branch
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx // _mmx2 name -> _mmx predictor
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 ) // _mmx2 metric and predictors (v through the alias above)
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _sse2, _mmx2 ) // from here on: newer metric suffix paired with the _mmx2 predictors
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse2, _mmx2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _ssse3, _mmx2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse4, _mmx2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _avx, _mmx2 )
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _xop, _mmx2 )
+#endif // HIGH_BIT_DEPTH
+#endif // HAVE_MMX
+
+// No C implementation of intra_satd_x9. See checkasm for its behavior,
+// or see x264_mb_analyse_intra for the entirely different algorithm we
+// use when lacking an asm implementation of it.
+
+