+#define CP128(dst,src) M128(dst) = M128(src) /* assigns M128(src) to M128(dst); expands to a bare, unparenthesized assignment, so use it as a statement. M128 is defined outside this chunk - presumably a 128-bit access, TODO confirm */
+
+#if HIGH_BIT_DEPTH /* selects the sample and coefficient types; this branch uses 16-bit samples */
+ typedef uint16_t pixel; /* one 16-bit unsigned sample */
+ typedef uint64_t pixel4; /* 64 bits: the width of four 16-bit pixels */
+ typedef int32_t dctcoef; /* signed 32-bit coefficient (presumably a DCT coefficient, going by the name) */
+ typedef uint32_t udctcoef; /* unsigned type of the same width as dctcoef */
+
+# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL) /* the constant has a 1 in each of four 16-bit lanes, so the product repeats x in all four lanes when x fits in 16 bits */
+# define MPIXEL_X4(src) M64(src) /* forwards to M64, defined outside this chunk; 64 bits matches the pixel4 width here */
+#else /* HIGH_BIT_DEPTH is zero: 8-bit samples */
+ typedef uint8_t pixel; /* one 8-bit unsigned sample */
+ typedef uint32_t pixel4; /* 32 bits: the width of four 8-bit pixels */
+ typedef int16_t dctcoef; /* signed 16-bit coefficient (presumably a DCT coefficient, going by the name) */
+ typedef uint16_t udctcoef; /* unsigned type of the same width as dctcoef */
+
+# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) /* the constant has a 1 in each of four bytes, so the product repeats x in all four bytes when x fits in 8 bits */
+# define MPIXEL_X4(src) M32(src) /* forwards to M32, defined outside this chunk; 32 bits matches the pixel4 width here */
+#endif /* HIGH_BIT_DEPTH */
+
+#define BIT_DEPTH X264_BIT_DEPTH /* short alias for X264_BIT_DEPTH, which is defined outside this chunk */
+
+#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src) /* assigns MPIXEL_X4(src) to MPIXEL_X4(dst), i.e. through M64 when HIGH_BIT_DEPTH is set and M32 otherwise; expands to a bare, unparenthesized assignment, so use it as a statement */
+
+#define X264_SCAN8_SIZE (6*8) /* 48 = 6 rows of 8 columns; larger than every x264_scan8 value (the maximum is 7+5*8 = 47) */
+#define X264_SCAN8_LUMA_SIZE (5*8) /* 40 = rows 0-4 only; larger than every one of the 16 luma values in x264_scan8 (the maximum is 7+4*8 = 39) */
+#define X264_SCAN8_0 (4+1*8) /* 12; the same value as x264_scan8[0], the first luma entry */
+
+static const unsigned x264_scan8[16+2*4+3] = /* 27 entries, each a cell x+y*8 in a grid 8 columns wide; index 0-15 luma, 16-19 Cb, 20-23 Cr, 24 luma DC, 25-26 chroma DC */
+{
+ /* Luma: 16 entries in columns 4-7 of rows 1-4; each line of four values below is one 2x2 square of cells */
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+
+ /* Cb: 4 entries, columns 1-2 of rows 1-2 */
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+
+ /* Cr: 4 entries, columns 1-2 of rows 4-5 */
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+
+ /* Luma DC: 1 entry, column 4 of row 5 */
+ 4+5*8,
+
+ /* Chroma DC: 2 entries, columns 6 and 7 of row 5 */
+ 6+5*8, 7+5*8
+};
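+/* x264_scan8 layout, x across and y down: L = luma, B = Cb, R = Cr, Dy = luma DC, Du and Dv = the two chroma DC entries
+   0 1 2 3 4 5 6 7
+ 0
+ 1   B B   L L L L
+ 2   B B   L L L L
+ 3         L L L L
+ 4   R R   L L L L
+ 5   R R  Dy  DuDv
+*/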