/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ * Copyright (c) 2011 Daniel Kang
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "dsputil_mmx.h"
+#if HAVE_INLINE_ASM
+
/***********************************/
/* motion compensation */
"2: \n\t"\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "rm"(h)\
: "memory"\
);\
src += 4-(h+5)*srcStride;\
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
"2: \n\t"\
: "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "g"(size)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "rm"(size)\
: "memory"\
);\
tmp += 4;\
"2: \n\t"\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "rm"(h)\
: XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", \
"%xmm4", "%xmm5", "%xmm6", "%xmm7",)\
"memory"\
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
"2: \n\t"
: "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "g"(size)
+ : "c"(tmp), "S"((x86_reg)srcStride), "rm"(size)
: XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",)
"memory"
QPEL(avg_, 8, XMM, 16)\
QPEL(avg_, 16,XMM, 16)\
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
#define PAVGB "pavgusb"
QPEL_H264(put_, PUT_OP, 3dnow)
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
H264_MC_816(H264_MC_H, ssse3)
H264_MC_816(H264_MC_HV, ssse3)
#endif
+
+#endif /* HAVE_INLINE_ASM */
+
+//10bit
+#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
+void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
+ (uint8_t *dst, uint8_t *src, int stride);
+
+#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+#define LUMA_MC_816(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+LUMA_MC_ALL(10, mc00, mmxext)
+LUMA_MC_ALL(10, mc10, mmxext)
+LUMA_MC_ALL(10, mc20, mmxext)
+LUMA_MC_ALL(10, mc30, mmxext)
+LUMA_MC_ALL(10, mc01, mmxext)
+LUMA_MC_ALL(10, mc11, mmxext)
+LUMA_MC_ALL(10, mc21, mmxext)
+LUMA_MC_ALL(10, mc31, mmxext)
+LUMA_MC_ALL(10, mc02, mmxext)
+LUMA_MC_ALL(10, mc12, mmxext)
+LUMA_MC_ALL(10, mc22, mmxext)
+LUMA_MC_ALL(10, mc32, mmxext)
+LUMA_MC_ALL(10, mc03, mmxext)
+LUMA_MC_ALL(10, mc13, mmxext)
+LUMA_MC_ALL(10, mc23, mmxext)
+LUMA_MC_ALL(10, mc33, mmxext)
+
+LUMA_MC_816(10, mc00, sse2)
+LUMA_MC_816(10, mc10, sse2)
+LUMA_MC_816(10, mc10, sse2_cache64)
+LUMA_MC_816(10, mc10, ssse3_cache64)
+LUMA_MC_816(10, mc20, sse2)
+LUMA_MC_816(10, mc20, sse2_cache64)
+LUMA_MC_816(10, mc20, ssse3_cache64)
+LUMA_MC_816(10, mc30, sse2)
+LUMA_MC_816(10, mc30, sse2_cache64)
+LUMA_MC_816(10, mc30, ssse3_cache64)
+LUMA_MC_816(10, mc01, sse2)
+LUMA_MC_816(10, mc11, sse2)
+LUMA_MC_816(10, mc21, sse2)
+LUMA_MC_816(10, mc31, sse2)
+LUMA_MC_816(10, mc02, sse2)
+LUMA_MC_816(10, mc12, sse2)
+LUMA_MC_816(10, mc22, sse2)
+LUMA_MC_816(10, mc32, sse2)
+LUMA_MC_816(10, mc03, sse2)
+LUMA_MC_816(10, mc13, sse2)
+LUMA_MC_816(10, mc23, sse2)
+LUMA_MC_816(10, mc33, sse2)
+
+#define QPEL16_OPMC(OP, MC, MMX)\
+void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+ src += 8*stride;\
+ dst += 8*stride;\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+}
+
+#define QPEL16_OP(MC, MMX)\
+QPEL16_OPMC(put, MC, MMX)\
+QPEL16_OPMC(avg, MC, MMX)
+
+#define QPEL16(MMX)\
+QPEL16_OP(mc00, MMX)\
+QPEL16_OP(mc01, MMX)\
+QPEL16_OP(mc02, MMX)\
+QPEL16_OP(mc03, MMX)\
+QPEL16_OP(mc10, MMX)\
+QPEL16_OP(mc11, MMX)\
+QPEL16_OP(mc12, MMX)\
+QPEL16_OP(mc13, MMX)\
+QPEL16_OP(mc20, MMX)\
+QPEL16_OP(mc21, MMX)\
+QPEL16_OP(mc22, MMX)\
+QPEL16_OP(mc23, MMX)\
+QPEL16_OP(mc30, MMX)\
+QPEL16_OP(mc31, MMX)\
+QPEL16_OP(mc32, MMX)\
+QPEL16_OP(mc33, MMX)
+
+#if ARCH_X86_32 && HAVE_YASM // ARCH_X86_64 implies sse2+
+QPEL16(mmxext)
+#endif