1 ;*****************************************************************************
2 ;* MMX/SSE2/SSSE3-optimized H.264 QPEL code
3 ;*****************************************************************************
4 ;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
5 ;* Copyright (C) 2012 Daniel Kang
7 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
9 ;* This file is part of FFmpeg.
11 ;* FFmpeg is free software; you can redistribute it and/or
12 ;* modify it under the terms of the GNU Lesser General Public
13 ;* License as published by the Free Software Foundation; either
14 ;* version 2.1 of the License, or (at your option) any later version.
16 ;* FFmpeg is distributed in the hope that it will be useful,
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;* Lesser General Public License for more details.
21 ;* You should have received a copy of the GNU Lesser General Public
22 ;* License along with FFmpeg; if not, write to the Free Software
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 ;******************************************************************************
26 %include "libavutil/x86/x86util.asm"
56 %macro QPEL4_H_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "4,5": 4 arguments, 5 general-purpose registers.
57 cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
96 QPEL4_H_LOWPASS_OP put ; base name: put_h264_qpel4_h_lowpass
97 QPEL4_H_LOWPASS_OP avg ; base name: avg_h264_qpel4_h_lowpass
99 %macro QPEL8_H_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "4,5": 4 arguments, 5 general-purpose registers.
100 cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
156 QPEL8_H_LOWPASS_OP put ; base name: put_h264_qpel8_h_lowpass
157 QPEL8_H_LOWPASS_OP avg ; base name: avg_h264_qpel8_h_lowpass
159 %macro QPEL8_H_LOWPASS_OP_XMM 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "4,5,8": 4 arguments, 5 general-purpose
; registers, 8 vector registers.
; NOTE(review): the base symbol name is the same as the one declared by
; QPEL8_H_LOWPASS_OP; presumably the two are told apart by the
; instruction-set suffix x86inc appends -- confirm.
160 cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
199 QPEL8_H_LOWPASS_OP_XMM put ; base name: put_h264_qpel8_h_lowpass
200 QPEL8_H_LOWPASS_OP_XMM avg ; base name: avg_h264_qpel8_h_lowpass
203 %macro QPEL4_H_LOWPASS_L2_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "5,6": 5 arguments, 6 general-purpose registers.
; NOTE(review): the 5th argument is labelled srcStride here, while the
; qpel8 XMM and qpel16 *_h_lowpass_l2 entry points label it src2Stride --
; confirm which source the stride applies to and align the labels.
204 cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
246 QPEL4_H_LOWPASS_L2_OP put ; base name: put_h264_qpel4_h_lowpass_l2
247 QPEL4_H_LOWPASS_L2_OP avg ; base name: avg_h264_qpel4_h_lowpass_l2
250 %macro QPEL8_H_LOWPASS_L2_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "5,6": 5 arguments, 6 general-purpose registers.
; NOTE(review): the 5th argument is labelled srcStride here, while the
; qpel8 XMM and qpel16 *_h_lowpass_l2 entry points label it src2Stride --
; confirm which source the stride applies to and align the labels.
251 cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
310 QPEL8_H_LOWPASS_L2_OP put ; base name: put_h264_qpel8_h_lowpass_l2
311 QPEL8_H_LOWPASS_L2_OP avg ; base name: avg_h264_qpel8_h_lowpass_l2
314 %macro QPEL8_H_LOWPASS_L2_OP_XMM 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "5,6,8": 5 arguments, 6 general-purpose
; registers, 8 vector registers.
315 cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride
357 QPEL8_H_LOWPASS_L2_OP_XMM put ; base name: put_h264_qpel8_h_lowpass_l2
358 QPEL8_H_LOWPASS_L2_OP_XMM avg ; base name: avg_h264_qpel8_h_lowpass_l2
361 ; All functions that call this are required to have function arguments of
362 ; dst, src, dstStride, srcStride
378 op_%1h m6, [r0], m0 ; 1
380 SWAP 0, 1, 2, 3, 4, 5
383 %macro QPEL4_V_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "4,4": 4 arguments, 4 general-purpose registers
; (no scratch register beyond the arguments).
384 cglobal %1_h264_qpel4_v_lowpass, 4,4 ; dst, src, dstStride, srcStride
411 QPEL4_V_LOWPASS_OP put ; base name: put_h264_qpel4_v_lowpass
412 QPEL4_V_LOWPASS_OP avg ; base name: avg_h264_qpel4_v_lowpass
416 %macro QPEL8OR16_V_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol names (put or avg below).
; Two entry points are declared with the same register budget
; (5 arguments, 5 general-purpose registers, 8 vector registers) and the
; same argument list; they differ only in the "_op" name suffix.
; NOTE(review): presumably only one of the two is assembled per
; instantiation, selected by an instruction-set conditional -- confirm.
418 cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h
424 cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride, h
465 QPEL8OR16_V_LOWPASS_OP put
466 QPEL8OR16_V_LOWPASS_OP avg
; The macro is instantiated a second time for both operations.
; NOTE(review): presumably under a different instruction-set selection than
; the pair above -- confirm.
469 QPEL8OR16_V_LOWPASS_OP put
470 QPEL8OR16_V_LOWPASS_OP avg
473 ; All functions that use this are required to have args:
475 %macro FILT_HV 1 ; offset
489 SWAP 0, 1, 2, 3, 4, 5
492 %macro QPEL4_HV1_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol names (put or avg below).
; Declares two entry points:
;   <op>_h264_qpel4_hv_lowpass_v -- 3 arguments, 3 general-purpose registers
;   <op>_h264_qpel4_hv_lowpass_h -- 3 arguments, 4 general-purpose registers
; Per the argument lists, the _v entry takes src and tmp, and the _h entry
; takes tmp and dst.
493 cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride
515 cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
542 QPEL4_HV1_LOWPASS_OP put
543 QPEL4_HV1_LOWPASS_OP avg
545 %macro QPEL8OR16_HV1_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol name.
; x86inc cglobal operands "4,4,8": 4 arguments, 4 general-purpose
; registers, 8 vector registers.
546 cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
585 QPEL8OR16_HV1_LOWPASS_OP put
586 QPEL8OR16_HV1_LOWPASS_OP avg
; NOTE(review): this second instantiation covers put only, with no matching
; avg. The argument list has no dst, so an avg form may be redundant here --
; confirm this is intentional.
589 QPEL8OR16_HV1_LOWPASS_OP put
593 %macro QPEL8OR16_HV2_LOWPASS_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; The fourth argument is unused; it is kept only so that the argument list
; matches the ssse3 and mmxext versions.
; x86inc cglobal operands "5,5": 5 arguments, 5 general-purpose registers.
595 cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
634 QPEL8OR16_HV2_LOWPASS_OP put ; base name: put_h264_qpel8or16_hv2_lowpass_op
635 QPEL8OR16_HV2_LOWPASS_OP avg ; base name: avg_h264_qpel8or16_hv2_lowpass_op
637 %macro QPEL8OR16_HV2_LOWPASS_OP_XMM 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "5,5,8": 5 arguments, 5 general-purpose
; registers, 8 vector registers.
; Unlike QPEL8OR16_HV2_LOWPASS_OP, the symbol has no "_op" suffix and the
; fourth argument is labelled tmpStride rather than unused.
638 cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, size
725 QPEL8OR16_HV2_LOWPASS_OP_XMM put ; base name: put_h264_qpel8or16_hv2_lowpass
726 QPEL8OR16_HV2_LOWPASS_OP_XMM avg ; base name: avg_h264_qpel8or16_hv2_lowpass
729 %macro PIXELS4_L2_SHIFT5 1
; %1 = operation prefix (put or avg below); it is pasted into both the
; symbol name and the op_%1h store macro, giving op_puth or op_avgh.
; x86inc cglobal operands "6,6": 6 arguments, 6 general-purpose registers.
730 cglobal %1_pixels4_l2_shift5,6,6 ; dst, src16, src8, dstStride, src8Stride, h
; r0 and r3 are the 1st and 4th arguments (dst, dstStride) under x86inc
; naming, so [r0+r3] addresses the row one dstStride past dst.
742 op_%1h m1, [r0+r3], m5
; Same store form as above, issued a second time.
; NOTE(review): presumably r0 is advanced between the two -- confirm.
754 op_%1h m1, [r0+r3], m5
759 PIXELS4_L2_SHIFT5 put ; base name: put_pixels4_l2_shift5
760 PIXELS4_L2_SHIFT5 avg ; base name: avg_pixels4_l2_shift5
763 %macro PIXELS8_L2_SHIFT5 1
; %1 = operation prefix (put or avg below); it is pasted into both the
; symbol name and the op_%1 store macro, giving op_put or op_avg.
; x86inc cglobal operands "6, 6": 6 arguments, 6 general-purpose registers.
764 cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
; r0 and r3 are the 1st and 4th arguments (dst, dstStride) under x86inc
; naming, so [r0+r3] addresses the row one dstStride past dst.
781 op_%1 m2, [r0+r3], m5
791 PIXELS8_L2_SHIFT5 put ; base name: put_pixels8_l2_shift5
792 PIXELS8_L2_SHIFT5 avg ; base name: avg_pixels8_l2_shift5
796 %macro QPEL16_H_LOWPASS_L2_OP 1
; %1 = operation prefix pasted into the symbol name (put or avg below).
; x86inc cglobal operands "5, 6, 16": 5 arguments, 6 general-purpose
; registers, 16 vector registers.
; NOTE(review): 16 vector registers exist only on x86-64, so this is
; presumably guarded by an architecture check -- confirm.
797 cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2Stride
860 QPEL16_H_LOWPASS_L2_OP put ; base name: put_h264_qpel16_h_lowpass_l2
861 QPEL16_H_LOWPASS_L2_OP avg ; base name: avg_h264_qpel16_h_lowpass_l2