1 ;*****************************************************************************
2 ;* MMX2/SSE2/SSSE3-optimized H.264 weighted prediction code
3 ;*****************************************************************************
4 ;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
5 ;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
7 ;* This file is part of FFmpeg.
9 ;* FFmpeg is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* FFmpeg is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with FFmpeg; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
24 %include "libavutil/x86/x86inc.asm"
28 ;-----------------------------------------------------------------------------
31 ; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
32 ; int height, int log2_denom, int weightd,
33 ; int weights, int offset);
35 ; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
36 ; int log2_denom, int weight, int offset);
37 ;-----------------------------------------------------------------------------
; h264_weight_16_mmx2: MMX2 entry point of the 16-wide weight function.
; cglobal operands (x86inc convention: name, args, gprs, xmm regs):
; 6 arguments loaded into registers, 6 general-purpose registers, 0 XMM
; registers.
; NOTE(review): presumably shares the six-argument prototype given for
; h264_weight_16_sse2 in the header comment (dst, stride, height,
; log2_denom, weight, offset) -- confirm against the C declaration.
74 cglobal h264_weight_16_mmx2, 6, 6, 0
; WEIGHT_FUNC_MM size, xmm_regs, cpu
;   %1 = size tag placed in the symbol name (8 or 16 in the instantiations
;        below; presumably the block width in pixels -- confirm)
;   %2 = fourth cglobal operand (XMM register count in x86inc's convention)
;   %3 = instruction-set suffix placed in the symbol name
; Declares h264_weight_<%1>_<%3> with 6 register arguments and 6
; general-purpose registers.
86 %macro WEIGHT_FUNC_MM 3
87 cglobal h264_weight_%1_%3, 6, 6, %2
; Instantiations: h264_weight_8_mmx2 (0 XMM registers) and
; h264_weight_16_sse2 (8 XMM registers).
99 WEIGHT_FUNC_MM 8, 0, mmx2
101 WEIGHT_FUNC_MM 16, 8, sse2
; WEIGHT_FUNC_HALF_MM size, xmm_regs, cpu
;   %1 = size tag placed in the symbol name (4 or 8 in the instantiations
;        below)
;   %2 = fourth cglobal operand (XMM register count in x86inc's convention)
;   %3 = instruction-set suffix placed in the symbol name
; Declares h264_weight_<%1>_<%3> with 6 register arguments and 6
; general-purpose registers.
; NOTE(review): "HALF" presumably means one row fills only half of a SIMD
; register (4 bytes with MMX, 8 bytes with SSE2) -- confirm against the
; macro body.
103 %macro WEIGHT_FUNC_HALF_MM 3
104 cglobal h264_weight_%1_%3, 6, 6, %2
; Instantiations: h264_weight_4_mmx2 (0 XMM registers) and
; h264_weight_8_sse2 (8 XMM registers).
124 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
126 WEIGHT_FUNC_HALF_MM 8, 8, sse2
; BIWEIGHT_SETUP: parameterless helper macro; by its name, the common
; set-up step for the h264_biweight_* functions.
; TODO(review): document which registers it initializes and clobbers.
128 %macro BIWEIGHT_SETUP 0
; BIWEIGHT_STEPA a, b, off: three-parameter helper macro.
; The call sites in this file pass (0, 1, off) or (1, 2, off), where off is
; a constant (0, 4, 8, 12, mmsize/2) or the register r2.
; NOTE(review): %1/%2 look like SIMD register indices and %3 like an address
; offset -- confirm against the macro body.
159 %macro BIWEIGHT_STEPA 3
; BIWEIGHT_STEPB: parameterless helper macro; by its name, the step that
; follows BIWEIGHT_STEPA in the biweight functions.
; TODO(review): document its inputs, outputs and clobbered registers.
169 %macro BIWEIGHT_STEPB 0
; h264_biweight_16_mmx2: MMX2 entry point of the 16-wide biweight function.
; cglobal operands (x86inc convention: name, args, gprs, xmm regs):
; 7 arguments loaded into registers, 8 general-purpose registers, 0 XMM
; registers.
; NOTE(review): the biweight prototype in the header comment lists eight
; parameters; only seven are loaded here, so the last one (offset) is
; presumably read from its argument slot elsewhere -- confirm.
178 cglobal h264_biweight_16_mmx2, 7, 8, 0
; First pair of STEPA invocations: offsets 0 and 4.
182 BIWEIGHT_STEPA 0, 1, 0
183 BIWEIGHT_STEPA 1, 2, 4
; Second pair of STEPA invocations: offsets 8 and 12, so the four steps
; together cover offsets 0..12 in increments of 4.
186 BIWEIGHT_STEPA 0, 1, 8
187 BIWEIGHT_STEPA 1, 2, 12
; BIWEIGHT_FUNC_MM size, xmm_regs, cpu
;   %1 = size tag placed in the symbol name (8 or 16 in the instantiations
;        below)
;   %2 = fourth cglobal operand (XMM register count in x86inc's convention)
;   %3 = instruction-set suffix placed in the symbol name
; Declares h264_biweight_<%1>_<%3> with 7 register arguments and 8
; general-purpose registers.
196 %macro BIWEIGHT_FUNC_MM 3
197 cglobal h264_biweight_%1_%3, 7, 8, %2
; Two STEPA invocations per pass: one at offset 0 and one at mmsize/2,
; i.e. half of the active SIMD register size as defined by x86inc.
201 BIWEIGHT_STEPA 0, 1, 0
202 BIWEIGHT_STEPA 1, 2, mmsize/2
; Instantiations: h264_biweight_8_mmx2 (0 XMM registers) and
; h264_biweight_16_sse2 (8 XMM registers).
213 BIWEIGHT_FUNC_MM 8, 0, mmx2
215 BIWEIGHT_FUNC_MM 16, 8, sse2
; BIWEIGHT_FUNC_HALF_MM size, xmm_regs, cpu
;   %1 = size tag placed in the symbol name (4 or 8 in the instantiations
;        below)
;   %2 = fourth cglobal operand (XMM register count in x86inc's convention)
;   %3 = instruction-set suffix placed in the symbol name
; Declares h264_biweight_<%1>_<%3> with 7 register arguments and 8
; general-purpose registers.
217 %macro BIWEIGHT_FUNC_HALF_MM 3
218 cglobal h264_biweight_%1_%3, 7, 8, %2
; Two STEPA invocations: one at offset 0 and one at offset r2. r2 is the
; third argument, which is the stride in the biweight prototype given in
; the header comment.
; NOTE(review): this presumably makes the second step operate on the
; following row (two rows per pass) -- confirm against the macro body.
224 BIWEIGHT_STEPA 0, 1, 0
225 BIWEIGHT_STEPA 1, 2, r2
; Instantiations: h264_biweight_4_mmx2 (0 XMM registers) and
; h264_biweight_8_sse2 (8 XMM registers).
242 BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
244 BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
; BIWEIGHT_SSSE3_SETUP: parameterless helper macro; by its name, the set-up
; step for the SSSE3 biweight functions.
; TODO(review): document which registers it initializes and clobbers.
246 %macro BIWEIGHT_SSSE3_SETUP 0
; BIWEIGHT_SSSE3_OP: parameterless helper macro; by its name, the per-step
; arithmetic of the SSSE3 biweight functions.
; TODO(review): document its input/output registers.
276 %macro BIWEIGHT_SSSE3_OP 0
; h264_biweight_16_ssse3: SSSE3 entry point of the 16-wide biweight function.
; cglobal operands (x86inc convention: name, args, gprs, xmm regs):
; 7 arguments loaded into registers, 8 general-purpose registers, 8 XMM
; registers.
; NOTE(review): presumably uses the eight-parameter biweight prototype from
; the header comment -- confirm against the C declaration.
287 cglobal h264_biweight_16_ssse3, 7, 8, 8
; h264_biweight_8_ssse3: SSSE3 entry point of the 8-wide biweight function.
; cglobal operands (x86inc convention: name, args, gprs, xmm regs):
; 7 arguments loaded into registers, 8 general-purpose registers, 8 XMM
; registers.
; NOTE(review): presumably uses the eight-parameter biweight prototype from
; the header comment -- confirm against the C declaration.
306 cglobal h264_biweight_8_ssse3, 7, 8, 8