1 ;*****************************************************************************
2 ;* MMX/SSE2/AVX-optimized 10-bit H.264 weighted prediction code
3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2011 x264 project
6 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
8 ;* This file is part of FFmpeg.
10 ;* FFmpeg is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with FFmpeg; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
34 %define pw_pixel_max pw_1023
38 ;-----------------------------------------------------------------------------
39 ; void ff_h264_weight_16_10(uint8_t *dst, int stride, int height,
40 ; int log2_denom, int weight, int offset);
41 ;-----------------------------------------------------------------------------
42 %macro WEIGHT_PROLOGUE 0
; Register setup for the unidirectional weight functions (signature above).
; NOTE(review): assumes m0 holds dword 1s, m2 holds log2_denom, r4 = weight
; and r5 = offset when these instructions run -- confirm against the loads
; that precede them.
;
; State produced by the instructions below:
;   m0 = m0 << m2                 (1<<log2_denom under the assumption above)
;   m3 = low dword of r5          (packed weight/offset, see below)
;   m4 = [pw_pixel_max]           (pw_pixel_max is %defined to pw_1023)
;   m2 = m2 + [sq_1]              (log2_denom+1)
55 pslld m0, m2 ; 1<<log2_denom
57 shl r5, 19 ; offset*8 (<<3) moved to the upper half of the dword (<<16)
; r5 = (offset<<19) + weight*2 + 0x10000: weight*2 lands in the low word,
; (offset<<3)+1 in the high word.
; NOTE(review): for a negative weight the low 32 bits of weight*2 have
; all-ones in the upper word, which cancels the +0x10000 -- confirm whether
; negative weights reach this code and whether that is intended.
58 lea r5, [r5+r4*2+0x10000]
59 movd m3, r5d ; low word: weight<<1, high word: 1+(offset<<3)
61 mova m4, [pw_pixel_max] ; m4 = pw_pixel_max constant (pw_1023)
62 paddw m2, [sq_1] ; log2_denom+1
92 %macro WEIGHT_FUNC_DBL 0
93 cglobal h264_weight_16_10
113 %macro WEIGHT_FUNC_MM 0
114 cglobal h264_weight_8_10
132 %macro WEIGHT_FUNC_HALF_MM 0
133 cglobal h264_weight_4_10
154 ;-----------------------------------------------------------------------------
155 ; void ff_h264_biweight_16_10(uint8_t *dst, uint8_t *src, int stride,
156 ; int height, int log2_denom, int weightd,
157 ; int weights, int offset);
158 ;-----------------------------------------------------------------------------
165 %macro BIWEIGHT_PROLOGUE 0
176 %macro BIWEIGHT_SETUP 0
; Register setup for the bidirectional weight functions (signature above).
; NOTE(review): assumes t0 = offset on entry and that r5 already carries both
; weights packed into one dword by the time it is copied to m4 -- confirm
; against the surrounding instructions.
;
; State produced by the instructions below:
;   t0 = offset*4 + 1
;   m4 = low dword of r5          (weightd | weights)
;   m5 = t0d << log2_denom
;   m6 = log2_denom               (read from the r4m argument slot)
;   m3 = [pw_pixel_max]           (pw_pixel_max is %defined to pw_1023)
177 lea t0, [t0*4+1] ; (offset<<2)+1
181 movd m4, r5d ; weightd | weights
182 movd m5, t0d ; ((offset<<2)+1)|1 -- already odd after the lea above
183 movd m6, r4m ; log2_denom
184 pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
188 mova m3, [pw_pixel_max] ; m3 = pw_pixel_max constant (pw_1023)
224 %macro BIWEIGHT_FUNC_DBL 0
225 cglobal h264_biweight_16_10
245 %macro BIWEIGHT_FUNC 0
246 cglobal h264_biweight_8_10
264 %macro BIWEIGHT_FUNC_HALF 0
265 cglobal h264_biweight_4_10