1 ;*****************************************************************************
2 ;* MMX/SSE2/AVX-optimized 10-bit H.264 weighted prediction code
3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2011 x264 project
6 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
8 ;* This file is part of FFmpeg.
10 ;* FFmpeg is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with FFmpeg; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
29 pw_pixel_max: times 8 dw ((1 << 10)-1) ; largest 10-bit value (1023), repeated in 8 words
37 ;-----------------------------------------------------------------------------
38 ; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
39 ; int weight, int offset);
40 ;-----------------------------------------------------------------------------
41 %macro WEIGHT_PROLOGUE 0
54 pslld m0, m2 ; 1<<log2_denom
56 shl r5, 19 ; *8, move to upper half of dword
57 lea r5, [r5+r4*2+0x10000] ; r5 = (offset<<19) + (weight<<1) + (1<<16)
58 movd m3, r5d ; weight<<1 | 1+(offset<<3)
60 mova m4, [pw_pixel_max] ; m4 = (1<<10)-1 in each word
61 paddw m2, [sq_1] ; log2_denom+1
91 %macro WEIGHT_FUNC_DBL 0
92 cglobal h264_weight_16_10
112 %macro WEIGHT_FUNC_MM 0
113 cglobal h264_weight_8_10
131 %macro WEIGHT_FUNC_HALF_MM 0
132 cglobal h264_weight_4_10
153 ;-----------------------------------------------------------------------------
154 ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
155 ; int log2_denom, int weightd, int weights, int offset);
156 ;-----------------------------------------------------------------------------
163 %macro BIWEIGHT_PROLOGUE 0
174 %macro BIWEIGHT_SETUP 0
175 lea t0, [t0*4+1] ; (offset<<2)+1
179 movd m4, r5d ; weightd | weights
180 movd m5, t0d ; ((offset<<2)+1)|1
181 movd m6, r4m ; log2_denom
182 pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
186 mova m3, [pw_pixel_max] ; m3 = (1<<10)-1 in each word
222 %macro BIWEIGHT_FUNC_DBL 0
223 cglobal h264_biweight_16_10
243 %macro BIWEIGHT_FUNC 0
244 cglobal h264_biweight_8_10
262 %macro BIWEIGHT_FUNC_HALF 0
263 cglobal h264_biweight_4_10