1 ;*****************************************************************************
2 ;* MMX/SSE2/AVX-optimized 10-bit H.264 weighted prediction code
3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2011 x264 project
6 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
8 ;* This file is part of Libav.
10 ;* Libav is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* Libav is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with Libav; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
26 %include "x86util.asm"
30 pw_pixel_max: times 8 dw ((1 << 10)-1) ; eight 16-bit words, each 1023 (largest value representable in 10 bits)
38 ;-----------------------------------------------------------------------------
39 ; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
40 ; int weight, int offset);
41 ;-----------------------------------------------------------------------------
; WEIGHT_PROLOGUE takes no macro arguments. The visible instructions prepare
; vector constants in m0, m2, m3 and m4 from r4, r5 and log2_denom.
; NOTE(review): presumably shared by the h264_weight_* functions below -- confirm.
42 %macro WEIGHT_PROLOGUE 0
55 pslld m0, m2 ; 1<<log2_denom (assumes m0 holds 1 and m2 holds log2_denom here -- confirm against the preceding setup)
57 shl r5, 19 ; *8, move to upper half of dword (19 = 3 + 16)
58 lea r5, [r5+r4*2+0x10000] ; add r4*2 and set bit 16, i.e. +1 in the upper half
59 movd m3, r5d ; weight<<1 | 1+(offset<<(3))
61 mova m4, [pw_pixel_max] ; load the pw_pixel_max constant
62 paddw m2, [sq_1] ; log2_denom+1
92 %macro WEIGHT_FUNC_DBL 1 ; %1 = suffix appended to the function name below
93 cglobal h264_weight_16_10_%1
112 %macro WEIGHT_FUNC_MM 1 ; %1 = suffix appended to the function name below
113 cglobal h264_weight_8_10_%1
130 %macro WEIGHT_FUNC_HALF_MM 1 ; %1 = suffix appended to the function name below
131 cglobal h264_weight_4_10_%1
147 WEIGHT_FUNC_HALF_MM sse2 ; expand the macro with %1 = sse2
148 WEIGHT_FUNC_HALF_MM sse4 ; expand the macro with %1 = sse4
151 ;-----------------------------------------------------------------------------
152 ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
153 ; int log2_denom, int weightd, int weights, int offset);
154 ;-----------------------------------------------------------------------------
161 %macro BIWEIGHT_PROLOGUE 0 ; takes no macro arguments
; BIWEIGHT_SETUP takes one macro argument. The visible instructions move the
; packed weights, the rounding/offset term and log2_denom into m4, m5 and m6.
172 %macro BIWEIGHT_SETUP 1
173 lea t0, [t0*4+1] ; (offset<<2)+1
177 movd m4, r5d ; weightd | weights
178 movd m5, t0d ; ((offset<<2)+1)|1 -- the value shifted by log2_denom below
179 movd m6, r4m ; log2_denom
180 pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
184 mova m3, [pw_pixel_max] ; load the pw_pixel_max constant
220 %macro BIWEIGHT_FUNC_DBL 1 ; %1 = suffix appended to the function name below
221 cglobal h264_biweight_16_10_%1
237 BIWEIGHT_FUNC_DBL sse2 ; expand the macro with %1 = sse2
238 BIWEIGHT_FUNC_DBL sse4 ; expand the macro with %1 = sse4
240 %macro BIWEIGHT_FUNC 1 ; %1 = suffix appended to the function name below
241 cglobal h264_biweight_8_10_%1
258 %macro BIWEIGHT_FUNC_HALF 1 ; %1 = suffix appended to the function name below
259 cglobal h264_biweight_4_10_%1
276 BIWEIGHT_FUNC_HALF sse2 ; expand the macro with %1 = sse2
277 BIWEIGHT_FUNC_HALF sse4 ; expand the macro with %1 = sse4