1 ;*****************************************************************************
2 ;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code
3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2011 x264 project
6 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
8 ;* This file is part of Libav.
10 ;* Libav is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* Libav is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with Libav; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
26 %include "x86util.asm"
; PRED4x4_LOWPASS takes four parameters; each invocation visible in this
; excerpt passes four SIMD registers (mN). The macro body and its closing
; endmacro are not shown here -- presumably a low-pass filter across
; neighbouring pixels, as the name suggests; TODO confirm against the full
; source.
35 %macro PRED4x4_LOWPASS 4
41 ;-----------------------------------------------------------------------------
42 ; void pred4x4_down_right(pixel *src, const pixel *topright, int stride)
;
; Partial view: the embedded listing numbers skip lines, so only some of the
; body is shown; the notes below cover the visible instructions only.
; - The symbol name ends in a macro parameter, so this cglobal belongs to a
;   parameterised macro whose opening line is not shown.
; - "3,3" presumably follows the x86inc cglobal convention (argument count,
;   then general-purpose register count) -- confirm against x86inc.asm.
; - Operands of the form [rX+r2*k-8] address the 8 bytes that end immediately
;   before rX+r2*k; r2 is presumably the stride (third argument). How r0 and
;   r1 are set up is not visible here.
43 ;-----------------------------------------------------------------------------
45 cglobal pred4x4_down_right_10_%1, 3,3
49 movhps m2, [r0+r2*1-8] ; high half of m2 <- 8 bytes ending just before r0+r2
54 PALIGNR m3, m1, 10, m1 ; align m3 against m1 with a 10-byte offset
56 movhps m4, [r1+r2*1-8] ; high half of m4 <- 8 bytes ending just before r1+r2
57 PALIGNR m3, m4, 14, m4 ; 14-byte offset: presumably shifts one 16-bit pixel from m4 into m3
59 movhps m4, [r1+r2*2-8] ; high half of m4 <- 8 bytes ending just before r1+2*r2
60 PALIGNR m3, m4, 14, m4 ; same step for the pixel at the following row offset
61 PRED4x4_LOWPASS m0, m3, m1, m2 ; filter macro; m0 is presumably the destination
; Rebind PALIGNR for the code that follows each define; the macro
; instantiations that would consume each binding are not visible in this
; excerpt.
73 %define PALIGNR PALIGNR_MMX
75 %define PALIGNR PALIGNR_SSSE3
82 ;-----------------------------------------------------------------------------
83 ; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
;
; Partial view: the embedded listing numbers skip lines, so only some of the
; body is shown. The layout comments list 16-bit pixels from the high end of
; the register (left) to the low end (right); ".." marks lanes not yet
; filled. Labels are presumably t* = top row, lt = top-left, l* = left column.
; "3,3,6" presumably follows the x86inc cglobal convention (arguments, GPRs,
; SIMD registers) -- confirm against x86inc.asm; the visible lines reference
; registers up to m5.
84 ;-----------------------------------------------------------------------------
86 cglobal pred4x4_vertical_right_10_%1, 3,3,6
89 movq m5, [r0] ; ........t3t2t1t0
91 PALIGNR m0, m5, m1, 14, m1 ; ......t3t2t1t0lt
93 movhps m1, [r0+r2*1-8] ; high half of m1 <- 8 bytes ending just before r0+r2 (supplies l0 below)
94 PALIGNR m0, m1, 14, m1 ; ....t3t2t1t0ltl0
96 movhps m2, [r0+r2*2-8] ; same load for the row at r0+2*r2 (supplies l1 below)
97 PALIGNR m0, m2, 14, m2 ; ..t3t2t1t0ltl0l1
99 movhps m3, [r1+r2*1-8] ; same load for the row at r1+r2 (supplies l2 below)
100 PALIGNR m0, m3, 14, m3 ; t3t2t1t0ltl0l1l2
101 PRED4x4_LOWPASS m3, m1, m0, m2 ; filter macro; m3 is presumably the destination
106 PALIGNR m5, m1, 14, m2 ; 14-byte alignment of m5 against m1; m2 is the extra operand here
109 PALIGNR m3, m1, 14, m1 ; 14-byte alignment of m3 against m1
; Rebind PALIGNR for the code that follows each define; the macro
; instantiations that would consume each binding are not visible in this
; excerpt.
115 %define PALIGNR PALIGNR_MMX
117 %define PALIGNR PALIGNR_SSSE3
124 ;-----------------------------------------------------------------------------
125 ; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
;
; Partial view: the embedded listing numbers skip lines, so only some of the
; body is shown. m3 and m5 are consumed below (punpcklwd / PALIGNR), but the
; instructions that load them are not visible in this excerpt.
; The layout comments list 16-bit pixels from the high end of the register
; (left) to the low end (right); ".." marks lanes without a named pixel.
126 ;-----------------------------------------------------------------------------
128 cglobal pred4x4_horizontal_down_10_%1, 3,3
131 movq m0, [r0-8] ; lt ..
133 pslldq m0, 2 ; t2 t1 t0 lt .. .. .. ..
134 movq m1, [r1+r2*2-8] ; l3
136 punpcklwd m1, m3 ; l2 l3
137 movq m2, [r0+r2*2-8] ; l1
139 punpcklwd m2, m3 ; l0 l1
140 punpckhdq m1, m2 ; l0 l1 l2 l3
141 punpckhqdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
142 psrldq m0, m1, 4 ; .. .. t2 t1 t0 lt l0 l1
143 psrldq m2, m1, 2 ; .. t2 t1 t0 lt l0 l1 l2
145 PRED4x4_LOWPASS m3, m1, m0, m2 ; filter macro fed m1 and its copies shifted by 4 and 2 bytes; m3 is presumably the destination
148 PALIGNR m3, m5, 12, m4 ; 12-byte alignment of m3 against m5; m4 is the extra operand
; Rebind PALIGNR for the code that follows each define; the macro
; instantiations that would consume each binding are not visible in this
; excerpt.
158 %define PALIGNR PALIGNR_MMX
160 %define PALIGNR PALIGNR_SSSE3
167 ;-----------------------------------------------------------------------------
168 ; void pred4x4_dc(pixel *src, const pixel *topright, int stride)
;
; Partial view: the embedded listing numbers skip lines, so only the HADDD
; macro header, the entry point and three accumulating adds are shown. The
; initial load of m2 and the final store are not visible in this excerpt.
169 ;-----------------------------------------------------------------------------
; HADDD: two-parameter helper macro (its own comment names them "sum" and
; "junk"); the body is not visible here.
170 %macro HADDD 2 ; sum junk
188 cglobal pred4x4_dc_10_mmxext, 3,3
192 paddw m2, [r0+r2*2-8] ; packed 16-bit add of the operand starting 8 bytes before r0+2*r2
193 paddw m2, [r1+r2*1-8] ; packed 16-bit add of the operand starting 8 bytes before r1+r2
194 paddw m2, [r1+r2*2-8] ; packed 16-bit add of the operand starting 8 bytes before r1+2*r2
208 ;-----------------------------------------------------------------------------
209 ; void pred4x4_down_left(pixel *src, const pixel *topright, int stride)
;
; Partial view: only the entry point and one filter invocation are visible.
; The symbol name ends in a macro parameter, so this cglobal belongs to a
; parameterised macro whose opening line is not shown.
210 ;-----------------------------------------------------------------------------
213 cglobal pred4x4_down_left_10_%1, 3,3
221 PRED4x4_LOWPASS m0, m5, m3, m1 ; filter macro; m0 is presumably the destination (loads of m5/m3/m1 not shown)
240 ;-----------------------------------------------------------------------------
241 ; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
;
; Partial view: only the entry point and one filter invocation are visible.
; The symbol name ends in a macro parameter, so this cglobal belongs to a
; parameterised macro whose opening line is not shown.
242 ;-----------------------------------------------------------------------------
244 cglobal pred4x4_vertical_left_10_%1, 3,3
251 PRED4x4_LOWPASS m0, m1, m2, m3 ; filter macro; m0 is presumably the destination (loads of m1/m2/m3 not shown)
269 ;-----------------------------------------------------------------------------
270 ; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
;
; Partial view: the embedded listing numbers skip lines, so only some of the
; body is shown; the initial loads of m0/m1 and the stores are not visible.
; Operands of the form [rX+r2*2-8] start 8 bytes before rX+2*r2; r2 is
; presumably the stride (third argument).
271 ;-----------------------------------------------------------------------------
273 cglobal pred4x4_horizontal_up_10_mmxext, 3,3
277 punpckhwd m0, [r0+r2*2-8] ; interleave the high-order words of m0 with those of the operand at r0+2*r2-8
279 punpckhwd m1, [r1+r2*2-8] ; same for m1 with the operand at r1+2*r2-8
284 pshufw m2, m0, 11111001b ; m2 words (low->high) = m0 words 1,2,3,3
288 pshufw m5, m0, 11111110b ; m5 words (low->high) = m0 words 2,3,3,3
289 PRED4x4_LOWPASS m3, m0, m5, m1 ; filter macro; m3 is presumably the destination
303 ;-----------------------------------------------------------------------------
304 ; void pred8x8_vertical(pixel *src, int stride)
;
; Only the entry-point declaration is visible in this excerpt; the body is
; not shown. "2,2" presumably follows the x86inc cglobal convention (two
; arguments, two general-purpose registers) -- confirm against x86inc.asm.
305 ;-----------------------------------------------------------------------------
307 cglobal pred8x8_vertical_10_sse2, 2,2
319 ;-----------------------------------------------------------------------------
320 ; void pred8x8_horizontal(pixel *src, int stride)
321 ;-----------------------------------------------------------------------------
323 cglobal pred8x8_horizontal_10_sse2, 2,3