1 ;******************************************************************************
2 ;* H.264 intra prediction asm optimizations
3 ;* Copyright (c) 2010 Jason Garrett-Glaser
5 ;* This file is part of FFmpeg.
7 ;* FFmpeg is free software; you can redistribute it and/or
8 ;* modify it under the terms of the GNU Lesser General Public
9 ;* License as published by the Free Software Foundation; either
10 ;* version 2.1 of the License, or (at your option) any later version.
12 ;* FFmpeg is distributed in the hope that it will be useful,
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;* Lesser General Public License for more details.
17 ;* You should have received a copy of the GNU Lesser General Public
18 ;* License along with FFmpeg; if not, write to the Free Software
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 ;******************************************************************************
; Constant table: the byte pair 0x03, 0x80 emitted 8 times (16 bytes total).
; pred8x8_tm_vp8_ssse3 loads the whole table into xmm4 with movdqa, an
; aligned load, so the table must sit at a 16-byte-aligned address (the
; section/alignment directive is not visible in this excerpt).
; NOTE(review): presumably a byte-shuffle control mask; the instruction that
; consumes xmm4 is not visible here -- confirm.
26 tm_shuf: times 8 db 0x03, 0x80
32 ;-----------------------------------------------------------------------------
33 ; void pred16x16_vertical(uint8_t *src, int stride)
34 ;-----------------------------------------------------------------------------
; MMX variant of pred16x16_vertical. Only the declaration is visible in this
; excerpt; the body is not shown.
; NOTE(review): "2,3" presumably follows the x86inc cglobal convention
; (argument count, then general-purpose register count) -- confirm.
36 cglobal pred16x16_vertical_mmx, 2,3
; SSE variant of pred16x16_vertical. The visible instructions write the same
; 16-byte value held in xmm0 to successive rows; the code that fills xmm0 and
; the loop control are not visible in this excerpt.
51 cglobal pred16x16_vertical_sse, 2,3
; Store xmm0 to the rows at r0+r1 and r0+2*r1. movaps is an aligned store, so
; both addresses must be 16-byte aligned.
56 movaps [r0+r1*1], xmm0
57 movaps [r0+r1*2], xmm0
; Same pair of stores again with identical address expressions.
; NOTE(review): the instruction between the two pairs is not shown; presumably
; it advances r0 so that this pair covers the next two rows -- confirm.
59 movaps [r0+r1*1], xmm0
60 movaps [r0+r1*2], xmm0
66 ;-----------------------------------------------------------------------------
67 ; void pred16x16_horizontal(uint8_t *src, int stride)
68 ;-----------------------------------------------------------------------------
; Declares pred16x16_horizontal_<suffix>; %1 is a macro parameter substituted
; into the function name, so this line sits inside a %macro whose header and
; body are not visible in this excerpt.
71 cglobal pred16x16_horizontal_%1, 2,3
113 ;-----------------------------------------------------------------------------
114 ; void pred16x16_dc(uint8_t *src, int stride)
115 ;-----------------------------------------------------------------------------
; PRED16x16_DC <suffix>, <arg2>
; Two-parameter macro that emits pred16x16_dc_<suffix>. %1 is the name suffix.
; The use of %2 is not visible in this excerpt; every instantiation below
; passes a move mnemonic (movq / movaps / movdqa) for it.
117 %macro PRED16x16_DC 2
118 cglobal pred16x16_dc_%1, 2,7
; Each movzx below reads one unsigned byte from the address formed from r0
; (optionally plus r1) and zero-extends it into a 32-bit register.
; NOTE(review): presumably these gather the left-neighbour column for the DC
; sum; the adds and the r0 updates between the loads are not shown -- confirm.
126 movzx r5d, byte [r0+r1*1]
131 movzx r2d, byte [r0+r1*0]
132 movzx r3d, byte [r0+r1*1]
137 movzx r2d, byte [r0+r1*0]
; Instantiations: one function per instruction-set suffix, each paired with
; the move mnemonic passed as the second macro argument.
183 PRED16x16_DC mmxext, movq
185 PRED16x16_DC sse, movaps
186 PRED16x16_DC sse2, movdqa
187 PRED16x16_DC ssse3, movdqa
189 ;-----------------------------------------------------------------------------
190 ; void pred16x16_tm_vp8(uint8_t *src, int stride)
191 ;-----------------------------------------------------------------------------
; PRED16x16_TM_MMX <suffix>
; One-parameter macro that emits pred16x16_tm_vp8_<suffix>. Only the two
; scalar loads below are visible; the vector arithmetic and stores are not
; part of this excerpt.
193 %macro PRED16x16_TM_MMX 1
194 cglobal pred16x16_tm_vp8_%1, 2,5
; r3d = zero-extended byte located one byte before the address in r0.
205 movzx r3d, byte [r0-1]
; r2d = zero-extended byte one byte before the row at r0+r1.
208 movzx r2d, byte [r0+r1-1]
; Instantiate the macro with the suffix "mmxext".
235 PRED16x16_TM_MMX mmxext
; SSE2 variant of pred16x16_tm_vp8. The visible lines handle two rows at a
; time: read the byte just left of each row, broadcast a 16-bit value across
; a vector, and store one 16-byte row per vector. The arithmetic between
; these steps is not visible in this excerpt.
237 cglobal pred16x16_tm_vp8_sse2, 2,6,6
; r4d = zero-extended byte one byte before the address in r0.
244 movzx r4d, byte [r0-1]
; r2d / r3d = zero-extended bytes one byte before the rows at r0+r1 and
; r0+2*r1 respectively.
247 movzx r2d, byte [r0+r1*1-1]
248 movzx r3d, byte [r0+r1*2-1]
; pshuflw with immediate 0 copies word 0 into all four low words; the
; punpcklqdq of a register with itself then duplicates the low quadword into
; the high one, leaving word 0 replicated in all eight word lanes.
253 pshuflw xmm2, xmm2, 0
254 pshuflw xmm4, xmm4, 0
255 punpcklqdq xmm2, xmm2
256 punpcklqdq xmm4, xmm4
; Aligned 16-byte stores of the two result rows (addresses must be 16-byte
; aligned for movdqa).
265 movdqa [r0+r1*1], xmm2
266 movdqa [r0+r1*2], xmm4
272 ;-----------------------------------------------------------------------------
273 ; void pred8x8_vertical(uint8_t *src, int stride)
274 ;-----------------------------------------------------------------------------
; MMX implementation of pred8x8_vertical. Only the declaration is visible in
; this excerpt; the body is not shown.
276 cglobal pred8x8_vertical_mmx, 2,2
288 ;-----------------------------------------------------------------------------
289 ; void pred8x8_horizontal(uint8_t *src, int stride)
290 ;-----------------------------------------------------------------------------
; Declares pred8x8_horizontal_<suffix>; %1 is a macro parameter substituted
; into the function name, so this line sits inside a %macro whose header and
; body are not visible in this excerpt.
293 cglobal pred8x8_horizontal_%1, 2,3
330 ;-----------------------------------------------------------------------------
331 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
332 ;-----------------------------------------------------------------------------
; MMXEXT implementation of pred8x8_dc_rv40. Only scalar byte loads are
; visible in this excerpt.
334 cglobal pred8x8_dc_rv40_mmxext, 2,7
; Each movzx reads one unsigned byte at r0 (optionally plus r1) and
; zero-extends it into a 32-bit register.
; NOTE(review): presumably these gather the left-neighbour column for the DC
; sum; the adds and the r0 updates between the loads are not shown -- confirm.
340 movzx r5d, byte [r0+r1*1]
344 movzx r2d, byte [r0+r1*0]
345 movzx r3d, byte [r0+r1*1]
350 movzx r2d, byte [r0+r1*0]
366 ;-----------------------------------------------------------------------------
367 ; void pred8x8_tm_vp8(uint8_t *src, int stride)
368 ;-----------------------------------------------------------------------------
; PRED8x8_TM_MMX <suffix>
; One-parameter macro that emits pred8x8_tm_vp8_<suffix>. Only the scalar
; loads below are visible; the vector arithmetic and stores are not part of
; this excerpt.
370 %macro PRED8x8_TM_MMX 1
371 cglobal pred8x8_tm_vp8_%1, 2,6
; r4d = zero-extended byte one byte before the address in r0.
378 movzx r4d, byte [r0-1]
; r2d / r3d = zero-extended bytes one byte before the rows at r0+r1 and
; r0+2*r1 respectively.
381 movzx r2d, byte [r0+r1*1-1]
382 movzx r3d, byte [r0+r1*2-1]
; Instantiate the macro with the suffix "mmxext".
413 PRED8x8_TM_MMX mmxext
; SSE2 variant of pred8x8_tm_vp8. The visible lines read the byte just left
; of two rows, broadcast a 16-bit value across a vector, and store half of a
; vector to a row. The arithmetic in between is not visible in this excerpt.
415 cglobal pred8x8_tm_vp8_sse2, 2,6,4
; r4d = zero-extended byte one byte before the address in r0.
420 movzx r4d, byte [r0-1]
; r2d / r3d = zero-extended bytes one byte before the rows at r0+r1 and
; r0+2*r1 respectively.
423 movzx r2d, byte [r0+r1*1-1]
424 movzx r3d, byte [r0+r1*2-1]
; pshuflw with immediate 0 copies word 0 into all four low words; the
; punpcklqdq of a register with itself then duplicates the low quadword,
; leaving word 0 replicated in all eight word lanes.
429 pshuflw xmm2, xmm2, 0
430 pshuflw xmm3, xmm3, 0
431 punpcklqdq xmm2, xmm2
432 punpcklqdq xmm3, xmm3
; Store the high 8 bytes of xmm2 to the row at r0+2*r1.
; NOTE(review): the store of the low 8 bytes is not visible here; presumably
; it targets the row at r0+r1 -- confirm.
437 movhps [r0+r1*2], xmm2
; SSSE3 variant of pred8x8_tm_vp8. Unlike the SSE2 variant, the visible lines
; load the left-neighbour bytes straight into vector registers with movd
; instead of going through general-purpose registers.
443 cglobal pred8x8_tm_vp8_ssse3, 2,3,6
; xmm4 = the 16-byte tm_shuf table (aligned load).
; NOTE(review): presumably used as a byte-shuffle control mask by an
; instruction not visible in this excerpt -- confirm.
445 movdqa xmm4, [tm_shuf]
; Load the 4 bytes ending just before each row start (row-4 .. row-1) into
; the low dword of xmm2 / xmm3; the byte at row-1 lands in byte lane 3.
453 movd xmm2, [r0+r1*1-4]
454 movd xmm3, [r0+r1*2-4]
; Store the high 8 bytes of xmm2 to the row at r0+2*r1.
463 movhps [r0+r1*2], xmm2
469 ;-----------------------------------------------------------------------------
470 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
471 ;-----------------------------------------------------------------------------
; MMXEXT implementation of pred4x4_dc. The addressing below uses r2 as the
; row step, matching the three-argument prototype in the header comment where
; stride is the third parameter.
473 cglobal pred4x4_dc_mmxext, 3,5
; Four zero-extending loads of the byte one byte before a row start, each
; into r1d, which therefore serves as a scratch register here.
; NOTE(review): under the prototype above r1 would initially hold topright,
; so that argument appears to be overwritten without being read in the
; visible lines -- confirm against the full body.
479 movzx r1d, byte [r0+r2*1-1]
482 movzx r1d, byte [r0+r2*2-1]
; Same two address expressions as above.
; NOTE(review): r0 is presumably advanced between the pairs by an instruction
; not shown, so these read the next two rows -- confirm.
485 movzx r1d, byte [r0+r2*1-1]
487 movzx r1d, byte [r0+r2*2-1]