1 ;*****************************************************************************
2 ;* mc.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2003 x264 project
5 ;* $Id: mc.asm,v 1.3 2004/06/18 01:59:58 chenm001 Exp $
7 ;* Authors: Min Chen <chenm001.163.com> (converted to nasm)
8 ;* Laurent Aimar <fenrir@via.ecp.fr> (init algorithm)
10 ;* This program is free software; you can redistribute it and/or modify
11 ;* it under the terms of the GNU General Public License as published by
12 ;* the Free Software Foundation; either version 2 of the License, or
13 ;* (at your option) any later version.
15 ;* This program is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;* GNU General Public License for more details.
20 ;* You should have received a copy of the GNU General Public License
21 ;* along with this program; if not, write to the Free Software
22 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 ;*****************************************************************************
25 ;*****************************************************************************
27 ;* Revision history: *
29 ;* 2004.05.17 portab mc_copy_w4/8/16 (CM) *
31 ;*****************************************************************************
35 ;=============================================================================
36 ; Macros and other preprocessor constants
37 ;=============================================================================
48 ;=============================================================================
49 ; Local Data (Read Only)
50 ;=============================================================================
55 SECTION .rodata data align=16
58 ;-----------------------------------------------------------------------------
59 ; Various memory constants (trigonometric values or rounding values)
60 ;-----------------------------------------------------------------------------
64 ;=============================================================================
66 ;=============================================================================
; Entry points made visible to other object files; each name matches a label
; defined further down in this file.
; NOTE(review): `cglobal` is not defined in the visible part of this file -
; presumably a macro from the preprocessor section that declares the symbol
; global; confirm against the macro definitions.
70 cglobal x264_pixel_avg_w4_mmxext
71 cglobal x264_pixel_avg_w8_mmxext
72 cglobal x264_pixel_avg_w16_mmxext
73 cglobal x264_pixel_avg_w16_sse2
75 cglobal x264_mc_copy_w4_mmxext
76 cglobal x264_mc_copy_w8_mmxext
77 cglobal x264_mc_copy_w16_mmxext
78 cglobal x264_mc_copy_w16_sse2
82 ;-----------------------------------------------------------------------------
83 ; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int i_dst_stride,
84 ; uint8_t *src1, int i_src1_stride,
85 ; uint8_t *src2, int i_src2_stride, int i_height );
;
; Visible part: loads the seven stack arguments into registers.
;   edi = dst    esi = i_dst_stride
;   ebx = src1   eax = i_src1_stride
;   ecx = src2   edx = i_src2_stride
;   ebp = i_height
; NOTE(review): the first argument is read from [esp+20], which suggests that
; 16 bytes were pushed after the return address before these loads. The
; pushes, the processing loop and the return are not visible in this excerpt -
; confirm against the full file.
87 ;-----------------------------------------------------------------------------
88 x264_pixel_avg_w4_mmxext:
94 mov edi, [esp+20] ; edi = dst
95 mov ebx, [esp+28] ; ebx = src1
96 mov ecx, [esp+36] ; ecx = src2
97 mov esi, [esp+24] ; esi = i_dst_stride
98 mov eax, [esp+32] ; eax = i_src1_stride
99 mov edx, [esp+40] ; edx = i_src2_stride
100 mov ebp, [esp+44] ; ebp = i_height (seventh argument)
125 ;-----------------------------------------------------------------------------
126 ; void x264_pixel_avg_w8_mmxext( uint8_t *dst, int i_dst_stride,
127 ; uint8_t *src1, int i_src1_stride,
128 ; uint8_t *src2, int i_src2_stride, int i_height );
;
; Visible part: loads the seven stack arguments into registers.
;   edi = dst    esi = i_dst_stride
;   ebx = src1   eax = i_src1_stride
;   ecx = src2   edx = i_src2_stride
;   ebp = i_height
; NOTE(review): the first argument is read from [esp+20], which suggests that
; 16 bytes were pushed after the return address before these loads. The
; pushes, the processing loop and the return are not visible in this excerpt -
; confirm against the full file.
130 ;-----------------------------------------------------------------------------
131 x264_pixel_avg_w8_mmxext:
137 mov edi, [esp+20] ; edi = dst
138 mov ebx, [esp+28] ; ebx = src1
139 mov ecx, [esp+36] ; ecx = src2
140 mov esi, [esp+24] ; esi = i_dst_stride
141 mov eax, [esp+32] ; eax = i_src1_stride
142 mov edx, [esp+40] ; edx = i_src2_stride
143 mov ebp, [esp+44] ; ebp = i_height (seventh argument)
164 ;-----------------------------------------------------------------------------
165 ; void x264_pixel_avg_w16_mmxext( uint8_t *dst, int i_dst_stride,
166 ; uint8_t *src1, int i_src1_stride,
167 ; uint8_t *src2, int i_src2_stride, int i_height );
;
; Visible part: loads the seven stack arguments into registers.
;   edi = dst    esi = i_dst_stride
;   ebx = src1   eax = i_src1_stride
;   ecx = src2   edx = i_src2_stride
;   ebp = i_height
; NOTE(review): the first argument is read from [esp+20], which suggests that
; 16 bytes were pushed after the return address before these loads. The
; pushes, the processing loop and the return are not visible in this excerpt -
; confirm against the full file.
169 ;-----------------------------------------------------------------------------
170 x264_pixel_avg_w16_mmxext:
176 mov edi, [esp+20] ; edi = dst
177 mov ebx, [esp+28] ; ebx = src1
178 mov ecx, [esp+36] ; ecx = src2
179 mov esi, [esp+24] ; esi = i_dst_stride
180 mov eax, [esp+32] ; eax = i_src1_stride
181 mov edx, [esp+40] ; edx = i_src2_stride
182 mov ebp, [esp+44] ; ebp = i_height (seventh argument)
204 ;-----------------------------------------------------------------------------
205 ; void x264_pixel_avg_w16_sse2( uint8_t *dst, int i_dst_stride,
206 ; uint8_t *src1, int i_src1_stride,
207 ; uint8_t *src2, int i_src2_stride, int i_height );
;
; Visible part: loads the seven stack arguments into registers, using the same
; register assignment as the mmxext variants of this routine.
;   edi = dst    esi = i_dst_stride
;   ebx = src1   eax = i_src1_stride
;   ecx = src2   edx = i_src2_stride
;   ebp = i_height
; NOTE(review): the first argument is read from [esp+20], which suggests that
; 16 bytes were pushed after the return address before these loads. The
; pushes, the processing loop and the return are not visible in this excerpt -
; confirm against the full file.
209 ;-----------------------------------------------------------------------------
210 x264_pixel_avg_w16_sse2:
216 mov edi, [esp+20] ; edi = dst
217 mov ebx, [esp+28] ; ebx = src1
218 mov ecx, [esp+36] ; ecx = src2
219 mov esi, [esp+24] ; esi = i_dst_stride
220 mov eax, [esp+32] ; eax = i_src1_stride
221 mov edx, [esp+40] ; edx = i_src2_stride
222 mov ebp, [esp+44] ; ebp = i_height (seventh argument)
244 ;-----------------------------------------------------------------------------
245 ; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
246 ; uint8_t *dst, int i_dst_stride, int i_height )
;
; Visible part: loads the five stack arguments into registers.
;   esi = src   ebx = i_src_stride
;   edi = dst   edx = i_dst_stride
;   ecx = i_height
; NOTE(review): the first argument is read from [esp+16], which suggests that
; 12 bytes were pushed after the return address before these loads. The
; pushes, the copy loop and the return are not visible in this excerpt -
; confirm against the full file.
247 ;-----------------------------------------------------------------------------
248 x264_mc_copy_w4_mmxext:
253 mov esi, [esp+16] ; esi = src
254 mov edi, [esp+24] ; edi = dst
255 mov ebx, [esp+20] ; ebx = i_src_stride
256 mov edx, [esp+28] ; edx = i_dst_stride
257 mov ecx, [esp+32] ; ecx = i_height
278 ;-----------------------------------------------------------------------------
279 ; void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
280 ; uint8_t *dst, int i_dst_stride, int i_height )
;
; Visible part: loads the five stack arguments into registers, then moves one
; 8-byte group from the source side to the destination side through mm2.
;   esi = src   ebx = i_src_stride
;   edi = dst   edx = i_dst_stride
;   ecx = i_height
; NOTE(review): the first argument is read from [esp+16], which suggests that
; 12 bytes were pushed after the return address before these loads. The
; pushes, the rest of the copy loop and the return are not visible in this
; excerpt - confirm against the full file.
281 ;-----------------------------------------------------------------------------
282 x264_mc_copy_w8_mmxext:
287 mov esi, [esp+16] ; esi = src
288 mov edi, [esp+24] ; edi = dst
289 mov ebx, [esp+20] ; ebx = i_src_stride
290 mov edx, [esp+28] ; edx = i_dst_stride
291 mov ecx, [esp+32] ; ecx = i_height
298 movq mm2, [esi+ebx*2] ; load 8 bytes from esi + 2*ebx (two source strides past esi)
299 movq [edi+edx*2], mm2 ; store them at edi + 2*edx (two destination strides past edi)
318 ;-----------------------------------------------------------------------------
319 ; void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
320 ; uint8_t *dst, int i_dst_stride, int i_height )
;
; Visible part: loads the five stack arguments into registers, then moves
; several 8-byte groups from the source side to the destination side through
; MMX registers. Offsets +0 and +8 from one address together span 16 bytes.
;   esi = src   ebx = i_src_stride
;   edi = dst   edx = i_dst_stride
;   ecx = i_height
; NOTE(review): the first argument is read from [esp+16], which suggests that
; 12 bytes were pushed after the return address before these loads. The
; pushes, parts of the copy loop and the return are not visible in this
; excerpt - confirm against the full file.
321 ;-----------------------------------------------------------------------------
322 x264_mc_copy_w16_mmxext:
327 mov esi, [esp+16] ; esi = src
328 mov edi, [esp+24] ; edi = dst
329 mov ebx, [esp+20] ; ebx = i_src_stride
330 mov edx, [esp+28] ; edx = i_dst_stride
331 mov ecx, [esp+32] ; ecx = i_height
340 movq mm3, [esi+ebx+8] ; bytes 8..15 at esi + ebx (one source stride past esi)
342 movq [edi+edx+8], mm3 ; -> bytes 8..15 at edi + edx
343 movq mm4, [esi+ebx*2] ; bytes 0..7 at esi + 2*ebx
344 movq mm5, [esi+ebx*2+8] ; bytes 8..15 at esi + 2*ebx
345 movq [edi+edx*2], mm4 ; -> bytes 0..7 at edi + 2*edx
346 movq [edi+edx*2+8], mm5 ; -> bytes 8..15 at edi + 2*edx
; NOTE(review): the next load uses the same address expression as the mm3 load
; above; presumably esi/edi are advanced by instructions that are not visible
; between these lines - confirm against the full file.
350 movq mm7, [esi+ebx+8] ; bytes 8..15 at esi + ebx
352 movq [edi+edx+8], mm7 ; -> bytes 8..15 at edi + edx
365 ;-----------------------------------------------------------------------------
366 ; void x264_mc_copy_w16_sse2( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
;
; Visible part: loads the five stack arguments into registers, then moves one
; 16-byte group from the source side to the destination side through xmm1
; using unaligned moves (movdqu), so no 16-byte alignment is required by
; these two instructions.
;   esi = src   ebx = i_src_stride
;   edi = dst   edx = i_dst_stride
;   ecx = i_height
; NOTE(review): the first argument is read from [esp+16], which suggests that
; 12 bytes were pushed after the return address before these loads. The
; pushes, the rest of the copy loop and the return are not visible in this
; excerpt - confirm against the full file.
367 ;-----------------------------------------------------------------------------
368 x264_mc_copy_w16_sse2:
373 mov esi, [esp+16] ; esi = src
374 mov edi, [esp+24] ; edi = dst
375 mov ebx, [esp+20] ; ebx = i_src_stride
376 mov edx, [esp+28] ; edx = i_dst_stride
377 mov ecx, [esp+32] ; ecx = i_height
382 movdqu xmm1, [esi+ebx] ; unaligned 16-byte load from esi + ebx (one source stride past esi)
384 movdqu [edi+edx], xmm1 ; unaligned 16-byte store to edi + edx (one destination stride past edi)