1 /*****************************************************************************
2 * block_mmx.h: Macroblock copy functions in MMX assembly
3 *****************************************************************************
4 * Copyright (C) 1999, 2000, 2001 VideoLAN
5 * $Id: block_mmx.h,v 1.1 2001/09/05 16:07:49 massiot Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * InitBlock: placeholder because we don't need a crop table, MMX does it for us
 *
 * Declared with an empty parameter list and a void return type.
 * NOTE(review): the function body is not visible in this view (the embedded
 * line numbering jumps from 28 to 33); presumably it is empty, as the word
 * "placeholder" suggests - confirm against the original file.
27 *****************************************************************************/
28 static __inline__ void InitBlock( )
33 /*****************************************************************************
34 * AddBlock: add a block
 *
 * ADD_MMX(offset, r1, r2, r3, r4): one pipelined step used by AddBlock.
 * It is not self-contained: it expands to statements that use p_data,
 * p_block, i_incr and mm0 from the expansion site.
 *
 * Visible steps, in order:
 *   - load the quadword at p_data + 2*i_incr into r1;
 *   - pack r4 and r3 into r3 and store r3 to *p_data;
 *   - unpack the low bytes of r1 against mm0, then add the values at
 *     p_block + offset into r1;
 *   - unpack the high bytes of r2 against mm0, then add the values at
 *     p_block + offset + 4 into r2.
 *
 * Assumes the movq/packuswb/punpck?bw/paddsw wrappers map to the MMX
 * instructions of the same name with (source, destination) operand order;
 * their definitions are not in this file - TODO confirm.
 * NOTE(review): no visible line copies r1 into r2 or advances p_data between
 * steps, and the embedded numbering jumps from 38 to 41, so lines appear to
 * be missing here - confirm against the original file.
35 *****************************************************************************/
36 #define ADD_MMX(offset,r1,r2,r3,r4) \
37 movq_m2r (*(p_data+2*i_incr), r1); \
38 packuswb_r2r (r4, r3); \
41 movq_r2m (r3, *p_data); \
42 punpcklbw_r2r (mm0, r1); \
43 paddsw_m2r (*(p_block+offset), r1); \
44 punpckhbw_r2r (mm0, r2); \
45 paddsw_m2r (*(p_block+offset+4), r2);
/*
 * AddBlock: adds the coefficients in p_block to the pixel rows at p_data and
 * writes the packed result back to p_data (see the "add a block" banner).
 *
 * p_block  source coefficients, read at offsets 0*8 .. 7*8 (+4 for the
 *          second half of each row); presumably an 8x8 block of 16-bit
 *          values - verify against the dctelem_t definition.
 * p_data   destination pixels, read and written in place.
 * i_incr   used in the body as the offset from one row of p_data to the
 *          next.
 *
 * NOTE(review): the visible signature ends on a comma and no braces are
 * visible, so the remaining parameter(s) and the opening/closing braces are
 * on lines missing from this view. Likewise no visible line initializes
 * mm0, mm2 or mm4 before they are used (embedded lines 51, 53 and 55 are
 * absent) - confirm against the original file.
 */
47 static __inline__ void AddBlock( dctelem_t * p_block, yuv_data_t * p_data,
/* Load the first two rows of p_data. */
50 movq_m2r (*p_data, mm1);
52 movq_m2r (*(p_data + i_incr), mm3);
/* Row 0: unpack against mm0 and add the first coefficient row. */
54 punpcklbw_r2r (mm0, mm1);
56 paddsw_m2r (*(p_block+0*8), mm1);
57 punpckhbw_r2r (mm0, mm2);
58 paddsw_m2r (*(p_block+0*8+4), mm2);
/* Row 1 is interleaved with packing and storing row 0. */
59 punpcklbw_r2r (mm0, mm3);
60 paddsw_m2r (*(p_block+1*8), mm3);
61 packuswb_r2r (mm2, mm1);
62 punpckhbw_r2r (mm0, mm4);
63 movq_r2m (mm1, *p_data);
64 paddsw_m2r (*(p_block+1*8+4), mm4);
/* Rows 2..7: each step alternates the (mm1,mm2) and (mm3,mm4) pairs. */
65 ADD_MMX (2*8, mm1, mm2, mm3, mm4);
66 ADD_MMX (3*8, mm3, mm4, mm1, mm2);
67 ADD_MMX (4*8, mm1, mm2, mm3, mm4);
68 ADD_MMX (5*8, mm3, mm4, mm1, mm2);
69 ADD_MMX (6*8, mm1, mm2, mm3, mm4);
70 ADD_MMX (7*8, mm3, mm4, mm1, mm2);
/* Pack and store the row left pending by the last ADD_MMX step. */
71 packuswb_r2r (mm4, mm3);
72 movq_r2m (mm3, *(p_data + i_incr));
75 /*****************************************************************************
76 * CopyBlock: copy a block
 *
 * COPY_MMX(offset, r0, r1, r2): one pipelined step used by CopyBlock.
 * It expands to statements that use p_block and p_data from the expansion
 * site.
 *
 * Visible steps, in order:
 *   - load the quadword at p_block + offset into r0;
 *   - load the quadword at p_block + offset + 4 into r1;
 *   - store r2 (packed by an earlier step) to *p_data;
 *   - pack r1 and r0 into r0, ready to be stored by a later step.
 *
 * Assumes the movq/packuswb wrappers map to the MMX instructions of the
 * same name with (source, destination) operand order; their definitions are
 * not in this file - TODO confirm.
 * NOTE(review): no visible line advances p_data between steps, and the
 * embedded numbering jumps from 79 to 81, so a line appears to be missing
 * here - confirm against the original file.
77 *****************************************************************************/
78 #define COPY_MMX(offset,r0,r1,r2) \
79 movq_m2r (*(p_block+offset), r0); \
81 movq_m2r (*(p_block+offset+4), r1); \
82 movq_r2m (r2, *p_data); \
83 packuswb_r2r (r1, r0);
/*
 * CopyBlock: packs the values in p_block and stores them to p_data (see the
 * "copy a block" banner).
 *
 * p_block  source values, read at offsets 0*8 .. 7*8 (+4 for the second
 *          half of each row); presumably an 8x8 block of 16-bit values -
 *          verify against the dctelem_t definition.
 * p_data   destination, written only.
 * i_incr   used in the final store as the offset to the next row of p_data.
 *
 * NOTE(review): the visible signature ends on a comma and no braces are
 * visible, so the remaining parameter(s) and the opening/closing braces are
 * on lines missing from this view - confirm against the original file.
 */
85 static __inline__ void CopyBlock( dctelem_t * p_block, yuv_data_t * p_data,
/* Row 0: load both halves, pack them into mm0 and store. */
88 movq_m2r (*(p_block+0*8), mm0);
89 movq_m2r (*(p_block+0*8+4), mm1);
90 movq_m2r (*(p_block+1*8), mm2);
91 packuswb_r2r (mm1, mm0);
92 movq_m2r (*(p_block+1*8+4), mm3);
93 movq_r2m (mm0, *p_data);
/* Row 1 is packed into mm2; the first COPY_MMX step stores it. */
94 packuswb_r2r (mm3, mm2);
/* Rows 2..7: each step stores the previous row and packs the next one. */
95 COPY_MMX (2*8, mm0, mm1, mm2);
96 COPY_MMX (3*8, mm2, mm3, mm0);
97 COPY_MMX (4*8, mm0, mm1, mm2);
98 COPY_MMX (5*8, mm2, mm3, mm0);
99 COPY_MMX (6*8, mm0, mm1, mm2);
100 COPY_MMX (7*8, mm2, mm3, mm0);
/* Store the row left pending by the last COPY_MMX step. */
101 movq_r2m (mm2, *(p_data + i_incr));