1 /*****************************************************************************
2 * vdec_block_mmx.c: Macroblock copy functions in MMX assembly
3 *****************************************************************************
4 * Copyright (C) 1999, 2000, 2001 VideoLAN
5 * $Id: vdec_block_mmx.c,v 1.6 2001/08/22 17:21:45 massiot Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /* MODULE_NAME defined in Makefile together with -DBUILTIN */
27 # include "modules_inner.h"
29 # define _M( foo ) foo
32 /*****************************************************************************
34 *****************************************************************************/
46 #include "input_ext-dec.h"
49 #include "video_output.h"
51 #include "vdec_idct.h"
54 #include "modules_export.h"
58 /*****************************************************************************
59 * vdec_InitDecode: initialize video decoder thread
60 *****************************************************************************/
/* Header of the routine that the banner above describes as initializing the
 * video decoder thread.  It is declared with an empty parameter list and its
 * name is wrapped in the _M() macro defined near the top of this file.
 *
 * NOTE(review): no body is visible here; the embedded source line numbers
 * jump from 61 to 66, so the body appears to have been dropped from this
 * copy.  Confirm against the complete file. */
61 void _M( vdec_InitDecode ) ( )
66 /*****************************************************************************
67 * vdec_AddBlock : add a block
68 *****************************************************************************/
/* ADD_MMX(offset, r1, r2, r3, r4): one step of the add-block sequence.  It
 * finishes the register pair r3/r4 and begins the next pair r1/r2.
 *
 * The macro expands to a bare statement list (no do/while wrapper) and picks
 * up p_data, i_incr, p_block and mm0 from the expansion site.  In order it:
 *   - loads the quadword at p_data + 2*i_incr into r1;
 *   - packs r4 into r3 with packuswb and stores r3 to the location p_data
 *     points at;
 *   - unpacks r1 (punpcklbw) and r2 (punpckhbw) against mm0, then adds the
 *     words found at p_block+offset and p_block+offset+4 with paddsw.
 *
 * The movq, packuswb, punpck?bw and paddsw helpers are defined outside this
 * view; presumably they emit the MMX instructions of the same names.
 *
 * NOTE(review): nothing visible in this macro writes r2 or advances p_data,
 * and the embedded source line numbers skip 72-73.  Lines look to be missing
 * from this copy -- confirm against the complete file. */
69 #define ADD_MMX(offset,r1,r2,r3,r4) \
70 movq_m2r (*(p_data+2*i_incr), r1); \
71 packuswb_r2r (r4, r3); \
74 movq_r2m (r3, *p_data); \
75 punpcklbw_r2r (mm0, r1); \
76 paddsw_m2r (*(p_block+offset), r1); \
77 punpckhbw_r2r (mm0, r2); \
78 paddsw_m2r (*(p_block+offset+4), r2);
/* vdec_AddBlock: add the values read from p_block to the samples read from
 * p_data and store the packed result back through p_data.
 *
 *   p_block  source of the words added with paddsw; read in eight groups at
 *            offsets 0*8 .. 7*8, each group in two halves (+0 and +4).
 *   p_data   samples that are both loaded and overwritten.
 *   i_incr   distance between successive loads from p_data.
 *
 * NOTE(review): the parameter list is cut off after "p_data," in this copy,
 * so i_incr is used below without a visible declaration, and the function
 * braces are not visible either.  Confirm against the complete file. */
80 void _M( vdec_AddBlock ) ( dctelem_t * p_block, yuv_data_t * p_data,
/* Load the first two quadwords of samples into mm1 and mm3. */
83 movq_m2r (*p_data, mm1);
85 movq_m2r (*(p_data + i_incr), mm3);
/* Unpack against mm0 and add groups 0 and 1 of p_block; the first packed
 * result is stored while the second group is still being accumulated.
 * NOTE(review): mm0, mm2 and mm4 are read here but never written in the
 * visible code, and embedded line numbers 84, 86 and 88 are absent --
 * instructions appear to be missing from this copy. */
87 punpcklbw_r2r (mm0, mm1);
89 paddsw_m2r (*(p_block+0*8), mm1);
90 punpckhbw_r2r (mm0, mm2);
91 paddsw_m2r (*(p_block+0*8+4), mm2);
92 punpcklbw_r2r (mm0, mm3);
93 paddsw_m2r (*(p_block+1*8), mm3);
94 packuswb_r2r (mm2, mm1);
95 punpckhbw_r2r (mm0, mm4);
96 movq_r2m (mm1, *p_data);
97 paddsw_m2r (*(p_block+1*8+4), mm4);
/* Groups 2..7: the two register pairs swap roles on every step, so each
 * expansion stores the previous pair while starting the next one. */
98 ADD_MMX (2*8, mm1, mm2, mm3, mm4);
99 ADD_MMX (3*8, mm3, mm4, mm1, mm2);
100 ADD_MMX (4*8, mm1, mm2, mm3, mm4);
101 ADD_MMX (5*8, mm3, mm4, mm1, mm2);
102 ADD_MMX (6*8, mm1, mm2, mm3, mm4);
103 ADD_MMX (7*8, mm3, mm4, mm1, mm2);
/* Pack and store the pair left pending by the last ADD_MMX step. */
104 packuswb_r2r (mm4, mm3);
105 movq_r2m (mm3, *(p_data + i_incr));
108 /*****************************************************************************
109 * vdec_CopyBlock : copy a block
110 *****************************************************************************/
/* COPY_MMX(offset, r0, r1, r2): one step of the copy-block sequence.  It
 * stores the already-packed register r2 and prepares the next one in r0.
 *
 * The macro expands to a bare statement list (no do/while wrapper) and picks
 * up p_block and p_data from the expansion site.  In order it:
 *   - loads the quadwords at p_block+offset and p_block+offset+4 into r0
 *     and r1;
 *   - stores r2 to the location p_data points at;
 *   - packs r1 into r0 with packuswb.
 *
 * The movq and packuswb helpers are defined outside this view; presumably
 * they emit the MMX instructions of the same names.
 *
 * NOTE(review): nothing visible in this macro advances p_data, and the
 * embedded source line numbers skip 113.  A line looks to be missing from
 * this copy -- confirm against the complete file. */
111 #define COPY_MMX(offset,r0,r1,r2) \
112 movq_m2r (*(p_block+offset), r0); \
114 movq_m2r (*(p_block+offset+4), r1); \
115 movq_r2m (r2, *p_data); \
116 packuswb_r2r (r1, r0);
/* vdec_CopyBlock: pack the values read from p_block with packuswb and store
 * them through p_data.
 *
 *   p_block  source words; read in eight groups at offsets 0*8 .. 7*8, each
 *            group in two halves (+0 and +4).
 *   p_data   destination of the packed quadwords.
 *   i_incr   offset applied to p_data for the final store.
 *
 * NOTE(review): the parameter list is cut off after "p_data," in this copy,
 * so i_incr is used below without a visible declaration, and the function
 * braces are not visible either.  Confirm against the complete file. */
118 void _M( vdec_CopyBlock ) ( dctelem_t * p_block, yuv_data_t * p_data,
/* Groups 0 and 1: load both halves of each, pack them, and store the first
 * result while the second is still being packed. */
121 movq_m2r (*(p_block+0*8), mm0);
122 movq_m2r (*(p_block+0*8+4), mm1);
123 movq_m2r (*(p_block+1*8), mm2);
124 packuswb_r2r (mm1, mm0);
125 movq_m2r (*(p_block+1*8+4), mm3);
126 movq_r2m (mm0, *p_data);
127 packuswb_r2r (mm3, mm2);
/* Groups 2..7: mm0/mm1 and mm2/mm3 swap roles on every step, so each
 * expansion stores the previous packed register while loading the next. */
128 COPY_MMX (2*8, mm0, mm1, mm2);
129 COPY_MMX (3*8, mm2, mm3, mm0);
130 COPY_MMX (4*8, mm0, mm1, mm2);
131 COPY_MMX (5*8, mm2, mm3, mm0);
132 COPY_MMX (6*8, mm0, mm1, mm2);
133 COPY_MMX (7*8, mm2, mm3, mm0);
/* Store the register left pending by the last COPY_MMX step. */
134 movq_r2m (mm2, *(p_data + i_incr));