]> git.sesse.net Git - vlc/blob - plugins/idct/vdec_block_mmx.c
* Fixed an alignment issue in the ifo parser. The Matrix problem should
[vlc] / plugins / idct / vdec_block_mmx.c
1 /*****************************************************************************
2  * vdec_block_mmx.c: Macroblock copy functions in MMX assembly
3  *****************************************************************************
4  * Copyright (C) 1999, 2000, 2001 VideoLAN
5  * $Id: vdec_block_mmx.c,v 1.4 2001/06/20 07:43:48 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /* MODULE_NAME defined in Makefile together with -DBUILTIN */
25 #ifdef BUILTIN
26 #   include "modules_inner.h"
27 #else
28 #   define _M( foo ) foo
29 #endif
30
31 /*****************************************************************************
32  * Preamble
33  *****************************************************************************/
34 #include "defs.h"
35
36 #include <string.h>
37
38 #include "config.h"
39 #include "common.h"
40 #include "threads.h"
41 #include "mtime.h"
42
43 #include "intf_msg.h"
44
45 #include "input_ext-dec.h"
46
47 #include "video.h"
48 #include "video_output.h"
49
50 #include "vdec_motion.h"
51 #include "video_decoder.h"
52
53 #include "vpar_blocks.h"
54
55 #include "vpar_headers.h"
56 #include "vpar_synchro.h"
57 #include "video_parser.h"
58 #include "video_fifo.h"
59
60 #include "vdec_block.h"
61
62 #include "modules.h"
63 #include "modules_export.h"
64
65 /*****************************************************************************
66  * vdec_InitDecode: initialize video decoder thread
67  *****************************************************************************/
68 void _M( vdec_InitDecode ) ( vdec_thread_t *p_vdec )
69 {
70     ;
71 }
72
73 /*****************************************************************************
74  * AddBlock : add a block
75  *****************************************************************************/
76 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
77                                  yuv_data_t * p_data, int i_incr )
78 {
79     asm __volatile__ ( 
80             "pxor       %%mm7,%%mm7\n\t"
81
82             "movq       (%0),%%mm1\n\t"
83             "movq       %%mm1,%%mm2\n\t"
84             "punpckhbw  %%mm7,%%mm1\n\t"
85             "punpcklbw  %%mm7,%%mm2\n\t"
86             "paddw      (%2),%%mm2\n\t"
87             "paddw      8(%2),%%mm1\n\t"
88             "packuswb   %%mm1,%%mm2\n\t"
89             "movq       %%mm2,(%0)\n\t"
90             "addl       %3,%0\n\t"
91
92             "movq       (%0),%%mm1\n\t"
93             "movq       %%mm1,%%mm2\n\t"
94             "punpckhbw  %%mm7,%%mm1\n\t"
95             "punpcklbw  %%mm7,%%mm2\n\t"
96             "paddw      16(%2),%%mm2\n\t"
97             "paddw      24(%2),%%mm1\n\t"
98             "packuswb   %%mm1,%%mm2\n\t"
99             "movq       %%mm2,(%0)\n\t"
100             "addl       %3,%0\n\t"
101
102             "movq       (%0),%%mm1\n\t"
103             "movq       %%mm1,%%mm2\n\t"
104             "punpckhbw  %%mm7,%%mm1\n\t"
105             "punpcklbw  %%mm7,%%mm2\n\t"
106             "paddw      32(%2),%%mm2\n\t"
107             "paddw      40(%2),%%mm1\n\t"
108             "packuswb   %%mm1,%%mm2\n\t"
109             "movq       %%mm2,(%0)\n\t"
110             "addl       %3,%0\n\t"
111
112             "movq       (%0),%%mm1\n\t"
113             "movq       %%mm1,%%mm2\n\t"
114             "punpckhbw  %%mm7,%%mm1\n\t"
115             "punpcklbw  %%mm7,%%mm2\n\t"
116             "paddw      48(%2),%%mm2\n\t"
117             "paddw      56(%2),%%mm1\n\t"
118             "packuswb   %%mm1,%%mm2\n\t"
119             "movq       %%mm2,(%0)\n\t"
120             "addl       %3,%0\n\t"
121
122             "movq       (%0),%%mm1\n\t"
123             "movq       %%mm1,%%mm2\n\t"
124             "punpckhbw  %%mm7,%%mm1\n\t"
125             "punpcklbw  %%mm7,%%mm2\n\t"
126             "paddw      64(%2),%%mm2\n\t"
127             "paddw      72(%2),%%mm1\n\t"
128             "packuswb   %%mm1,%%mm2\n\t"
129             "movq       %%mm2,(%0)\n\t"
130             "addl       %3,%0\n\t"
131
132             "movq       (%0),%%mm1\n\t"
133             "movq       %%mm1,%%mm2\n\t"
134             "punpckhbw  %%mm7,%%mm1\n\t"
135             "punpcklbw  %%mm7,%%mm2\n\t"
136             "paddw      80(%2),%%mm2\n\t"
137             "paddw      88(%2),%%mm1\n\t"
138             "packuswb   %%mm1,%%mm2\n\t"
139             "movq       %%mm2,(%0)\n\t"
140             "addl       %3,%0\n\t"
141
142             "movq       (%0),%%mm1\n\t"
143             "movq       %%mm1,%%mm2\n\t"
144             "punpckhbw  %%mm7,%%mm1\n\t"
145             "punpcklbw  %%mm7,%%mm2\n\t"
146             "paddw      96(%2),%%mm2\n\t"
147             "paddw      104(%2),%%mm1\n\t"
148             "packuswb   %%mm1,%%mm2\n\t"
149             "movq       %%mm2,(%0)\n\t"
150             "addl       %3,%0\n\t"
151
152             "movq       (%0),%%mm1\n\t"
153             "movq       %%mm1,%%mm2\n\t"
154             "punpckhbw  %%mm7,%%mm1\n\t"
155             "punpcklbw  %%mm7,%%mm2\n\t"
156             "paddw      112(%2),%%mm2\n\t"
157             "paddw      120(%2),%%mm1\n\t"
158             "packuswb   %%mm1,%%mm2\n\t"
159             "movq       %%mm2,(%0)\n\t"
160
161             //"emms"
162             : "=r" (p_data)
163             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
164 }
165
166 /*****************************************************************************
167  * CopyBlock : copy a block
168  *****************************************************************************/
169 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
170                                    yuv_data_t * p_data, int i_incr )
171 {
172     asm __volatile__ (
173             "movq         (%2),%%mm0\n\t"
174             "packuswb   8(%2),%%mm0\n\t"
175             "movq        %%mm0,(%0)\n\t"
176             "addl           %3,%0\n\t"
177
178             "movq        16(%2),%%mm0\n\t"
179             "packuswb   24(%2),%%mm0\n\t"
180             "movq        %%mm0,(%0)\n\t"
181             "addl           %3,%0\n\t"
182
183             "movq        32(%2),%%mm0\n\t"
184             "packuswb   40(%2),%%mm0\n\t"
185             "movq        %%mm0,(%0)\n\t"
186             "addl           %3,%0\n\t"
187
188             "movq        48(%2),%%mm0\n\t"
189             "packuswb   56(%2),%%mm0\n\t"
190             "movq        %%mm0,(%0)\n\t"
191             "addl           %3,%0\n\t"
192
193             "movq        64(%2),%%mm0\n\t"
194             "packuswb   72(%2),%%mm0\n\t"
195             "movq        %%mm0,(%0)\n\t"
196             "addl           %3,%0\n\t"
197
198             "movq        80(%2),%%mm0\n\t"
199             "packuswb   88(%2),%%mm0\n\t"
200             "movq        %%mm0,(%0)\n\t"
201             "addl           %3,%0\n\t"
202
203             "movq        96(%2),%%mm0\n\t"
204             "packuswb   104(%2),%%mm0\n\t"
205             "movq        %%mm0,(%0)\n\t"
206             "addl           %3,%0\n\t"
207
208             "movq        112(%2),%%mm0\n\t"
209             "packuswb   120(%2),%%mm0\n\t"
210             "movq        %%mm0,(%0)\n\t"
211
212             //"emms"
213             : "=r" (p_data)
214             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
215 }
216
217 void _M( vdec_DecodeMacroblockC ) ( vdec_thread_t *p_vdec,
218                                     macroblock_t * p_mb )
219 {
220     if( !(p_mb->i_mb_type & MB_INTRA) )
221     {
222         /*
223          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
224          */
225         if( p_mb->pf_motion == 0 )
226         {
227             intf_WarnMsg( 2, "pf_motion set to NULL" );
228         }
229         else
230         {
231             p_mb->pf_motion( p_mb );
232         }
233
234         DECODEBLOCKSC( AddBlock )
235     }
236     else
237     {
238         DECODEBLOCKSC( CopyBlock )
239     }
240 }
241
242 void _M( vdec_DecodeMacroblockBW ) ( vdec_thread_t *p_vdec,
243                                      macroblock_t * p_mb )
244 {
245     if( !(p_mb->i_mb_type & MB_INTRA) )
246     {
247         /*
248          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
249          */
250         if( p_mb->pf_motion == 0 )
251         {
252             intf_WarnMsg( 2, "pf_motion set to NULL" );
253         }
254         else
255         {
256             p_mb->pf_motion( p_mb );
257         }
258
259         DECODEBLOCKSBW( AddBlock )
260     }
261     else
262     {
263         DECODEBLOCKSBW( CopyBlock )
264     }
265 }
266