]> git.sesse.net Git - vlc/blob - plugins/idct/vdec_block_mmx.c
d6e36cc5bc3f92e9dbc9f632dbfc4b3b7a450917
[vlc] / plugins / idct / vdec_block_mmx.c
1 /*****************************************************************************
2  * vdec_block_mmx.c: Macroblock copy functions in MMX assembly
3  *****************************************************************************
4  * Copyright (C) 1999, 2000, 2001 VideoLAN
5  * $Id: vdec_block_mmx.c,v 1.5 2001/07/17 09:48:07 massiot Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /* MODULE_NAME defined in Makefile together with -DBUILTIN */
25 #ifdef BUILTIN
26 #   include "modules_inner.h"
27 #else
28 #   define _M( foo ) foo
29 #endif
30
31 /*****************************************************************************
32  * Preamble
33  *****************************************************************************/
34 #include "defs.h"
35
36 #include <string.h>
37
38 #include "config.h"
39 #include "common.h"
40 #include "threads.h"
41 #include "mtime.h"
42
43 #include "intf_msg.h"
44
45 #include "input_ext-dec.h"
46
47 #include "video.h"
48 #include "video_output.h"
49
50 #include "vdec_ext-plugins.h"
51
52 #include "vdec_block.h"
53
54 #include "modules.h"
55 #include "modules_export.h"
56
57 /*****************************************************************************
58  * vdec_InitDecode: initialize video decoder thread
59  *****************************************************************************/
60 void _M( vdec_InitDecode ) ( vdec_thread_t *p_vdec )
61 {
62     ;
63 }
64
65 /*****************************************************************************
66  * AddBlock : add a block
67  *****************************************************************************/
68 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
69                                  yuv_data_t * p_data, int i_incr )
70 {
71     asm __volatile__ ( 
72             "pxor       %%mm7,%%mm7\n\t"
73
74             "movq       (%0),%%mm1\n\t"
75             "movq       %%mm1,%%mm2\n\t"
76             "punpckhbw  %%mm7,%%mm1\n\t"
77             "punpcklbw  %%mm7,%%mm2\n\t"
78             "paddw      (%2),%%mm2\n\t"
79             "paddw      8(%2),%%mm1\n\t"
80             "packuswb   %%mm1,%%mm2\n\t"
81             "movq       %%mm2,(%0)\n\t"
82             "addl       %3,%0\n\t"
83
84             "movq       (%0),%%mm1\n\t"
85             "movq       %%mm1,%%mm2\n\t"
86             "punpckhbw  %%mm7,%%mm1\n\t"
87             "punpcklbw  %%mm7,%%mm2\n\t"
88             "paddw      16(%2),%%mm2\n\t"
89             "paddw      24(%2),%%mm1\n\t"
90             "packuswb   %%mm1,%%mm2\n\t"
91             "movq       %%mm2,(%0)\n\t"
92             "addl       %3,%0\n\t"
93
94             "movq       (%0),%%mm1\n\t"
95             "movq       %%mm1,%%mm2\n\t"
96             "punpckhbw  %%mm7,%%mm1\n\t"
97             "punpcklbw  %%mm7,%%mm2\n\t"
98             "paddw      32(%2),%%mm2\n\t"
99             "paddw      40(%2),%%mm1\n\t"
100             "packuswb   %%mm1,%%mm2\n\t"
101             "movq       %%mm2,(%0)\n\t"
102             "addl       %3,%0\n\t"
103
104             "movq       (%0),%%mm1\n\t"
105             "movq       %%mm1,%%mm2\n\t"
106             "punpckhbw  %%mm7,%%mm1\n\t"
107             "punpcklbw  %%mm7,%%mm2\n\t"
108             "paddw      48(%2),%%mm2\n\t"
109             "paddw      56(%2),%%mm1\n\t"
110             "packuswb   %%mm1,%%mm2\n\t"
111             "movq       %%mm2,(%0)\n\t"
112             "addl       %3,%0\n\t"
113
114             "movq       (%0),%%mm1\n\t"
115             "movq       %%mm1,%%mm2\n\t"
116             "punpckhbw  %%mm7,%%mm1\n\t"
117             "punpcklbw  %%mm7,%%mm2\n\t"
118             "paddw      64(%2),%%mm2\n\t"
119             "paddw      72(%2),%%mm1\n\t"
120             "packuswb   %%mm1,%%mm2\n\t"
121             "movq       %%mm2,(%0)\n\t"
122             "addl       %3,%0\n\t"
123
124             "movq       (%0),%%mm1\n\t"
125             "movq       %%mm1,%%mm2\n\t"
126             "punpckhbw  %%mm7,%%mm1\n\t"
127             "punpcklbw  %%mm7,%%mm2\n\t"
128             "paddw      80(%2),%%mm2\n\t"
129             "paddw      88(%2),%%mm1\n\t"
130             "packuswb   %%mm1,%%mm2\n\t"
131             "movq       %%mm2,(%0)\n\t"
132             "addl       %3,%0\n\t"
133
134             "movq       (%0),%%mm1\n\t"
135             "movq       %%mm1,%%mm2\n\t"
136             "punpckhbw  %%mm7,%%mm1\n\t"
137             "punpcklbw  %%mm7,%%mm2\n\t"
138             "paddw      96(%2),%%mm2\n\t"
139             "paddw      104(%2),%%mm1\n\t"
140             "packuswb   %%mm1,%%mm2\n\t"
141             "movq       %%mm2,(%0)\n\t"
142             "addl       %3,%0\n\t"
143
144             "movq       (%0),%%mm1\n\t"
145             "movq       %%mm1,%%mm2\n\t"
146             "punpckhbw  %%mm7,%%mm1\n\t"
147             "punpcklbw  %%mm7,%%mm2\n\t"
148             "paddw      112(%2),%%mm2\n\t"
149             "paddw      120(%2),%%mm1\n\t"
150             "packuswb   %%mm1,%%mm2\n\t"
151             "movq       %%mm2,(%0)\n\t"
152
153             //"emms"
154             : "=r" (p_data)
155             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
156 }
157
158 /*****************************************************************************
159  * CopyBlock : copy a block
160  *****************************************************************************/
161 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
162                                    yuv_data_t * p_data, int i_incr )
163 {
164     asm __volatile__ (
165             "movq         (%2),%%mm0\n\t"
166             "packuswb   8(%2),%%mm0\n\t"
167             "movq        %%mm0,(%0)\n\t"
168             "addl           %3,%0\n\t"
169
170             "movq        16(%2),%%mm0\n\t"
171             "packuswb   24(%2),%%mm0\n\t"
172             "movq        %%mm0,(%0)\n\t"
173             "addl           %3,%0\n\t"
174
175             "movq        32(%2),%%mm0\n\t"
176             "packuswb   40(%2),%%mm0\n\t"
177             "movq        %%mm0,(%0)\n\t"
178             "addl           %3,%0\n\t"
179
180             "movq        48(%2),%%mm0\n\t"
181             "packuswb   56(%2),%%mm0\n\t"
182             "movq        %%mm0,(%0)\n\t"
183             "addl           %3,%0\n\t"
184
185             "movq        64(%2),%%mm0\n\t"
186             "packuswb   72(%2),%%mm0\n\t"
187             "movq        %%mm0,(%0)\n\t"
188             "addl           %3,%0\n\t"
189
190             "movq        80(%2),%%mm0\n\t"
191             "packuswb   88(%2),%%mm0\n\t"
192             "movq        %%mm0,(%0)\n\t"
193             "addl           %3,%0\n\t"
194
195             "movq        96(%2),%%mm0\n\t"
196             "packuswb   104(%2),%%mm0\n\t"
197             "movq        %%mm0,(%0)\n\t"
198             "addl           %3,%0\n\t"
199
200             "movq        112(%2),%%mm0\n\t"
201             "packuswb   120(%2),%%mm0\n\t"
202             "movq        %%mm0,(%0)\n\t"
203
204             //"emms"
205             : "=r" (p_data)
206             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
207 }
208
209 void _M( vdec_DecodeMacroblockC ) ( vdec_thread_t *p_vdec,
210                                     macroblock_t * p_mb )
211 {
212     if( !(p_mb->i_mb_type & MB_INTRA) )
213     {
214         /*
215          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
216          */
217         if( p_mb->pf_motion == 0 )
218         {
219             intf_WarnMsg( 2, "pf_motion set to NULL" );
220         }
221         else
222         {
223             p_mb->pf_motion( p_mb );
224         }
225
226         DECODEBLOCKSC( AddBlock )
227     }
228     else
229     {
230         DECODEBLOCKSC( CopyBlock )
231     }
232 }
233
234 void _M( vdec_DecodeMacroblockBW ) ( vdec_thread_t *p_vdec,
235                                      macroblock_t * p_mb )
236 {
237     if( !(p_mb->i_mb_type & MB_INTRA) )
238     {
239         /*
240          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
241          */
242         if( p_mb->pf_motion == 0 )
243         {
244             intf_WarnMsg( 2, "pf_motion set to NULL" );
245         }
246         else
247         {
248             p_mb->pf_motion( p_mb );
249         }
250
251         DECODEBLOCKSBW( AddBlock )
252     }
253     else
254     {
255         DECODEBLOCKSBW( CopyBlock )
256     }
257 }
258