]> git.sesse.net Git - vlc/blob - plugins/idct/vdec_block_mmx.c
786ed20e52b22f7a4d9e50b60cd0fbca12d61c86
[vlc] / plugins / idct / vdec_block_mmx.c
1 /*****************************************************************************
2  * vdec_block_mmx.c: Macroblock copy functions in MMX assembly
3  *****************************************************************************
4  * Copyright (C) 1999, 2000, 2001 VideoLAN
5  * $Id: vdec_block_mmx.c,v 1.1 2001/05/06 04:32:02 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /* MODULE_NAME defined in Makefile together with -DBUILTIN */
25 #ifdef BUILTIN
26 #   include "modules_inner.h"
27 #else
28 #   define _M( foo ) foo
29 #endif
30
31 /*****************************************************************************
32  * Preamble
33  *****************************************************************************/
34 #include "defs.h"
35
36 #include "config.h"
37 #include "common.h"
38 #include "threads.h"
39 #include "mtime.h"
40
41 #include "intf_msg.h"
42
43 #include "input_ext-dec.h"
44
45 #include "video.h"
46 #include "video_output.h"
47
48 #include "vdec_motion.h"
49 #include "video_decoder.h"
50
51 #include "vpar_blocks.h"
52
53 #include "vpar_headers.h"
54 #include "vpar_synchro.h"
55 #include "video_parser.h"
56 #include "video_fifo.h"
57
58 #include "vdec_block.h"
59
60 /*****************************************************************************
61  * vdec_Init: initialize video decoder thread
62  *****************************************************************************/
63 void _M( vdec_Init ) ( vdec_thread_t *p_vdec )
64 {
65     ;
66 }
67
68 /*****************************************************************************
69  * AddBlock : add a block
70  *****************************************************************************/
71 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
72                                  yuv_data_t * p_data, int i_incr )
73 {
74     asm __volatile__ ( 
75             "pxor       %%mm7,%%mm7\n\t"
76
77             "movq       (%0),%%mm1\n\t"
78             "movq       %%mm1,%%mm2\n\t"
79             "punpckhbw  %%mm7,%%mm1\n\t"
80             "punpcklbw  %%mm7,%%mm2\n\t"
81             "paddw      (%2),%%mm2\n\t"
82             "paddw      8(%2),%%mm1\n\t"
83             "packuswb   %%mm1,%%mm2\n\t"
84             "movq       %%mm2,(%0)\n\t"
85             "addl       %3,%0\n\t"
86
87             "movq       (%0),%%mm1\n\t"
88             "movq       %%mm1,%%mm2\n\t"
89             "punpckhbw  %%mm7,%%mm1\n\t"
90             "punpcklbw  %%mm7,%%mm2\n\t"
91             "paddw      16(%2),%%mm2\n\t"
92             "paddw      24(%2),%%mm1\n\t"
93             "packuswb   %%mm1,%%mm2\n\t"
94             "movq       %%mm2,(%0)\n\t"
95             "addl       %3,%0\n\t"
96
97             "movq       (%0),%%mm1\n\t"
98             "movq       %%mm1,%%mm2\n\t"
99             "punpckhbw  %%mm7,%%mm1\n\t"
100             "punpcklbw  %%mm7,%%mm2\n\t"
101             "paddw      32(%2),%%mm2\n\t"
102             "paddw      40(%2),%%mm1\n\t"
103             "packuswb   %%mm1,%%mm2\n\t"
104             "movq       %%mm2,(%0)\n\t"
105             "addl       %3,%0\n\t"
106
107             "movq       (%0),%%mm1\n\t"
108             "movq       %%mm1,%%mm2\n\t"
109             "punpckhbw  %%mm7,%%mm1\n\t"
110             "punpcklbw  %%mm7,%%mm2\n\t"
111             "paddw      48(%2),%%mm2\n\t"
112             "paddw      56(%2),%%mm1\n\t"
113             "packuswb   %%mm1,%%mm2\n\t"
114             "movq       %%mm2,(%0)\n\t"
115             "addl       %3,%0\n\t"
116
117             "movq       (%0),%%mm1\n\t"
118             "movq       %%mm1,%%mm2\n\t"
119             "punpckhbw  %%mm7,%%mm1\n\t"
120             "punpcklbw  %%mm7,%%mm2\n\t"
121             "paddw      64(%2),%%mm2\n\t"
122             "paddw      72(%2),%%mm1\n\t"
123             "packuswb   %%mm1,%%mm2\n\t"
124             "movq       %%mm2,(%0)\n\t"
125             "addl       %3,%0\n\t"
126
127             "movq       (%0),%%mm1\n\t"
128             "movq       %%mm1,%%mm2\n\t"
129             "punpckhbw  %%mm7,%%mm1\n\t"
130             "punpcklbw  %%mm7,%%mm2\n\t"
131             "paddw      80(%2),%%mm2\n\t"
132             "paddw      88(%2),%%mm1\n\t"
133             "packuswb   %%mm1,%%mm2\n\t"
134             "movq       %%mm2,(%0)\n\t"
135             "addl       %3,%0\n\t"
136
137             "movq       (%0),%%mm1\n\t"
138             "movq       %%mm1,%%mm2\n\t"
139             "punpckhbw  %%mm7,%%mm1\n\t"
140             "punpcklbw  %%mm7,%%mm2\n\t"
141             "paddw      96(%2),%%mm2\n\t"
142             "paddw      104(%2),%%mm1\n\t"
143             "packuswb   %%mm1,%%mm2\n\t"
144             "movq       %%mm2,(%0)\n\t"
145             "addl       %3,%0\n\t"
146
147             "movq       (%0),%%mm1\n\t"
148             "movq       %%mm1,%%mm2\n\t"
149             "punpckhbw  %%mm7,%%mm1\n\t"
150             "punpcklbw  %%mm7,%%mm2\n\t"
151             "paddw      112(%2),%%mm2\n\t"
152             "paddw      120(%2),%%mm1\n\t"
153             "packuswb   %%mm1,%%mm2\n\t"
154             "movq       %%mm2,(%0)\n\t"
155
156             //"emms"
157             : "=r" (p_data)
158             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
159 }
160
161 /*****************************************************************************
162  * CopyBlock : copy a block
163  *****************************************************************************/
164 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
165                                    yuv_data_t * p_data, int i_incr )
166 {
167     asm __volatile__ (
168             "movq         (%2),%%mm0\n\t"
169             "packuswb   8(%2),%%mm0\n\t"
170             "movq        %%mm0,(%0)\n\t"
171             "addl           %3,%0\n\t"
172
173             "movq        16(%2),%%mm0\n\t"
174             "packuswb   24(%2),%%mm0\n\t"
175             "movq        %%mm0,(%0)\n\t"
176             "addl           %3,%0\n\t"
177
178             "movq        32(%2),%%mm0\n\t"
179             "packuswb   40(%2),%%mm0\n\t"
180             "movq        %%mm0,(%0)\n\t"
181             "addl           %3,%0\n\t"
182
183             "movq        48(%2),%%mm0\n\t"
184             "packuswb   56(%2),%%mm0\n\t"
185             "movq        %%mm0,(%0)\n\t"
186             "addl           %3,%0\n\t"
187
188             "movq        64(%2),%%mm0\n\t"
189             "packuswb   72(%2),%%mm0\n\t"
190             "movq        %%mm0,(%0)\n\t"
191             "addl           %3,%0\n\t"
192
193             "movq        80(%2),%%mm0\n\t"
194             "packuswb   88(%2),%%mm0\n\t"
195             "movq        %%mm0,(%0)\n\t"
196             "addl           %3,%0\n\t"
197
198             "movq        96(%2),%%mm0\n\t"
199             "packuswb   104(%2),%%mm0\n\t"
200             "movq        %%mm0,(%0)\n\t"
201             "addl           %3,%0\n\t"
202
203             "movq        112(%2),%%mm0\n\t"
204             "packuswb   120(%2),%%mm0\n\t"
205             "movq        %%mm0,(%0)\n\t"
206
207             //"emms"
208             : "=r" (p_data)
209             : "0" (p_data), "r" (p_block), "r" (i_incr + 8) );
210 }
211
212 void _M( vdec_DecodeMacroblockC ) ( vdec_thread_t *p_vdec,
213                                     macroblock_t * p_mb )
214 {
215     if( !(p_mb->i_mb_type & MB_INTRA) )
216     {
217         /*
218          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
219          */
220         if( p_mb->pf_motion == 0 )
221         {
222             intf_WarnMsg( 2, "pf_motion set to NULL" );
223         }
224         else
225         {
226             p_mb->pf_motion( p_mb );
227         }
228
229         DECODEBLOCKSC( AddBlock )
230     }
231     else
232     {
233         DECODEBLOCKSC( CopyBlock )
234     }
235
236     /*
237      * Decoding is finished, release the macroblock and free
238      * unneeded memory.
239      */
240     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
241 }
242
243 void _M( vdec_DecodeMacroblockBW ) ( vdec_thread_t *p_vdec,
244                                      macroblock_t * p_mb )
245 {
246     if( !(p_mb->i_mb_type & MB_INTRA) )
247     {
248         /*
249          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
250          */
251         if( p_mb->pf_motion == 0 )
252         {
253             intf_WarnMsg( 2, "pf_motion set to NULL" );
254         }
255         else
256         {
257             p_mb->pf_motion( p_mb );
258         }
259
260         DECODEBLOCKSBW( AddBlock )
261     }
262     else
263     {
264         DECODEBLOCKSBW( CopyBlock )
265     }
266
267     /*
268      * Decoding is finished, release the macroblock and free
269      * unneeded memory.
270      */
271     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
272 }
273