]> git.sesse.net Git - vlc/blob - src/video_decoder/vdec_motion_inner_mmxext.c
* Borrowed LiViD's MMX and MMX EXT IDCT.
[vlc] / src / video_decoder / vdec_motion_inner_mmxext.c
1 /*****************************************************************************
2  * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
3  *                              in MMX EXT
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: vdec_motion_inner_mmxext.c,v 1.1 2001/01/16 17:59:23 massiot Exp $
7  *
8  * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9  *          work done by the livid project <http://www.linuxvideo.org/>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include "config.h"
32 #include "common.h"
33 #include "threads.h"
34 #include "mtime.h"
35 #include "plugins.h"
36
37 #include "intf_msg.h"
38
39 #include "stream_control.h"
40 #include "input_ext-dec.h"
41
42 #include "video.h"
43 #include "video_output.h"
44
45 #include "vdec_idct.h"
46 #include "video_decoder.h"
47 #include "vdec_motion.h"
48
49 #include "vpar_blocks.h"
50 #include "vpar_headers.h"
51 #include "vpar_synchro.h"
52 #include "video_parser.h"
53 #include "video_fifo.h"
54
55 #include "attributes.h"
56 #include "mmx.h"
57
58 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
59
/* 0x01 replicated in each of the eight byte lanes.  The X_Y_copy / X_Y_avg
 * routines AND it into mm7 (pand_m2r) to reduce the rounding correction to
 * one bit per byte before subtracting it with psubusb. */
60 static mmx_t mask_one = {0x0101010101010101LL};
61
62 /*
63  * Useful functions
64  */
65
/* Byte-average helpers: forward to the pavgb_* wrappers, which are not
 * defined in this file (presumably they come from mmx.h -- TODO confirm).
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macros behave like ordinary statements and remain
 * usable in an if/else. */
#define pavg_r2r(src,dest)      pavgb_r2r (src, dest)
#define pavg_m2r(src,dest)      pavgb_m2r (src, dest)
68
/*
 * MotionComponent_x_y_copy_<width>_<height>
 *
 * Plain block copy: for each of `height` rows, moves 8 bytes (16 when
 * width == 16) from p_src to p_dest, then adds i_stride to both pointers.
 * Works on mm0, plus mm1 for 16-wide blocks.  Both registers are always
 * loaded before they are stored and no other MMX register is read, so no
 * register initialisation is performed.
 */
#define __MotionComponent_x_y_copy(width,height)                            \
void MotionComponent_x_y_copy_##width##_##height(yuv_data_t * p_src,        \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_y;                                                                \
                                                                            \
    for( i_y = 0; i_y < height; i_y ++ )                                    \
    {                                                                       \
        movq_m2r( *p_src, mm0 );     /* load 8 ref bytes */                 \
        if( width == 16 )                                                   \
            movq_m2r( *(p_src + 8), mm1 );   /* bytes 8..15 */              \
        p_src += i_stride;                                                  \
                                                                            \
        movq_r2m( mm0, *p_dest );    /* store 8 bytes at curr */            \
        if( width == 16 )                                                   \
            movq_r2m( mm1, *(p_dest + 8) );  /* bytes 8..15 */              \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
98
/*
 * MotionComponent_X_y_copy_<width>_<height>
 *
 * For each of `height` rows, stores in p_dest the pavg of the source row and
 * the same row taken one byte further right (p_src[k] with p_src[k + 1]).
 * width + 1 source bytes are read per row; i_stride is added to both
 * pointers after every row.  pavg forwards to the pavgb_* wrappers
 * (presumably the rounded packed byte average -- TODO confirm).
 * Works on mm0, plus mm1 for 16-wide blocks.
 */
#define __MotionComponent_X_y_copy(width,height)                            \
void MotionComponent_X_y_copy_##width##_##height(yuv_data_t * p_src,        \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* Load the source row */                                           \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with the bytes one column to the right */                \
        pavg_m2r( p_src[1], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[9], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Store the result */                                              \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        /* Step both pointers to the next row */                            \
        p_src  += i_stride;                                                 \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
121
/*
 * MotionComponent_x_Y_copy_<width>_<height>
 *
 * For each of `height` rows, stores in p_dest the pavg of the source row and
 * the row i_stride further on (p_src[k] with p_src[k + i_stride]), so
 * height + 1 source rows are read in total.  i_stride is added to both
 * pointers after every row.  pavg forwards to the pavgb_* wrappers
 * (presumably the rounded packed byte average -- TODO confirm).
 * Works on mm0, plus mm1 for 16-wide blocks.
 */
#define __MotionComponent_x_Y_copy(width,height)                            \
void MotionComponent_x_Y_copy_##width##_##height(yuv_data_t * p_src,        \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* Load the source row */                                           \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with the row below */                                    \
        pavg_m2r( p_src[i_stride], mm0 );                                   \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[i_stride + 8], mm1 );                           \
        }                                                                   \
                                                                            \
        /* Store the result */                                              \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        /* Step both pointers to the next row */                            \
        p_src  += i_stride;                                                 \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
146
/*
 * MotionComponent_X_Y_copy_<width>_<height>
 *
 * Writes to p_dest, for `height` rows, a blend of four source bytes per
 * output byte: a = p_src[x], b = p_src[x+1], c = p_src[x+i_stride] and
 * d = p_src[x+i_stride+1] (presumably half-pel interpolation in both
 * directions -- confirm against the caller).  height + 1 source rows and
 * width + 1 source columns are read.
 *
 * Both paths cascade two levels of pavg and then subtract, with psubusb, a
 * 0/1 per-byte correction masked by mask_one:
 *   16-wide: pavg(pavg(a,d), pavg(b,c))
 *            - (((a^d) | (b^c)) & (pavg(a,d) ^ pavg(b,c)) & 1)
 *    8-wide: pavg(pavg(a,b), pavg(c,d))
 *            - (((a^b) | (c^d)) & (pavg(a,b) ^ pavg(c,d)) & 1)
 * The 8-wide path keeps the horizontal pavg (mm0) and xor (mm7) of the
 * current row in registers, so each source row is loaded only once.
 * NOTE(review): the correction presumably compensates the upward bias of
 * the cascaded rounded averages -- confirm against the PAVGB definition.
 *
 * Works on mm0-mm3, mm6 and mm7 (plus mm5 in the 8-wide path).  No emms is
 * issued in this function.
 */
147 #define __MotionComponent_X_Y_copy(width,height)                            \
148 void MotionComponent_X_Y_copy_##width##_##height(yuv_data_t * p_src,        \
149                                                  yuv_data_t * p_dest,       \
150                                                  int i_stride)              \
151 {                                                                           \
152     int i_y;                                                                \
153                                                                             \
154     if( width == 16 )                                                       \
155     {                                                                       \
156         for( i_y = 0; i_y < height; i_y ++ )                                \
157         {                                                                   \
158             movq_m2r (*p_src, mm0);                                         \
159             movq_m2r (*(p_src+i_stride+1), mm1);                            \
160             movq_r2r (mm0, mm7);                                            \
161             movq_m2r (*(p_src+1), mm2);                                     \
162             pxor_r2r (mm1, mm7);                                            \
163             movq_m2r (*(p_src + i_stride), mm3);                            \
164             movq_r2r (mm2, mm6);                                            \
165             pxor_r2r (mm3, mm6);                                            \
166             pavg_r2r (mm1, mm0);                                            \
167             pavg_r2r (mm3, mm2);                                            \
168             por_r2r (mm6, mm7);                                             \
169             movq_r2r (mm0, mm6);                                            \
170             pxor_r2r (mm2, mm6);                                            \
171             pand_r2r (mm6, mm7);                                            \
172             pand_m2r (mask_one, mm7);       /* keep bit 0 of each byte */   \
173             pavg_r2r (mm2, mm0);                                            \
174             psubusb_r2r (mm7, mm0);         /* subtract the correction */   \
175             movq_r2m (mm0, *p_dest);                                        \
176             /* Same sequence for bytes 8..15 of the row */                  \
177             movq_m2r (*(p_src+8), mm0);                                     \
178             movq_m2r (*(p_src+i_stride+9), mm1);                            \
179             movq_r2r (mm0, mm7);                                            \
180             movq_m2r (*(p_src+9), mm2);                                     \
181             pxor_r2r (mm1, mm7);                                            \
182             movq_m2r (*(p_src+i_stride+8), mm3);                            \
183             movq_r2r (mm2, mm6);                                            \
184             pxor_r2r (mm3, mm6);                                            \
185             pavg_r2r (mm1, mm0);                                            \
186             pavg_r2r (mm3, mm2);                                            \
187             por_r2r (mm6, mm7);                                             \
188             movq_r2r (mm0, mm6);                                            \
189             pxor_r2r (mm2, mm6);                                            \
190             pand_r2r (mm6, mm7);                                            \
191             pand_m2r (mask_one, mm7);                                       \
192             pavg_r2r (mm2, mm0);                                            \
193             psubusb_r2r (mm7, mm0);                                         \
194             p_src += i_stride;                                              \
195             movq_r2m (mm0, *(p_dest+8));                                    \
196             p_dest += i_stride;                                             \
197         }                                                                   \
198     }                                                                       \
199     else                                                                    \
200     {                                                                       \
201         movq_m2r (*p_src, mm0);                                             \
202         movq_m2r (*(p_src+1), mm1);                                         \
203         movq_r2r (mm0, mm7);                                                \
204         pxor_r2r (mm1, mm7);                                                \
205         pavg_r2r (mm1, mm0);                                                \
206         p_src += i_stride;                                                  \
207         /* mm0 = pavg, mm7 = xor of the row above */                        \
208         for( i_y = 0; i_y < height; i_y ++ )                                \
209         {                                                                   \
210             movq_m2r (*p_src, mm2);                                         \
211             movq_r2r (mm0, mm5);                                            \
212             movq_m2r (*(p_src+1), mm3);                                     \
213             movq_r2r (mm2, mm6);                                            \
214             pxor_r2r (mm3, mm6);                                            \
215             pavg_r2r (mm3, mm2);                                            \
216             por_r2r (mm6, mm7);                                             \
217             pxor_r2r (mm2, mm5);                                            \
218             pand_r2r (mm5, mm7);                                            \
219             pavg_r2r (mm2, mm0);                                            \
220             pand_m2r (mask_one, mm7);       /* keep bit 0 of each byte */   \
221             psubusb_r2r (mm7, mm0);         /* subtract the correction */   \
222             p_src += i_stride;                                              \
223             movq_r2m (mm0, *p_dest);                                        \
224             p_dest += i_stride;                                             \
225             movq_r2r (mm6, mm7);            /* carry xor to next row */     \
226             movq_r2r (mm2, mm0);            /* carry pavg to next row */    \
227         }                                                                   \
228     }                                                                       \
229 }
230
/*
 * MotionComponent_x_y_avg_<width>_<height>
 *
 * For each of `height` rows, replaces the 8 bytes at p_dest (16 when
 * width == 16) with the pavg of the source row and what p_dest already
 * holds.  i_stride is added to both pointers after every row.  pavg
 * forwards to the pavgb_* wrappers (presumably the rounded packed byte
 * average -- TODO confirm).
 * Works on mm0, plus mm1 for 16-wide blocks.
 */
#define __MotionComponent_x_y_avg(width,height)                             \
void MotionComponent_x_y_avg_##width##_##height(yuv_data_t * p_src,         \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* Load the source row */                                           \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with what p_dest already holds */                        \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* Store the result */                                              \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        /* Step both pointers to the next row */                            \
        p_src  += i_stride;                                                 \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
253
/*
 * MotionComponent_X_y_avg_<width>_<height>
 *
 * For each of `height` rows: takes the pavg of the source row and the same
 * row one byte further right (p_src[k] with p_src[k + 1]), then the pavg of
 * that value and what p_dest already holds, and stores it in p_dest.
 * width + 1 source bytes are read per row; i_stride is added to both
 * pointers after every row.  pavg forwards to the pavgb_* wrappers
 * (presumably the rounded packed byte average -- TODO confirm).
 * Works on mm0, plus mm1 for 16-wide blocks.
 */
#define __MotionComponent_X_y_avg(width,height)                             \
void MotionComponent_X_y_avg_##width##_##height(yuv_data_t * p_src,         \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* Load the source row */                                           \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with the bytes one column to the right */                \
        pavg_m2r( p_src[1], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[9], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with what p_dest already holds */                        \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* Store the result */                                              \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        /* Step both pointers to the next row */                            \
        p_src  += i_stride;                                                 \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
279
/*
 * MotionComponent_x_Y_avg_<width>_<height>
 *
 * For each of `height` rows: takes the pavg of the source row and the row
 * i_stride further on (p_src[k] with p_src[k + i_stride]), then the pavg of
 * that value and what p_dest already holds, and stores it in p_dest.
 * height + 1 source rows are read in total; i_stride is added to both
 * pointers after every row.  pavg forwards to the pavgb_* wrappers
 * (presumably the rounded packed byte average -- TODO confirm).
 * Works on mm0, plus mm1 for 16-wide blocks.
 */
#define __MotionComponent_x_Y_avg(width,height)                             \
void MotionComponent_x_Y_avg_##width##_##height(yuv_data_t * p_src,         \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* Load the source row */                                           \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* Average with the row below */                                    \
        pavg_m2r( p_src[i_stride], mm0 );                                   \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[i_stride + 8], mm1 );                           \
        }                                                                   \
                                                                            \
        /* Average with what p_dest already holds */                        \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* Store the result */                                              \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        /* Step both pointers to the next row */                            \
        p_src  += i_stride;                                                 \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
307
/*
 * MotionComponent_X_Y_avg_<width>_<height>
 *
 * For `height` rows, blends four source bytes per output byte --
 * a = p_src[x], b = p_src[x+1], c = p_src[x+i_stride] and
 * d = p_src[x+i_stride+1] -- and then stores the pavg of that blend and the
 * bytes already at p_dest.  height + 1 source rows and width + 1 source
 * columns are read.
 *
 * The blend is the same in both width paths:
 *   pavg(pavg(a,d), pavg(b,c))
 *       - (((a^d) | (b^c)) & (pavg(a,d) ^ pavg(b,c)) & 1)
 * where the subtraction is psubusb and the "& 1" is the mask_one AND.  The
 * 16-wide path runs the sequence twice per row (bytes 0..7, then 8..15);
 * the 8-wide path runs it once.
 * NOTE(review): the correction presumably compensates the upward bias of
 * the cascaded rounded averages -- confirm against the PAVGB definition.
 *
 * Works on mm0-mm3, mm6 and mm7.  No emms is issued in this function.
 */
308 #define __MotionComponent_X_Y_avg(width,height)                             \
309 void MotionComponent_X_Y_avg_##width##_##height(yuv_data_t * p_src,         \
310                                                 yuv_data_t * p_dest,        \
311                                                 int i_stride)               \
312 {                                                                           \
313     int i_y;                                                                \
314                                                                             \
315     if( width == 16 )                                                       \
316     {                                                                       \
317         for( i_y = 0; i_y < height; i_y ++ )                                \
318         {                                                                   \
319             movq_m2r (*p_src, mm0);                                         \
320             movq_m2r (*(p_src+i_stride+1), mm1);                            \
321             movq_r2r (mm0, mm7);                                            \
322             movq_m2r (*(p_src+1), mm2);                                     \
323             pxor_r2r (mm1, mm7);                                            \
324             movq_m2r (*(p_src+i_stride), mm3);                              \
325             movq_r2r (mm2, mm6);                                            \
326             pxor_r2r (mm3, mm6);                                            \
327             pavg_r2r (mm1, mm0);                                            \
328             pavg_r2r (mm3, mm2);                                            \
329             por_r2r (mm6, mm7);                                             \
330             movq_r2r (mm0, mm6);                                            \
331             pxor_r2r (mm2, mm6);                                            \
332             pand_r2r (mm6, mm7);                                            \
333             pand_m2r (mask_one, mm7);       /* keep bit 0 of each byte */   \
334             pavg_r2r (mm2, mm0);                                            \
335             psubusb_r2r (mm7, mm0);         /* subtract the correction */   \
336             movq_m2r (*p_dest, mm1);        /* fetch current dest */        \
337             pavg_r2r (mm1, mm0);            /* average with it */           \
338             movq_r2m (mm0, *p_dest);                                        \
339             /* Same sequence for bytes 8..15 of the row */                  \
340             movq_m2r (*(p_src+8), mm0);                                     \
341             movq_m2r (*(p_src+i_stride+9), mm1);                            \
342             movq_r2r (mm0, mm7);                                            \
343             movq_m2r (*(p_src+9), mm2);                                     \
344             pxor_r2r (mm1, mm7);                                            \
345             movq_m2r (*(p_src+i_stride+8), mm3);                            \
346             movq_r2r (mm2, mm6);                                            \
347             pxor_r2r (mm3, mm6);                                            \
348             pavg_r2r (mm1, mm0);                                            \
349             pavg_r2r (mm3, mm2);                                            \
350             por_r2r (mm6, mm7);                                             \
351             movq_r2r (mm0, mm6);                                            \
352             pxor_r2r (mm2, mm6);                                            \
353             pand_r2r (mm6, mm7);                                            \
354             pand_m2r (mask_one, mm7);                                       \
355             pavg_r2r (mm2, mm0);                                            \
356             psubusb_r2r (mm7, mm0);                                         \
357             movq_m2r (*(p_dest+8), mm1);                                    \
358             pavg_r2r (mm1, mm0);                                            \
359             p_src += i_stride;                                              \
360             movq_r2m (mm0, *(p_dest+8));                                    \
361             p_dest += i_stride;                                             \
362         }                                                                   \
363     }                                                                       \
364     else                                                                    \
365     {                                                                       \
366         for( i_y = 0; i_y < height; i_y ++ )                                \
367         {                                                                   \
368             movq_m2r (*p_src, mm0);                                         \
369             movq_m2r (*(p_src+i_stride+1), mm1);                            \
370             movq_r2r (mm0, mm7);                                            \
371             movq_m2r (*(p_src+1), mm2);                                     \
372             pxor_r2r (mm1, mm7);                                            \
373             movq_m2r (*(p_src+i_stride), mm3);                              \
374             movq_r2r (mm2, mm6);                                            \
375             pxor_r2r (mm3, mm6);                                            \
376             pavg_r2r (mm1, mm0);                                            \
377             pavg_r2r (mm3, mm2);                                            \
378             por_r2r (mm6, mm7);                                             \
379             movq_r2r (mm0, mm6);                                            \
380             pxor_r2r (mm2, mm6);                                            \
381             pand_r2r (mm6, mm7);                                            \
382             pand_m2r (mask_one, mm7);       /* keep bit 0 of each byte */   \
383             pavg_r2r (mm2, mm0);                                            \
384             psubusb_r2r (mm7, mm0);         /* subtract the correction */   \
385             movq_m2r (*p_dest, mm1);        /* fetch current dest */        \
386             pavg_r2r (mm1, mm0);            /* average with it */           \
387             p_src += i_stride;                                              \
388             movq_r2m (mm0, *p_dest);                                        \
389             p_dest += i_stride;                                             \
390         }                                                                   \
391     }                                                                       \
392 }
393
/*
 * Expands to the definitions of all eight routines for one block size:
 * MotionComponent_{x,X}_{y,Y}_{copy,avg}_<width>_<height>.
 */
394 #define __MotionComponents(width,height)                                    \
395 __MotionComponent_x_y_copy(width,height)                                    \
396 __MotionComponent_X_y_copy(width,height)                                    \
397 __MotionComponent_x_Y_copy(width,height)                                    \
398 __MotionComponent_X_Y_copy(width,height)                                    \
399 __MotionComponent_x_y_avg(width,height)                                     \
400 __MotionComponent_X_y_avg(width,height)                                     \
401 __MotionComponent_x_Y_avg(width,height)                                     \
402 __MotionComponent_X_Y_avg(width,height)
403
/*
 * Instantiate the eight routines for every (width,height) block size in use.
 * The trailing comments presumably list the chroma formats (4:4:4, 4:2:2,
 * 4:2:0) that need each size -- TODO confirm against the callers.
 * The 8x16 set is compiled out by the #if 0 below.
 */
404 __MotionComponents (16,16)      /* 444, 422, 420 */
405 __MotionComponents (16,8)       /* 444, 422, 420 */
406 __MotionComponents (8,8)        /* 422, 420 */
407 __MotionComponents (8,4)        /* 420 */
408 #if 0
409 __MotionComponents (8,16)       /* 422 */
410 #endif