]> git.sesse.net Git - vlc/blob - plugins/motion/vdec_motion_inner_mmxext.c
The motion compensation routines are now modules as well ; choose your
[vlc] / plugins / motion / vdec_motion_inner_mmxext.c
1 /*****************************************************************************
2  * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
3  *                              in MMX EXT
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: vdec_motion_inner_mmxext.c,v 1.1 2001/01/18 05:13:22 sam Exp $
7  *
8  * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9  *          work done by the livid project <http://www.linuxvideo.org/>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include "config.h"
32 #include "common.h"
33 #include "threads.h"
34 #include "mtime.h"
35
36 #include "video.h"
37
38 #include "attributes.h"
39 #include "mmx.h"
40
41 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
42
43 static mmx_t mask_one = {0x0101010101010101LL}; /* 0x01 in each of the 8 bytes; pand'ed into mm7 by the X_Y routines */
44
45 /*
46  * Useful functions
47  */
48
49 #define pavg_r2r(src,dest)      pavgb_r2r (src, dest)   /* reg -> reg; call site supplies the ';' */
50 #define pavg_m2r(src,dest)      pavgb_m2r (src, dest)   /* mem -> reg; call site supplies the ';' */
51
52 #define __MotionComponent_x_y_copy(width,height)                            \
53 void MotionComponent_x_y_copy_##width##_##height(yuv_data_t * p_src,        \
54                                                  yuv_data_t * p_dest,       \
55                                                  int i_stride)              \
56 {                                                                           \
57     int i_y;                                                                \
58     /* Straight copy of a width x height block; rows are i_stride apart. */ \
67     /* mm0/mm1 are loaded before every store: no register pre-clearing. */  \
68     for( i_y = 0; i_y < height; i_y ++ )                                    \
69     {                                                                       \
70         movq_m2r( *p_src, mm0 );     /* load 8 ref bytes */                 \
71         if( width == 16 )                                                   \
72             movq_m2r( *(p_src + 8), mm1 );  /* second 8 bytes */            \
73         p_src += i_stride;                                                  \
74                                                                             \
75         movq_r2m( mm0, *p_dest );    /* store 8 bytes at curr */            \
76         if( width == 16 )                                                   \
77             movq_r2m( mm1, *(p_dest + 8) );                                 \
78         p_dest += i_stride;                                                 \
79     }                                                                       \
80 }
81
82 #define __MotionComponent_X_y_copy(width,height)                            \
83 void MotionComponent_X_y_copy_##width##_##height(yuv_data_t * p_src,        \
84                                                  yuv_data_t * p_dest,       \
85                                                  int i_stride)              \
86 {                                                                           \
87     int i_y;                                                                \
88     /* dest[x] = pavgb( src[x], src[x+1] ) for a width x height block. */   \
89     for( i_y = 0; i_y < height; i_y ++ )                                    \
90     {                                                                       \
91         movq_m2r (*p_src, mm0);                 /* row bytes 0..7 */        \
92         if( width == 16 )                                                   \
93             movq_m2r (*(p_src + 8), mm1);       /* row bytes 8..15 */       \
94         pavg_m2r (*(p_src + 1), mm0);           /* avg with src + 1 */      \
95         if( width == 16 )                                                   \
96             pavg_m2r (*(p_src + 9), mm1);                                   \
97         movq_r2m (mm0, *p_dest);                                            \
98         p_src += i_stride;                      /* next source row */       \
99         if( width == 16 )                                                   \
100             movq_r2m (mm1, *(p_dest + 8));                                  \
101         p_dest += i_stride;                     /* next output row */       \
102     }                                                                       \
103 }
104
105 #define __MotionComponent_x_Y_copy(width,height)                            \
106 void MotionComponent_x_Y_copy_##width##_##height(yuv_data_t * p_src,        \
107                                                  yuv_data_t * p_dest,       \
108                                                  int i_stride)              \
109 {                                                                           \
110     int i_y;                                                                \
111     yuv_data_t * p_next_src = p_src + i_stride;  /* row after p_src */      \
112     /* dest[x] = pavgb( src[x], next_row[x] ), width x height block. */     \
113     for( i_y = 0; i_y < height; i_y ++ )                                    \
114     {                                                                       \
115         movq_m2r (*p_src, mm0);                 /* row bytes 0..7 */        \
116         if( width == 16 )                                                   \
117             movq_m2r (*(p_src + 8), mm1);       /* row bytes 8..15 */       \
118         pavg_m2r (*(p_next_src), mm0);          /* avg with next row */     \
119         if( width == 16 )                                                   \
120             pavg_m2r (*(p_next_src + 8), mm1);                              \
121         movq_r2m (mm0, *p_dest);                                            \
122         p_src += i_stride;                      /* next source row */       \
123         p_next_src += i_stride;                 /* keep one row ahead */    \
124         if( width == 16 )                                                   \
125             movq_r2m (mm1, *(p_dest + 8));                                  \
126         p_dest += i_stride;                     /* next output row */       \
127     }                                                                       \
128 }
129
130 #define __MotionComponent_X_Y_copy(width,height)                            \
131 void MotionComponent_X_Y_copy_##width##_##height(yuv_data_t * p_src,        \
132                                                  yuv_data_t * p_dest,       \
133                                                  int i_stride)              \
134 {                                                                           \
135     int i_y;                                                                \
136     /* Blends src[x], src[x+1] and the same two bytes of the next row. */   \
137     if( width == 16 )                                                       \
138     {                                                                       \
139         for( i_y = 0; i_y < height; i_y ++ )                                \
140         {                                                                   \
141             movq_m2r (*p_src, mm0);               /* A = src[x] */          \
142             movq_m2r (*(p_src+i_stride+1), mm1);  /* D = next row [x+1] */  \
143             movq_r2r (mm0, mm7);                                            \
144             movq_m2r (*(p_src+1), mm2);           /* B = src[x+1] */        \
145             pxor_r2r (mm1, mm7);                  /* mm7 = A ^ D */         \
146             movq_m2r (*(p_src + i_stride), mm3);  /* C = next row [x] */    \
147             movq_r2r (mm2, mm6);                                            \
148             pxor_r2r (mm3, mm6);                  /* mm6 = B ^ C */         \
149             pavg_r2r (mm1, mm0);                  /* mm0 = pavgb(A, D) */   \
150             pavg_r2r (mm3, mm2);                  /* mm2 = pavgb(B, C) */   \
151             por_r2r (mm6, mm7);                   /* mm7 = (A^D) | (B^C) */ \
152             movq_r2r (mm0, mm6);                                            \
153             pxor_r2r (mm2, mm6);                  /* mm6 = mm0 ^ mm2 */     \
154             pand_r2r (mm6, mm7);                  /* mm7 &= mm6 */          \
155             pand_m2r (mask_one, mm7);             /* keep bit 0 per byte */ \
156             pavg_r2r (mm2, mm0);                  /* avg of the two avgs */ \
157             psubusb_r2r (mm7, mm0);  /* subtract; rounding fix? (verify) */ \
158             movq_r2m (mm0, *p_dest);              /* output bytes 0..7 */   \
159             /* Same sequence for bytes 8..15 of the row. */                 \
160             movq_m2r (*(p_src+8), mm0);                                     \
161             movq_m2r (*(p_src+i_stride+9), mm1);                            \
162             movq_r2r (mm0, mm7);                                            \
163             movq_m2r (*(p_src+9), mm2);                                     \
164             pxor_r2r (mm1, mm7);                                            \
165             movq_m2r (*(p_src+i_stride+8), mm3);                            \
166             movq_r2r (mm2, mm6);                                            \
167             pxor_r2r (mm3, mm6);                                            \
168             pavg_r2r (mm1, mm0);                                            \
169             pavg_r2r (mm3, mm2);                                            \
170             por_r2r (mm6, mm7);                                             \
171             movq_r2r (mm0, mm6);                                            \
172             pxor_r2r (mm2, mm6);                                            \
173             pand_r2r (mm6, mm7);                                            \
174             pand_m2r (mask_one, mm7);                                       \
175             pavg_r2r (mm2, mm0);                                            \
176             psubusb_r2r (mm7, mm0);                                         \
177             p_src += i_stride;                                              \
178             movq_r2m (mm0, *(p_dest+8));                                    \
179             p_dest += i_stride;                                             \
180         }                                                                   \
181     }                                                                       \
182     else                                                                    \
183     {                                                                       \
184         movq_m2r (*p_src, mm0);                   /* row 0: src[x] */       \
185         movq_m2r (*(p_src+1), mm1);               /* row 0: src[x+1] */     \
186         movq_r2r (mm0, mm7);                                                \
187         pxor_r2r (mm1, mm7);                      /* mm7 = pair xor */      \
188         pavg_r2r (mm1, mm0);                      /* mm0 = pair pavgb */    \
189         p_src += i_stride;                                                  \
190         /* Each pass reuses mm0/mm7 of the previous row (see loop tail). */ \
191         for( i_y = 0; i_y < height; i_y ++ )                                \
192         {                                                                   \
193             movq_m2r (*p_src, mm2);               /* this row: src[x] */    \
194             movq_r2r (mm0, mm5);                                            \
195             movq_m2r (*(p_src+1), mm3);           /* this row: src[x+1] */  \
196             movq_r2r (mm2, mm6);                                            \
197             pxor_r2r (mm3, mm6);                                            \
198             pavg_r2r (mm3, mm2);                  /* mm2 = pair pavgb */    \
199             por_r2r (mm6, mm7);                                             \
200             pxor_r2r (mm2, mm5);                                            \
201             pand_r2r (mm5, mm7);                                            \
202             pavg_r2r (mm2, mm0);                  /* avg of the two avgs */ \
203             pand_m2r (mask_one, mm7);             /* keep bit 0 per byte */ \
204             psubusb_r2r (mm7, mm0);  /* subtract; rounding fix? (verify) */ \
205             p_src += i_stride;                                              \
206             movq_r2m (mm0, *p_dest);                                        \
207             p_dest += i_stride;                                             \
208             movq_r2r (mm6, mm7);                  /* xor for next pass */   \
209             movq_r2r (mm2, mm0);                  /* avg for next pass */   \
210         }                                                                   \
211     }                                                                       \
212 }
213
214 #define __MotionComponent_x_y_avg(width,height)                             \
215 void MotionComponent_x_y_avg_##width##_##height(yuv_data_t * p_src,         \
216                                                 yuv_data_t * p_dest,        \
217                                                 int i_stride)               \
218 {                                                                           \
219     int i_y;                                                                \
220     /* dest[x] = pavgb( src[x], dest[x] ) for a width x height block. */    \
221     for( i_y = 0; i_y < height; i_y ++ )                                    \
222     {                                                                       \
223         movq_m2r( *p_src, mm0 );                /* row bytes 0..7 */        \
224         if( width == 16 )                                                   \
225             movq_m2r( *(p_src + 8), mm1 );      /* row bytes 8..15 */       \
226         pavg_m2r( *p_dest, mm0 );               /* avg with old dest */     \
227         if( width == 16 )                                                   \
228             pavg_m2r( *(p_dest + 8), mm1 );                                 \
229         movq_r2m( mm0, *p_dest );               /* write back */            \
230         p_src += i_stride;                      /* next source row */       \
231         if( width == 16 )                                                   \
232             movq_r2m( mm1, *(p_dest + 8) );                                 \
233         p_dest += i_stride;                     /* next output row */       \
234     }                                                                       \
235 }
236
237 #define __MotionComponent_X_y_avg(width,height)                             \
238 void MotionComponent_X_y_avg_##width##_##height(yuv_data_t * p_src,         \
239                                                 yuv_data_t * p_dest,        \
240                                                 int i_stride)               \
241 {                                                                           \
242     int i_y;                                                                \
243     /* dest[x] = pavgb( pavgb( src[x], src[x+1] ), dest[x] ) per row. */    \
244     for( i_y = 0; i_y < height; i_y ++ )                                    \
245     {                                                                       \
246         movq_m2r (*p_src, mm0);                 /* row bytes 0..7 */        \
247         if( width == 16 )                                                   \
248             movq_m2r (*(p_src + 8), mm1);       /* row bytes 8..15 */       \
249         pavg_m2r (*(p_src + 1), mm0);           /* avg with src + 1 */      \
250         if( width == 16 )                                                   \
251             pavg_m2r (*(p_src + 9), mm1);                                   \
252         pavg_m2r (*p_dest, mm0);                /* avg with old dest */     \
253         if( width == 16 )                                                   \
254             pavg_m2r (*(p_dest + 8), mm1);                                  \
255         p_src += i_stride;                      /* next source row */       \
256         movq_r2m (mm0, *p_dest);                                            \
257         if( width == 16 )                                                   \
258             movq_r2m (mm1, *(p_dest + 8));                                  \
259         p_dest += i_stride;                     /* next output row */       \
260     }                                                                       \
261 }
262
263 #define __MotionComponent_x_Y_avg(width,height)                             \
264 void MotionComponent_x_Y_avg_##width##_##height(yuv_data_t * p_src,         \
265                                                 yuv_data_t * p_dest,        \
266                                                 int i_stride)               \
267 {                                                                           \
268     int i_y;                                                                \
269     yuv_data_t * p_next_src = p_src + i_stride;  /* row after p_src */      \
270     /* dest[x] = pavgb( pavgb( src[x], next_row[x] ), dest[x] ) per row. */ \
271     for( i_y = 0; i_y < height; i_y ++ )                                    \
272     {                                                                       \
273         movq_m2r (*p_src, mm0);                 /* row bytes 0..7 */        \
274         if( width == 16 )                                                   \
275             movq_m2r (*(p_src + 8), mm1);       /* row bytes 8..15 */       \
276         pavg_m2r (*(p_next_src), mm0);          /* avg with next row */     \
277         if( width == 16 )                                                   \
278             pavg_m2r (*(p_next_src + 8), mm1);                              \
279         pavg_m2r (*p_dest, mm0);                /* avg with old dest */     \
280         if( width == 16 )                                                   \
281             pavg_m2r (*(p_dest + 8), mm1);                                  \
282         p_src += i_stride;                      /* next source row */       \
283         p_next_src += i_stride;                 /* keep one row ahead */    \
284         movq_r2m (mm0, *p_dest);                                            \
285         if( width == 16 )                                                   \
286             movq_r2m (mm1, *(p_dest + 8));                                  \
287         p_dest += i_stride;                     /* next output row */       \
288     }                                                                       \
289 }
290
291 #define __MotionComponent_X_Y_avg(width,height)                             \
292 void MotionComponent_X_Y_avg_##width##_##height(yuv_data_t * p_src,         \
293                                                 yuv_data_t * p_dest,        \
294                                                 int i_stride)               \
295 {                                                                           \
296     int i_y;                                                                \
297     /* Blends the 2x2 source neighbourhood, then pavgb with current dest. */\
298     if( width == 16 )                                                       \
299     {                                                                       \
300         for( i_y = 0; i_y < height; i_y ++ )                                \
301         {                                                                   \
302             movq_m2r (*p_src, mm0);               /* A = src[x] */          \
303             movq_m2r (*(p_src+i_stride+1), mm1);  /* D = next row [x+1] */  \
304             movq_r2r (mm0, mm7);                                            \
305             movq_m2r (*(p_src+1), mm2);           /* B = src[x+1] */        \
306             pxor_r2r (mm1, mm7);                  /* mm7 = A ^ D */         \
307             movq_m2r (*(p_src+i_stride), mm3);    /* C = next row [x] */    \
308             movq_r2r (mm2, mm6);                                            \
309             pxor_r2r (mm3, mm6);                  /* mm6 = B ^ C */         \
310             pavg_r2r (mm1, mm0);                  /* mm0 = pavgb(A, D) */   \
311             pavg_r2r (mm3, mm2);                  /* mm2 = pavgb(B, C) */   \
312             por_r2r (mm6, mm7);                   /* mm7 = (A^D) | (B^C) */ \
313             movq_r2r (mm0, mm6);                                            \
314             pxor_r2r (mm2, mm6);                  /* mm6 = mm0 ^ mm2 */     \
315             pand_r2r (mm6, mm7);                  /* mm7 &= mm6 */          \
316             pand_m2r (mask_one, mm7);             /* keep bit 0 per byte */ \
317             pavg_r2r (mm2, mm0);                  /* avg of the two avgs */ \
318             psubusb_r2r (mm7, mm0);  /* subtract; rounding fix? (verify) */ \
319             movq_m2r (*p_dest, mm1);              /* current dest bytes */  \
320             pavg_r2r (mm1, mm0);                  /* blend with dest */     \
321             movq_r2m (mm0, *p_dest);                                        \
322             /* Same sequence for bytes 8..15 of the row. */                 \
323             movq_m2r (*(p_src+8), mm0);                                     \
324             movq_m2r (*(p_src+i_stride+9), mm1);                            \
325             movq_r2r (mm0, mm7);                                            \
326             movq_m2r (*(p_src+9), mm2);                                     \
327             pxor_r2r (mm1, mm7);                                            \
328             movq_m2r (*(p_src+i_stride+8), mm3);                            \
329             movq_r2r (mm2, mm6);                                            \
330             pxor_r2r (mm3, mm6);                                            \
331             pavg_r2r (mm1, mm0);                                            \
332             pavg_r2r (mm3, mm2);                                            \
333             por_r2r (mm6, mm7);                                             \
334             movq_r2r (mm0, mm6);                                            \
335             pxor_r2r (mm2, mm6);                                            \
336             pand_r2r (mm6, mm7);                                            \
337             pand_m2r (mask_one, mm7);                                       \
338             pavg_r2r (mm2, mm0);                                            \
339             psubusb_r2r (mm7, mm0);                                         \
340             movq_m2r (*(p_dest+8), mm1);                                    \
341             pavg_r2r (mm1, mm0);                                            \
342             p_src += i_stride;                                              \
343             movq_r2m (mm0, *(p_dest+8));                                    \
344             p_dest += i_stride;                                             \
345         }                                                                   \
346     }                                                                       \
347     else                                                                    \
348     {                                                                       \
349         for( i_y = 0; i_y < height; i_y ++ )  /* same steps, 8 bytes/row */ \
350         {                                                                   \
351             movq_m2r (*p_src, mm0);                                         \
352             movq_m2r (*(p_src+i_stride+1), mm1);                            \
353             movq_r2r (mm0, mm7);                                            \
354             movq_m2r (*(p_src+1), mm2);                                     \
355             pxor_r2r (mm1, mm7);                                            \
356             movq_m2r (*(p_src+i_stride), mm3);                              \
357             movq_r2r (mm2, mm6);                                            \
358             pxor_r2r (mm3, mm6);                                            \
359             pavg_r2r (mm1, mm0);                                            \
360             pavg_r2r (mm3, mm2);                                            \
361             por_r2r (mm6, mm7);                                             \
362             movq_r2r (mm0, mm6);                                            \
363             pxor_r2r (mm2, mm6);                                            \
364             pand_r2r (mm6, mm7);                                            \
365             pand_m2r (mask_one, mm7);                                       \
366             pavg_r2r (mm2, mm0);                                            \
367             psubusb_r2r (mm7, mm0);                                         \
368             movq_m2r (*p_dest, mm1);                                        \
369             pavg_r2r (mm1, mm0);                                            \
370             p_src += i_stride;                                              \
371             movq_r2m (mm0, *p_dest);                                        \
372             p_dest += i_stride;                                             \
373         }                                                                   \
374     }                                                                       \
375 }
376
377 #define __MotionComponents(width,height)  /* emits 8 functions per size */  \
378 __MotionComponent_x_y_copy(width,height)    /* plain copy */                \
379 __MotionComponent_X_y_copy(width,height)    /* src[x] with src[x+1] */      \
380 __MotionComponent_x_Y_copy(width,height)    /* src[x] with next row */      \
381 __MotionComponent_X_Y_copy(width,height)    /* all four neighbours */       \
382 __MotionComponent_x_y_avg(width,height)     /* the same four, each then */  \
383 __MotionComponent_X_y_avg(width,height)     /* averaged with what dest */   \
384 __MotionComponent_x_Y_avg(width,height)     /* already holds */             \
385 __MotionComponent_X_Y_avg(width,height)
386
387 __MotionComponents (16,16)      /* 444, 422, 420 (presumably the chroma formats using this size - confirm) */
388 __MotionComponents (16,8)       /* 444, 422, 420 */
389 __MotionComponents (8,8)        /* 422, 420 */
390 __MotionComponents (8,4)        /* 420 */
391 #if 0                           /* the 8x16 variants are compiled out */
392 __MotionComponents (8,16)       /* 422 */
393 #endif