]> git.sesse.net Git - vlc/blob - plugins/motion/vdec_motion_inner_mmxext.c
* SDL compilation fix for FreeBSD.
[vlc] / plugins / motion / vdec_motion_inner_mmxext.c
1 /*****************************************************************************
2  * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
3  *                              in MMX EXT
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: vdec_motion_inner_mmxext.c,v 1.3 2001/06/07 22:14:55 sam Exp $
7  *
8  * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9  *          work done by the livid project <http://www.linuxvideo.org/>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 #define MODULE_NAME motionmmxext
27 #include "modules_inner.h"
28
29 /*****************************************************************************
30  * Preamble
31  *****************************************************************************/
32 #include "defs.h"
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38
39 #include "video.h"
40
41 #include "attributes.h"
42 #include "mmx.h"
43
44 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
45
/* 64-bit constant with every byte equal to 0x01.  The X_Y routines below AND
 * it into their correction term (pand_m2r (mask_one, mm7)) so that only bit 0
 * of each byte is left before that term is subtracted from the result. */
46 static mmx_t mask_one = {0x0101010101010101LL};
47
48 /*
49  * Useful functions
50  */
51
/* Aliases mapping the generic pavg_* names used by the motion routines onto
 * the byte-average pavgb_* macros.  The expansions deliberately carry no
 * trailing semicolon: each call site supplies its own, so the aliases expand
 * to a single statement and stay usable in expression context or as the body
 * of an if/else. */
#define pavg_r2r(src,dest)      pavgb_r2r (src, dest)
#define pavg_m2r(src,dest)      pavgb_m2r (src, dest)
54
/*
 * Generator for _M(MotionComponent_x_y_copy_<width>_<height>): plain block
 * copy, no interpolation.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * width and height are macro arguments, so `width == 16` is a constant in
 * each generated function.  Every row moves 8 bytes through mm0 and, for
 * 16-wide blocks, another 8 bytes through mm1.
 * NOTE(review): no emms is issued in this routine; presumably the caller is
 * responsible for it -- confirm.
 */
55 #define __MotionComponent_x_y_copy(width,height)                            \
56 void _M(MotionComponent_x_y_copy_##width##_##height)(yuv_data_t * p_src,    \
57                                                  yuv_data_t * p_dest,       \
58                                                  int i_stride)              \
59 {                                                                           \
60     int i_y;                                                                \
61     /* zero mm0-mm7 (mm0/mm1 are reloaded below, the rest go unused) */     \
62         pxor_r2r (mm0, mm0);                                                \
63         pxor_r2r (mm1, mm1);                                                \
64         pxor_r2r (mm2, mm2);                                                \
65         pxor_r2r (mm3, mm3);                                                \
66         pxor_r2r (mm4, mm4);                                                \
67         pxor_r2r (mm5, mm5);                                                \
68         pxor_r2r (mm6, mm6);                                                \
69         pxor_r2r (mm7, mm7);                                                \
70                                                                             \
71     for( i_y = 0; i_y < height; i_y ++ )                                    \
72     {                                                                       \
73         movq_m2r( *p_src, mm0 );     /* load 8 ref bytes */                 \
74         if( width == 16 )                                                   \
75             movq_m2r( *(p_src + 8), mm1 );   /* bytes 8..15 of the row */   \
76         p_src += i_stride;                                                  \
77                                                                             \
78         movq_r2m( mm0, *p_dest );    /* store 8 bytes at curr */            \
79         if( width == 16 )                                                   \
80             movq_r2m( mm1, *(p_dest + 8) );                                 \
81         p_dest += i_stride;                                                 \
82     }                                                                       \
83 }
84
/*
 * Generator for _M(MotionComponent_X_y_copy_<width>_<height>): horizontal
 * interpolation.  Each destination byte is the pavg (byte average) of the
 * source byte and its right-hand neighbour; p_dest is overwritten.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * Because of the +1 / +9 loads, width+1 bytes are read from every source row.
 */
85 #define __MotionComponent_X_y_copy(width,height)                            \
86 void _M(MotionComponent_X_y_copy_##width##_##height)(yuv_data_t * p_src,    \
87                                                  yuv_data_t * p_dest,       \
88                                                  int i_stride)              \
89 {                                                                           \
90     int i_y;                                                                \
91                                                                             \
92     for( i_y = 0; i_y < height; i_y ++ )                                    \
93     {                                                                       \
94         movq_m2r (*p_src, mm0);             /* bytes 0..7 */                \
95         if( width == 16 )                                                   \
96             movq_m2r (*(p_src + 8), mm1);   /* bytes 8..15 */               \
97         pavg_m2r (*(p_src + 1), mm0);       /* average with bytes 1..8 */   \
98         if( width == 16 )                                                   \
99             pavg_m2r (*(p_src + 9), mm1);   /* average with bytes 9..16 */  \
100         movq_r2m (mm0, *p_dest);                                            \
101         p_src += i_stride;                                                  \
102         if( width == 16 )                                                   \
103             movq_r2m (mm1, *(p_dest + 8));                                  \
104         p_dest += i_stride;                                                 \
105     }                                                                       \
106 }
107
/*
 * Generator for _M(MotionComponent_x_Y_copy_<width>_<height>): vertical
 * interpolation.  Each destination byte is the pavg (byte average) of the
 * source byte and the byte one row below it; p_dest is overwritten.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * p_next_src always points one row below p_src, so height+1 source rows are
 * read in total.
 */
108 #define __MotionComponent_x_Y_copy(width,height)                            \
109 void _M(MotionComponent_x_Y_copy_##width##_##height)(yuv_data_t * p_src,    \
110                                                  yuv_data_t * p_dest,       \
111                                                  int i_stride)              \
112 {                                                                           \
113     int i_y;                                                                \
114     yuv_data_t * p_next_src = p_src + i_stride;                             \
115                                                                             \
116     for( i_y = 0; i_y < height; i_y ++ )                                    \
117     {                                                                       \
118         movq_m2r (*p_src, mm0);             /* current row, bytes 0..7 */   \
119         if( width == 16 )                                                   \
120             movq_m2r (*(p_src + 8), mm1);   /* current row, bytes 8..15 */  \
121         pavg_m2r (*(p_next_src), mm0);      /* average with row below */    \
122         if( width == 16 )                                                   \
123             pavg_m2r (*(p_next_src + 8), mm1);                              \
124         movq_r2m (mm0, *p_dest);                                            \
125         p_src += i_stride;                                                  \
126         p_next_src += i_stride;                                             \
127         if( width == 16 )                                                   \
128             movq_r2m (mm1, *(p_dest + 8));                                  \
129         p_dest += i_stride;                                                 \
130     }                                                                       \
131 }
132
/*
 * Generator for _M(MotionComponent_X_Y_copy_<width>_<height>): interpolation
 * in both directions.  Each destination byte is built from the four source
 * bytes at (x,y), (x+1,y), (x,y+1) and (x+1,y+1); p_dest is overwritten.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * The four bytes are combined as pavg(pavg(s0,s1), pavg(s2,s3)), after which
 * a per-byte correction term
 *     ((s0 ^ s1) | (s2 ^ s3)) & (pavg(s0,s1) ^ pavg(s2,s3)) & 0x01
 * is subtracted with psubusb.
 * NOTE(review): presumably this compensates for the rounding of the nested
 * byte averages -- confirm against the pavgb definition.
 *
 * 16-wide blocks: every row is computed from scratch, in two 8-byte halves;
 *   the pairs averaged first are the two diagonals.
 * 8-wide blocks: the pairs averaged first are horizontal neighbours.  The
 *   horizontal average (mm0) and xor (mm7) of a row are carried over to the
 *   next iteration, so each source row is loaded only once.
 *
 * Either way, width+1 bytes are read from each of height+1 source rows.
 */
133 #define __MotionComponent_X_Y_copy(width,height)                            \
134 void _M(MotionComponent_X_Y_copy_##width##_##height)(yuv_data_t * p_src,    \
135                                                  yuv_data_t * p_dest,       \
136                                                  int i_stride)              \
137 {                                                                           \
138     int i_y;                                                                \
139                                                                             \
140     if( width == 16 )                                                       \
141     {                                                                       \
142         for( i_y = 0; i_y < height; i_y ++ )                                \
143         {                                                                   \
144             movq_m2r (*p_src, mm0);              /* mm0 = (x,   y)   */     \
145             movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = (x+1, y+1) */     \
146             movq_r2r (mm0, mm7);                                            \
147             movq_m2r (*(p_src+1), mm2);          /* mm2 = (x+1, y)   */     \
148             pxor_r2r (mm1, mm7);                 /* mm7 = mm0 ^ mm1  */     \
149             movq_m2r (*(p_src + i_stride), mm3); /* mm3 = (x,   y+1) */     \
150             movq_r2r (mm2, mm6);                                            \
151             pxor_r2r (mm3, mm6);                 /* mm6 = mm2 ^ mm3  */     \
152             pavg_r2r (mm1, mm0);                 /* first diagonal   */     \
153             pavg_r2r (mm3, mm2);                 /* second diagonal  */     \
154             por_r2r (mm6, mm7);                                             \
155             movq_r2r (mm0, mm6);                                            \
156             pxor_r2r (mm2, mm6);                 /* xor of both avgs */     \
157             pand_r2r (mm6, mm7);                                            \
158             pand_m2r (mask_one, mm7);            /* keep bit 0 only  */     \
159             pavg_r2r (mm2, mm0);                 /* avg of both avgs */     \
160             psubusb_r2r (mm7, mm0);              /* apply correction */     \
161             movq_r2m (mm0, *p_dest);                                        \
162             /* same sequence again for bytes 8..15 of the row */            \
163             movq_m2r (*(p_src+8), mm0);                                     \
164             movq_m2r (*(p_src+i_stride+9), mm1);                            \
165             movq_r2r (mm0, mm7);                                            \
166             movq_m2r (*(p_src+9), mm2);                                     \
167             pxor_r2r (mm1, mm7);                                            \
168             movq_m2r (*(p_src+i_stride+8), mm3);                            \
169             movq_r2r (mm2, mm6);                                            \
170             pxor_r2r (mm3, mm6);                                            \
171             pavg_r2r (mm1, mm0);                                            \
172             pavg_r2r (mm3, mm2);                                            \
173             por_r2r (mm6, mm7);                                             \
174             movq_r2r (mm0, mm6);                                            \
175             pxor_r2r (mm2, mm6);                                            \
176             pand_r2r (mm6, mm7);                                            \
177             pand_m2r (mask_one, mm7);                                       \
178             pavg_r2r (mm2, mm0);                                            \
179             psubusb_r2r (mm7, mm0);                                         \
180             p_src += i_stride;                                              \
181             movq_r2m (mm0, *(p_dest+8));                                    \
182             p_dest += i_stride;                                             \
183         }                                                                   \
184     }                                                                       \
185     else                                                                    \
186     {                                                                       \
187         movq_m2r (*p_src, mm0);         /* prologue: first source row */    \
188         movq_m2r (*(p_src+1), mm1);                                         \
189         movq_r2r (mm0, mm7);                                                \
190         pxor_r2r (mm1, mm7);            /* mm7 = xor of the row's pair */   \
191         pavg_r2r (mm1, mm0);            /* mm0 = horizontal average */      \
192         p_src += i_stride;                                                  \
193                                                                             \
194         for( i_y = 0; i_y < height; i_y ++ )                                \
195         {                                                                   \
196             movq_m2r (*p_src, mm2);     /* row below the carried one */     \
197             movq_r2r (mm0, mm5);                                            \
198             movq_m2r (*(p_src+1), mm3);                                     \
199             movq_r2r (mm2, mm6);                                            \
200             pxor_r2r (mm3, mm6);        /* mm6 = xor of this row's pair */  \
201             pavg_r2r (mm3, mm2);        /* mm2 = its horizontal average */  \
202             por_r2r (mm6, mm7);                                             \
203             pxor_r2r (mm2, mm5);        /* mm5 = xor of both averages */    \
204             pand_r2r (mm5, mm7);                                            \
205             pavg_r2r (mm2, mm0);        /* average the two row averages */  \
206             pand_m2r (mask_one, mm7);   /* keep bit 0 only */               \
207             psubusb_r2r (mm7, mm0);     /* apply correction */              \
208             p_src += i_stride;                                              \
209             movq_r2m (mm0, *p_dest);                                        \
210             p_dest += i_stride;                                             \
211             movq_r2r (mm6, mm7);        /* carry xor to next iteration */   \
212             movq_r2r (mm2, mm0);        /* carry average likewise */        \
213         }                                                                   \
214     }                                                                       \
215 }
216
/*
 * Generator for _M(MotionComponent_x_y_avg_<width>_<height>): no
 * interpolation of the source; each source byte is pavg-averaged with the
 * byte already present at p_dest and the result is written back to p_dest.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block (read and written)
 *   i_stride - byte distance between rows, applied to both pointers
 */
217 #define __MotionComponent_x_y_avg(width,height)                             \
218 void _M(MotionComponent_x_y_avg_##width##_##height)(yuv_data_t * p_src,     \
219                                                 yuv_data_t * p_dest,        \
220                                                 int i_stride)               \
221 {                                                                           \
222     int i_y;                                                                \
223                                                                             \
224     for( i_y = 0; i_y < height; i_y ++ )                                    \
225     {                                                                       \
226         movq_m2r( *p_src, mm0 );                                            \
227         if( width == 16 )                                                   \
228             movq_m2r( *(p_src + 8), mm1 );                                  \
229         pavg_m2r( *p_dest, mm0 );           /* blend with existing dest */  \
230         if( width == 16 )                                                   \
231             pavg_m2r( *(p_dest + 8), mm1 );                                 \
232         movq_r2m( mm0, *p_dest );                                           \
233         p_src += i_stride;                                                  \
234         if( width == 16 )                                                   \
235             movq_r2m( mm1, *(p_dest + 8) );                                 \
236         p_dest += i_stride;                                                 \
237     }                                                                       \
238 }
239
/*
 * Generator for _M(MotionComponent_X_y_avg_<width>_<height>): horizontal
 * interpolation followed by averaging with the destination.  Each source
 * byte is pavg-averaged with its right-hand neighbour, that value is
 * pavg-averaged with the byte already at p_dest, and the result is stored
 * back to p_dest.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block (read and written)
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * Because of the +1 / +9 loads, width+1 bytes are read from every source row.
 */
240 #define __MotionComponent_X_y_avg(width,height)                             \
241 void _M(MotionComponent_X_y_avg_##width##_##height)(yuv_data_t * p_src,     \
242                                                 yuv_data_t * p_dest,        \
243                                                 int i_stride)               \
244 {                                                                           \
245     int i_y;                                                                \
246                                                                             \
247     for( i_y = 0; i_y < height; i_y ++ )                                    \
248     {                                                                       \
249         movq_m2r (*p_src, mm0);                                             \
250         if( width == 16 )                                                   \
251             movq_m2r (*(p_src + 8), mm1);                                   \
252         pavg_m2r (*(p_src + 1), mm0);       /* horizontal average */        \
253         if( width == 16 )                                                   \
254             pavg_m2r (*(p_src + 9), mm1);                                   \
255         pavg_m2r (*p_dest, mm0);            /* blend with existing dest */  \
256         if( width == 16 )                                                   \
257             pavg_m2r (*(p_dest + 8), mm1);                                  \
258         p_src += i_stride;                                                  \
259         movq_r2m (mm0, *p_dest);                                            \
260         if( width == 16 )                                                   \
261             movq_r2m (mm1, *(p_dest + 8));                                  \
262         p_dest += i_stride;                                                 \
263     }                                                                       \
264 }
265
/*
 * Generator for _M(MotionComponent_x_Y_avg_<width>_<height>): vertical
 * interpolation followed by averaging with the destination.  Each source
 * byte is pavg-averaged with the byte one row below it, that value is
 * pavg-averaged with the byte already at p_dest, and the result is stored
 * back to p_dest.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block (read and written)
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * p_next_src always points one row below p_src, so height+1 source rows are
 * read in total.
 */
266 #define __MotionComponent_x_Y_avg(width,height)                             \
267 void _M(MotionComponent_x_Y_avg_##width##_##height)(yuv_data_t * p_src,     \
268                                                 yuv_data_t * p_dest,        \
269                                                 int i_stride)               \
270 {                                                                           \
271     int i_y;                                                                \
272     yuv_data_t * p_next_src = p_src + i_stride;                             \
273                                                                             \
274     for( i_y = 0; i_y < height; i_y ++ )                                    \
275     {                                                                       \
276         movq_m2r (*p_src, mm0);                                             \
277         if( width == 16 )                                                   \
278             movq_m2r (*(p_src + 8), mm1);                                   \
279         pavg_m2r (*(p_next_src), mm0);      /* vertical average */          \
280         if( width == 16 )                                                   \
281             pavg_m2r (*(p_next_src + 8), mm1);                              \
282         pavg_m2r (*p_dest, mm0);            /* blend with existing dest */  \
283         if( width == 16 )                                                   \
284             pavg_m2r (*(p_dest + 8), mm1);                                  \
285         p_src += i_stride;                                                  \
286         p_next_src += i_stride;                                             \
287         movq_r2m (mm0, *p_dest);                                            \
288         if( width == 16 )                                                   \
289             movq_r2m (mm1, *(p_dest + 8));                                  \
290         p_dest += i_stride;                                                 \
291     }                                                                       \
292 }
293
/*
 * Generator for _M(MotionComponent_X_Y_avg_<width>_<height>): interpolation
 * in both directions followed by averaging with the destination.
 *
 *   p_src    - first byte of the source block
 *   p_dest   - first byte of the destination block (read and written)
 *   i_stride - byte distance between rows, applied to both pointers
 *
 * For every group of 8 output bytes the four source vectors (x,y),
 * (x+1,y+1), (x+1,y) and (x,y+1) are loaded; the two diagonals are
 * pavg-averaged, then the two averages are pavg-averaged, and a per-byte
 * correction term
 *     ((s0 ^ s1) | (s2 ^ s3)) & (pavg(s0,s1) ^ pavg(s2,s3)) & 0x01
 * is subtracted with psubusb.  The outcome is finally pavg-averaged with the
 * bytes already at p_dest and stored there.
 * NOTE(review): presumably the correction compensates for the rounding of
 * the nested byte averages -- confirm against the pavgb definition.
 *
 * Both branches recompute every row from scratch; the 16-wide branch simply
 * runs the sequence twice per row (bytes 0..7, then bytes 8..15).
 * width+1 bytes are read from each of height+1 source rows.
 */
294 #define __MotionComponent_X_Y_avg(width,height)                             \
295 void _M(MotionComponent_X_Y_avg_##width##_##height)(yuv_data_t * p_src,     \
296                                                 yuv_data_t * p_dest,        \
297                                                 int i_stride)               \
298 {                                                                           \
299     int i_y;                                                                \
300                                                                             \
301     if( width == 16 )                                                       \
302     {                                                                       \
303         for( i_y = 0; i_y < height; i_y ++ )                                \
304         {                                                                   \
305             movq_m2r (*p_src, mm0);              /* mm0 = (x,   y)   */     \
306             movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = (x+1, y+1) */     \
307             movq_r2r (mm0, mm7);                                            \
308             movq_m2r (*(p_src+1), mm2);          /* mm2 = (x+1, y)   */     \
309             pxor_r2r (mm1, mm7);                 /* mm7 = mm0 ^ mm1  */     \
310             movq_m2r (*(p_src+i_stride), mm3);   /* mm3 = (x,   y+1) */     \
311             movq_r2r (mm2, mm6);                                            \
312             pxor_r2r (mm3, mm6);                 /* mm6 = mm2 ^ mm3  */     \
313             pavg_r2r (mm1, mm0);                 /* first diagonal   */     \
314             pavg_r2r (mm3, mm2);                 /* second diagonal  */     \
315             por_r2r (mm6, mm7);                                             \
316             movq_r2r (mm0, mm6);                                            \
317             pxor_r2r (mm2, mm6);                 /* xor of both avgs */     \
318             pand_r2r (mm6, mm7);                                            \
319             pand_m2r (mask_one, mm7);            /* keep bit 0 only  */     \
320             pavg_r2r (mm2, mm0);                 /* avg of both avgs */     \
321             psubusb_r2r (mm7, mm0);              /* apply correction */     \
322             movq_m2r (*p_dest, mm1);             /* existing dest    */     \
323             pavg_r2r (mm1, mm0);                 /* blend with dest  */     \
324             movq_r2m (mm0, *p_dest);                                        \
325             /* same sequence again for bytes 8..15 of the row */            \
326             movq_m2r (*(p_src+8), mm0);                                     \
327             movq_m2r (*(p_src+i_stride+9), mm1);                            \
328             movq_r2r (mm0, mm7);                                            \
329             movq_m2r (*(p_src+9), mm2);                                     \
330             pxor_r2r (mm1, mm7);                                            \
331             movq_m2r (*(p_src+i_stride+8), mm3);                            \
332             movq_r2r (mm2, mm6);                                            \
333             pxor_r2r (mm3, mm6);                                            \
334             pavg_r2r (mm1, mm0);                                            \
335             pavg_r2r (mm3, mm2);                                            \
336             por_r2r (mm6, mm7);                                             \
337             movq_r2r (mm0, mm6);                                            \
338             pxor_r2r (mm2, mm6);                                            \
339             pand_r2r (mm6, mm7);                                            \
340             pand_m2r (mask_one, mm7);                                       \
341             pavg_r2r (mm2, mm0);                                            \
342             psubusb_r2r (mm7, mm0);                                         \
343             movq_m2r (*(p_dest+8), mm1);                                    \
344             pavg_r2r (mm1, mm0);                                            \
345             p_src += i_stride;                                              \
346             movq_r2m (mm0, *(p_dest+8));                                    \
347             p_dest += i_stride;                                             \
348         }                                                                   \
349     }                                                                       \
350     else                                                                    \
351     {                                                                       \
352         for( i_y = 0; i_y < height; i_y ++ )                                \
353         {                                                                   \
354             movq_m2r (*p_src, mm0);              /* mm0 = (x,   y)   */     \
355             movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = (x+1, y+1) */     \
356             movq_r2r (mm0, mm7);                                            \
357             movq_m2r (*(p_src+1), mm2);          /* mm2 = (x+1, y)   */     \
358             pxor_r2r (mm1, mm7);                                            \
359             movq_m2r (*(p_src+i_stride), mm3);   /* mm3 = (x,   y+1) */     \
360             movq_r2r (mm2, mm6);                                            \
361             pxor_r2r (mm3, mm6);                                            \
362             pavg_r2r (mm1, mm0);                                            \
363             pavg_r2r (mm3, mm2);                                            \
364             por_r2r (mm6, mm7);                                             \
365             movq_r2r (mm0, mm6);                                            \
366             pxor_r2r (mm2, mm6);                                            \
367             pand_r2r (mm6, mm7);                                            \
368             pand_m2r (mask_one, mm7);                                       \
369             pavg_r2r (mm2, mm0);                                            \
370             psubusb_r2r (mm7, mm0);                                         \
371             movq_m2r (*p_dest, mm1);             /* existing dest    */     \
372             pavg_r2r (mm1, mm0);                 /* blend with dest  */     \
373             p_src += i_stride;                                              \
374             movq_r2m (mm0, *p_dest);                                        \
375             p_dest += i_stride;                                             \
376         }                                                                   \
377     }                                                                       \
378 }
379
/*
 * Expands to all eight function generators for one width/height pair: the
 * copy and avg flavours of x_y, X_y, x_Y and X_Y.  In those names an
 * upper-case X marks the routines that average each byte with its right-hand
 * neighbour, and an upper-case Y those that average with the row below.
 */
380 #define __MotionComponents(width,height)                                    \
381 __MotionComponent_x_y_copy(width,height)                                    \
382 __MotionComponent_X_y_copy(width,height)                                    \
383 __MotionComponent_x_Y_copy(width,height)                                    \
384 __MotionComponent_X_Y_copy(width,height)                                    \
385 __MotionComponent_x_y_avg(width,height)                                     \
386 __MotionComponent_X_y_avg(width,height)                                     \
387 __MotionComponent_x_Y_avg(width,height)                                     \
388 __MotionComponent_X_Y_avg(width,height)
389
/*
 * Instantiate the eight routines for every block size that is built.  The
 * trailing comments are the original annotations (NOTE(review): presumably
 * the chroma formats that need each size -- confirm).  The 8x16 set is
 * compiled out by the `#if 0` guard.
 */
390 __MotionComponents (16,16)      /* 444, 422, 420 */
391 __MotionComponents (16,8)       /* 444, 422, 420 */
392 __MotionComponents (8,8)        /* 422, 420 */
393 __MotionComponents (8,4)        /* 420 */
394 #if 0
395 __MotionComponents (8,16)       /* 422 */
396 #endif