]> git.sesse.net Git - vlc/blob - plugins/motion/vdec_motion_inner_mmxext.c
f93720db71f53ffcf4339a236e428a7e1ce9e6d2
[vlc] / plugins / motion / vdec_motion_inner_mmxext.c
1 /*****************************************************************************
2  * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
3  *                              in MMX EXT
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: vdec_motion_inner_mmxext.c,v 1.2 2001/06/07 15:27:44 sam Exp $
7  *
8  * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9  *          work done by the livid project <http://www.linuxvideo.org/>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
24  *****************************************************************************/
25
26 #include "modules_inner.h"
27
28 /*****************************************************************************
29  * Preamble
30  *****************************************************************************/
31 #include "defs.h"
32
33 #include "config.h"
34 #include "common.h"
35 #include "threads.h"
36 #include "mtime.h"
37
38 #include "video.h"
39
40 #include "attributes.h"
41 #include "mmx.h"
42
43 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
44
45 static mmx_t mask_one = {0x0101010101010101LL};
46
47 /*
48  * Useful functions
49  */
50
/*
 * pavg_r2r / pavg_m2r : aliases for the pavgb_* byte-average helpers
 * (presumably provided by mmx.h), taking a register or a memory source.
 *
 * They are deliberately defined WITHOUT a trailing semicolon: the caller's
 * own ';' terminates the statement. With a ';' baked into the macro, every
 * use expanded to a statement followed by an empty statement, which breaks
 * an unbraced "if( ... ) pavg_m2r( ... ); else ..." construct.
 */
#define pavg_r2r(src,dest)      pavgb_r2r (src, dest)
#define pavg_m2r(src,dest)      pavgb_m2r (src, dest)
53
/*****************************************************************************
 * __MotionComponent_x_y_copy : generator for the plain copy routine
 *****************************************************************************
 * Expands to the definition of MotionComponent_x_y_copy_<width>_<height>
 * (name passed through _M()). The generated function copies <height> rows
 * from p_src to p_dest, moving both pointers forward by i_stride after each
 * row. A row is 8 bytes, or 16 bytes when width == 16.
 *
 * All eight MMX registers are cleared first (pxor of a register with itself).
 * NOTE(review): within this routine mm0/mm1 are reloaded before being stored
 * and mm2-mm7 are never read, so the clearing looks unnecessary - confirm
 * that no caller relies on it before removing.
 *****************************************************************************/
#define __MotionComponent_x_y_copy(width,height)                            \
void _M(MotionComponent_x_y_copy_##width##_##height)(yuv_data_t * p_src,    \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    pxor_r2r( mm0, mm0 );                                                   \
    pxor_r2r( mm1, mm1 );                                                   \
    pxor_r2r( mm2, mm2 );                                                   \
    pxor_r2r( mm3, mm3 );                                                   \
    pxor_r2r( mm4, mm4 );                                                   \
    pxor_r2r( mm5, mm5 );                                                   \
    pxor_r2r( mm6, mm6 );                                                   \
    pxor_r2r( mm7, mm7 );                                                   \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* fetch the reference row */                                       \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* write it unchanged to the destination */                         \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
83
/*****************************************************************************
 * __MotionComponent_X_y_copy : generator for the horizontal-average copy
 *****************************************************************************
 * Expands to the definition of MotionComponent_X_y_copy_<width>_<height>
 * (name passed through _M()). For each of the <height> rows, the generated
 * function averages (pavgb) every source byte with the byte immediately to
 * its right (p_src + 1) and stores the result in p_dest. Both pointers move
 * forward by i_stride per row; a row is 8 bytes, or 16 when width == 16.
 *****************************************************************************/
#define __MotionComponent_X_y_copy(width,height)                            \
void _M(MotionComponent_X_y_copy_##width##_##height)(yuv_data_t * p_src,    \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* load the row */                                                  \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* average with the same row shifted by one byte */                 \
        pavg_m2r( p_src[1], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[9], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* store the prediction */                                          \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
106
/*****************************************************************************
 * __MotionComponent_x_Y_copy : generator for the vertical-average copy
 *****************************************************************************
 * Expands to the definition of MotionComponent_x_Y_copy_<width>_<height>
 * (name passed through _M()). For each of the <height> rows, the generated
 * function averages (pavgb) every source byte with the byte one i_stride
 * further (the row below) and stores the result in p_dest. Both pointers
 * move forward by i_stride per row; a row is 8 bytes, or 16 when
 * width == 16. Note that <height> + 1 source rows are read in total.
 *****************************************************************************/
#define __MotionComponent_x_Y_copy(width,height)                            \
void _M(MotionComponent_x_Y_copy_##width##_##height)(yuv_data_t * p_src,    \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* load the current row */                                          \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* average with the row located one stride further */               \
        pavg_m2r( p_src[i_stride], mm0 );                                   \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[i_stride + 8], mm1 );                           \
        }                                                                   \
                                                                            \
        /* store the prediction */                                          \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
131
/*****************************************************************************
 * __MotionComponent_X_Y_copy : generator for the four-neighbour copy
 *****************************************************************************
 * Expands to the definition of MotionComponent_X_Y_copy_<width>_<height>
 * (name passed through _M()). Every output byte is built from four source
 * bytes: A = p_src[0], B = p_src[1], C = p_src[i_stride] and
 * D = p_src[i_stride + 1].
 *
 * 16-byte path: each row is handled as two independent 8-byte halves. For
 * each half the code computes avg(avg(A,D), avg(B,C)) with pavgb, then
 * subtracts (psubusb) the correction
 *     ((A ^ D) | (B ^ C)) & (avg(A,D) ^ avg(B,C)) & mask_one
 * - presumably to undo the upward bias of stacking two rounded averages.
 *
 * 8-byte path: the horizontal average and xor of a row are kept in mm0 and
 * mm7 across iterations, so each source row is loaded only once. Here the
 * pairs are (A,B) and (C,D), with the same kind of correction term.
 *
 * The statement order of the MMX operations is significant; do not shuffle.
 *****************************************************************************/
#define __MotionComponent_X_Y_copy(width,height)                            \
void _M(MotionComponent_X_Y_copy_##width##_##height)(yuv_data_t * p_src,    \
                                                 yuv_data_t * p_dest,       \
                                                 int i_stride)              \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    if( width == 16 )                                                       \
    {                                                                       \
        for( i_line = height; i_line > 0; i_line-- )                        \
        {                                                                   \
            /* bytes 0-7 */                                                 \
            movq_m2r( p_src[0], mm0 );                                      \
            movq_m2r( p_src[i_stride + 1], mm1 );                           \
            movq_r2r( mm0, mm7 );                                           \
            movq_m2r( p_src[1], mm2 );                                      \
            pxor_r2r( mm1, mm7 );                                           \
            movq_m2r( p_src[i_stride], mm3 );                               \
            movq_r2r( mm2, mm6 );                                           \
            pxor_r2r( mm3, mm6 );                                           \
            pavg_r2r( mm1, mm0 );                                           \
            pavg_r2r( mm3, mm2 );                                           \
            por_r2r( mm6, mm7 );                                            \
            movq_r2r( mm0, mm6 );                                           \
            pxor_r2r( mm2, mm6 );                                           \
            pand_r2r( mm6, mm7 );                                           \
            pand_m2r( mask_one, mm7 );                                      \
            pavg_r2r( mm2, mm0 );                                           \
            psubusb_r2r( mm7, mm0 );                                        \
            movq_r2m( mm0, p_dest[0] );                                     \
                                                                            \
            /* bytes 8-15 */                                                \
            movq_m2r( p_src[8], mm0 );                                      \
            movq_m2r( p_src[i_stride + 9], mm1 );                           \
            movq_r2r( mm0, mm7 );                                           \
            movq_m2r( p_src[9], mm2 );                                      \
            pxor_r2r( mm1, mm7 );                                           \
            movq_m2r( p_src[i_stride + 8], mm3 );                           \
            movq_r2r( mm2, mm6 );                                           \
            pxor_r2r( mm3, mm6 );                                           \
            pavg_r2r( mm1, mm0 );                                           \
            pavg_r2r( mm3, mm2 );                                           \
            por_r2r( mm6, mm7 );                                            \
            movq_r2r( mm0, mm6 );                                           \
            pxor_r2r( mm2, mm6 );                                           \
            pand_r2r( mm6, mm7 );                                           \
            pand_m2r( mask_one, mm7 );                                      \
            pavg_r2r( mm2, mm0 );                                           \
            psubusb_r2r( mm7, mm0 );                                        \
            movq_r2m( mm0, p_dest[8] );                                     \
                                                                            \
            p_src += i_stride;                                              \
            p_dest += i_stride;                                             \
        }                                                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* prime mm0 (average) and mm7 (xor) with the first row */          \
        movq_m2r( p_src[0], mm0 );                                          \
        movq_m2r( p_src[1], mm1 );                                          \
        movq_r2r( mm0, mm7 );                                               \
        pxor_r2r( mm1, mm7 );                                               \
        pavg_r2r( mm1, mm0 );                                               \
        p_src += i_stride;                                                  \
                                                                            \
        for( i_line = height; i_line > 0; i_line-- )                        \
        {                                                                   \
            movq_m2r( p_src[0], mm2 );                                      \
            movq_r2r( mm0, mm5 );                                           \
            movq_m2r( p_src[1], mm3 );                                      \
            movq_r2r( mm2, mm6 );                                           \
            pxor_r2r( mm3, mm6 );                                           \
            pavg_r2r( mm3, mm2 );                                           \
            por_r2r( mm6, mm7 );                                            \
            pxor_r2r( mm2, mm5 );                                           \
            pand_r2r( mm5, mm7 );                                           \
            pavg_r2r( mm2, mm0 );                                           \
            pand_m2r( mask_one, mm7 );                                      \
            psubusb_r2r( mm7, mm0 );                                        \
            movq_r2m( mm0, p_dest[0] );                                     \
                                                                            \
            /* this row's xor and average feed the next iteration */        \
            movq_r2r( mm6, mm7 );                                           \
            movq_r2r( mm2, mm0 );                                           \
                                                                            \
            p_src += i_stride;                                              \
            p_dest += i_stride;                                             \
        }                                                                   \
    }                                                                       \
}
215
/*****************************************************************************
 * __MotionComponent_x_y_avg : generator for the plain averaging routine
 *****************************************************************************
 * Expands to the definition of MotionComponent_x_y_avg_<width>_<height>
 * (name passed through _M()). For each of the <height> rows, the generated
 * function averages (pavgb) the source bytes with the bytes already present
 * in p_dest and writes the result back to p_dest. Both pointers move forward
 * by i_stride per row; a row is 8 bytes, or 16 when width == 16.
 *****************************************************************************/
#define __MotionComponent_x_y_avg(width,height)                             \
void _M(MotionComponent_x_y_avg_##width##_##height)(yuv_data_t * p_src,     \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* load the reference row */                                        \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* blend with the current content of the destination */             \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* write the blend back */                                          \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
238
/*****************************************************************************
 * __MotionComponent_X_y_avg : generator for the horizontal-average blend
 *****************************************************************************
 * Expands to the definition of MotionComponent_X_y_avg_<width>_<height>
 * (name passed through _M()). For each of the <height> rows, the generated
 * function averages (pavgb) every source byte with its right-hand neighbour
 * (p_src + 1), averages that result with the bytes already in p_dest, and
 * writes it back to p_dest. Both pointers move forward by i_stride per row;
 * a row is 8 bytes, or 16 when width == 16.
 *****************************************************************************/
#define __MotionComponent_X_y_avg(width,height)                             \
void _M(MotionComponent_X_y_avg_##width##_##height)(yuv_data_t * p_src,     \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* load the row */                                                  \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* average with the same row shifted by one byte */                 \
        pavg_m2r( p_src[1], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[9], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* blend with the current content of the destination */             \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* write the blend back */                                          \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
264
/*****************************************************************************
 * __MotionComponent_x_Y_avg : generator for the vertical-average blend
 *****************************************************************************
 * Expands to the definition of MotionComponent_x_Y_avg_<width>_<height>
 * (name passed through _M()). For each of the <height> rows, the generated
 * function averages (pavgb) every source byte with the byte one i_stride
 * further (the row below), averages that result with the bytes already in
 * p_dest, and writes it back to p_dest. Both pointers move forward by
 * i_stride per row; a row is 8 bytes, or 16 when width == 16. Note that
 * <height> + 1 source rows are read in total.
 *****************************************************************************/
#define __MotionComponent_x_Y_avg(width,height)                             \
void _M(MotionComponent_x_Y_avg_##width##_##height)(yuv_data_t * p_src,     \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* load the current row */                                          \
        movq_m2r( p_src[0], mm0 );                                          \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_m2r( p_src[8], mm1 );                                      \
        }                                                                   \
                                                                            \
        /* average with the row located one stride further */               \
        pavg_m2r( p_src[i_stride], mm0 );                                   \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_src[i_stride + 8], mm1 );                           \
        }                                                                   \
                                                                            \
        /* blend with the current content of the destination */             \
        pavg_m2r( p_dest[0], mm0 );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            pavg_m2r( p_dest[8], mm1 );                                     \
        }                                                                   \
                                                                            \
        /* write the blend back */                                          \
        movq_r2m( mm0, p_dest[0] );                                         \
        if( width == 16 )                                                   \
        {                                                                   \
            movq_r2m( mm1, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
292
/*****************************************************************************
 * __MotionComponent_X_Y_avg : generator for the four-neighbour blend
 *****************************************************************************
 * Expands to the definition of MotionComponent_X_Y_avg_<width>_<height>
 * (name passed through _M()). Every 8-byte group is built from four source
 * groups: A = p_src[0], B = p_src[1], C = p_src[i_stride] and
 * D = p_src[i_stride + 1]. The code computes avg(avg(A,D), avg(B,C)) with
 * pavgb, then subtracts (psubusb) the correction
 *     ((A ^ D) | (B ^ C)) & (avg(A,D) ^ avg(B,C)) & mask_one
 * - presumably to undo the upward bias of stacking two rounded averages.
 * The result is finally averaged with the bytes already in p_dest and
 * written back there.
 *
 * Bytes 0-7 of each row are always processed; bytes 8-15 are processed the
 * same way only when width == 16. Both pointers move forward by i_stride
 * per row, for <height> rows.
 *
 * The statement order of the MMX operations is significant; do not shuffle.
 *****************************************************************************/
#define __MotionComponent_X_Y_avg(width,height)                             \
void _M(MotionComponent_X_Y_avg_##width##_##height)(yuv_data_t * p_src,     \
                                                yuv_data_t * p_dest,        \
                                                int i_stride)               \
{                                                                           \
    int i_line;                                                             \
                                                                            \
    for( i_line = height; i_line > 0; i_line-- )                            \
    {                                                                       \
        /* bytes 0-7 */                                                     \
        movq_m2r( p_src[0], mm0 );                                          \
        movq_m2r( p_src[i_stride + 1], mm1 );                               \
        movq_r2r( mm0, mm7 );                                               \
        movq_m2r( p_src[1], mm2 );                                          \
        pxor_r2r( mm1, mm7 );                                               \
        movq_m2r( p_src[i_stride], mm3 );                                   \
        movq_r2r( mm2, mm6 );                                               \
        pxor_r2r( mm3, mm6 );                                               \
        pavg_r2r( mm1, mm0 );                                               \
        pavg_r2r( mm3, mm2 );                                               \
        por_r2r( mm6, mm7 );                                                \
        movq_r2r( mm0, mm6 );                                               \
        pxor_r2r( mm2, mm6 );                                               \
        pand_r2r( mm6, mm7 );                                               \
        pand_m2r( mask_one, mm7 );                                          \
        pavg_r2r( mm2, mm0 );                                               \
        psubusb_r2r( mm7, mm0 );                                            \
        movq_m2r( p_dest[0], mm1 );                                         \
        pavg_r2r( mm1, mm0 );                                               \
        movq_r2m( mm0, p_dest[0] );                                         \
                                                                            \
        if( width == 16 )                                                   \
        {                                                                   \
            /* bytes 8-15 */                                                \
            movq_m2r( p_src[8], mm0 );                                      \
            movq_m2r( p_src[i_stride + 9], mm1 );                           \
            movq_r2r( mm0, mm7 );                                           \
            movq_m2r( p_src[9], mm2 );                                      \
            pxor_r2r( mm1, mm7 );                                           \
            movq_m2r( p_src[i_stride + 8], mm3 );                           \
            movq_r2r( mm2, mm6 );                                           \
            pxor_r2r( mm3, mm6 );                                           \
            pavg_r2r( mm1, mm0 );                                           \
            pavg_r2r( mm3, mm2 );                                           \
            por_r2r( mm6, mm7 );                                            \
            movq_r2r( mm0, mm6 );                                           \
            pxor_r2r( mm2, mm6 );                                           \
            pand_r2r( mm6, mm7 );                                           \
            pand_m2r( mask_one, mm7 );                                      \
            pavg_r2r( mm2, mm0 );                                           \
            psubusb_r2r( mm7, mm0 );                                        \
            movq_m2r( p_dest[8], mm1 );                                     \
            pavg_r2r( mm1, mm0 );                                           \
            movq_r2m( mm0, p_dest[8] );                                     \
        }                                                                   \
                                                                            \
        p_src += i_stride;                                                  \
        p_dest += i_stride;                                                 \
    }                                                                       \
}
378
379 #define __MotionComponents(width,height)                                    \
380 __MotionComponent_x_y_copy(width,height)                                    \
381 __MotionComponent_X_y_copy(width,height)                                    \
382 __MotionComponent_x_Y_copy(width,height)                                    \
383 __MotionComponent_X_Y_copy(width,height)                                    \
384 __MotionComponent_x_y_avg(width,height)                                     \
385 __MotionComponent_X_y_avg(width,height)                                     \
386 __MotionComponent_x_Y_avg(width,height)                                     \
387 __MotionComponent_X_Y_avg(width,height)
388
389 __MotionComponents (16,16)      /* 444, 422, 420 */
390 __MotionComponents (16,8)       /* 444, 422, 420 */
391 __MotionComponents (8,8)        /* 422, 420 */
392 __MotionComponents (8,4)        /* 420 */
393 #if 0
394 __MotionComponents (8,16)       /* 422 */
395 #endif