]> git.sesse.net Git - vlc/blob - modules/video_chroma/i422_yuy2.c
let gcc choose how to reference memory addresses in i420_rgx mmx asm
[vlc] / modules / video_chroma / i422_yuy2.c
1 /*****************************************************************************
2  * i422_yuy2.c : Planar YUV 4:2:2 to Packed YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *          Damien Fouilleul <damienf@videolan.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
32
33 #include <vlc/vlc.h>
34 #include <vlc_vout.h>
35
36 #include "i422_yuy2.h"
37
/* Input chroma handled by every variant of this module. */
#define SRC_FOURCC  "I422"

/* Output chromas shared by all variants; only the plain C build
 * (MODULE_NAME_IS_i422_yuy2) additionally advertises Y211. */
#define DEST_FOURCC_COMMON "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
#if defined (MODULE_NAME_IS_i422_yuy2)
#    define DEST_FOURCC DEST_FOURCC_COMMON ",Y211"
#else
#    define DEST_FOURCC DEST_FOURCC_COMMON
#endif
44
45 /*****************************************************************************
46  * Local and extern prototypes.
47  *****************************************************************************/
48 static int  Activate ( vlc_object_t * );
49
50 static void I422_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
51 static void I422_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
52 static void I422_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
53 static void I422_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
54 static void I422_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
55 #if defined (MODULE_NAME_IS_i422_yuy2)
56 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
57 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
58 #endif
59
/*****************************************************************************
 * Module descriptor
 *****************************************************************************
 * The description, the "chroma" capability score and the CPU requirement
 * depend on which variant of the module is being compiled.
 *****************************************************************************/
vlc_module_begin();
#ifdef MODULE_NAME_IS_i422_yuy2
    /* Plain C variant: lowest score, no CPU requirement */
    set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 80 );
#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
    /* MMX variant */
    set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 100 );
    add_requirement( MMX );
#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
    /* SSE2 variant: highest score */
    set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 120 );
    add_requirement( SSE2 );
#endif
    /* No deactivation callback is registered */
    set_callbacks( Activate, NULL );
vlc_module_end();
78
79 /*****************************************************************************
80  * Activate: allocate a chroma function
81  *****************************************************************************
82  * This function allocates and initializes a chroma function
83  *****************************************************************************/
84 static int Activate( vlc_object_t *p_this )
85 {
86     vout_thread_t *p_vout = (vout_thread_t *)p_this;
87
88     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
89     {
90         return -1;
91     }
92
93     switch( p_vout->render.i_chroma )
94     {
95         case VLC_FOURCC('I','4','2','2'):
96             switch( p_vout->output.i_chroma )
97             {
98                 case VLC_FOURCC('Y','U','Y','2'):
99                 case VLC_FOURCC('Y','U','N','V'):
100                     p_vout->chroma.pf_convert = I422_YUY2;
101                     break;
102
103                 case VLC_FOURCC('Y','V','Y','U'):
104                     p_vout->chroma.pf_convert = I422_YVYU;
105                     break;
106
107                 case VLC_FOURCC('U','Y','V','Y'):
108                 case VLC_FOURCC('U','Y','N','V'):
109                 case VLC_FOURCC('Y','4','2','2'):
110                     p_vout->chroma.pf_convert = I422_UYVY;
111                     break;
112
113                 case VLC_FOURCC('I','U','Y','V'):
114                     p_vout->chroma.pf_convert = I422_IUYV;
115                     break;
116
117                 case VLC_FOURCC('c','y','u','v'):
118                     p_vout->chroma.pf_convert = I422_cyuv;
119                     break;
120
121 #if defined (MODULE_NAME_IS_i422_yuy2)
122                 case VLC_FOURCC('Y','2','1','1'):
123                     p_vout->chroma.pf_convert = I422_Y211;
124                     break;
125 #endif
126
127                 default:
128                     return -1;
129             }
130             break;
131
132         default:
133             return -1;
134     }
135     return 0;
136 }
137
138 /* Following functions are local */
139
140 /*****************************************************************************
141  * I422_YUY2: planar YUV 4:2:2 to packed YUY2 4:2:2
142  *****************************************************************************/
143 static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
144                                               picture_t *p_dest )
145 {
146     uint8_t *p_line = p_dest->p->p_pixels;
147     uint8_t *p_y = p_source->Y_PIXELS;
148     uint8_t *p_u = p_source->U_PIXELS;
149     uint8_t *p_v = p_source->V_PIXELS;
150
151     int i_x, i_y;
152
153     const int i_source_margin = p_source->p[0].i_pitch
154                                  - p_source->p[0].i_visible_pitch;
155     const int i_source_margin_c = p_source->p[1].i_pitch
156                                  - p_source->p[1].i_visible_pitch;
157     const int i_dest_margin = p_dest->p->i_pitch
158                                - p_dest->p->i_visible_pitch;
159
160 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
161
162     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
163         ((intptr_t)p_line|(intptr_t)p_y))) )
164     {
165         /* use faster SSE2 aligned fetch and store */
166         for( i_y = p_vout->render.i_height ; i_y-- ; )
167         {
168             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
169             {
170                 SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
171             }
172             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
173             {
174                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
175             }
176             p_y += i_source_margin;
177             p_u += i_source_margin_c;
178             p_v += i_source_margin_c;
179             p_line += i_dest_margin;
180         }
181     }
182     else {
183         /* use slower SSE2 unaligned fetch and store */
184         for( i_y = p_vout->render.i_height ; i_y-- ; )
185         {
186             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
187             {
188                 SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
189             }
190             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
191             {
192                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
193             }
194             p_y += i_source_margin;
195             p_u += i_source_margin_c;
196             p_v += i_source_margin_c;
197             p_line += i_dest_margin;
198         }
199     }
200     SSE2_END;
201
202 #else
203
204     for( i_y = p_vout->render.i_height ; i_y-- ; )
205     {
206         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
207         {
208 #if defined (MODULE_NAME_IS_i422_yuy2)
209             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
210             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
211             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
212             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
213 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
214             MMX_CALL( MMX_YUV422_YUYV );
215 #endif
216         }
217         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
218         {
219             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
220         }
221         p_y += i_source_margin;
222         p_u += i_source_margin_c;
223         p_v += i_source_margin_c;
224         p_line += i_dest_margin;
225     }
226 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
227     MMX_END;
228 #endif
229
230 #endif
231 }
232
233 /*****************************************************************************
234  * I422_YVYU: planar YUV 4:2:2 to packed YVYU 4:2:2
235  *****************************************************************************/
236 static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
237                                               picture_t *p_dest )
238 {
239     uint8_t *p_line = p_dest->p->p_pixels;
240     uint8_t *p_y = p_source->Y_PIXELS;
241     uint8_t *p_u = p_source->U_PIXELS;
242     uint8_t *p_v = p_source->V_PIXELS;
243
244     int i_x, i_y;
245
246     const int i_source_margin = p_source->p[0].i_pitch
247                                  - p_source->p[0].i_visible_pitch;
248     const int i_source_margin_c = p_source->p[1].i_pitch
249                                  - p_source->p[1].i_visible_pitch;
250     const int i_dest_margin = p_dest->p->i_pitch
251                                - p_dest->p->i_visible_pitch;
252
253 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
254
255     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
256         ((intptr_t)p_line|(intptr_t)p_y))) )
257     {
258         /* use faster SSE2 aligned fetch and store */
259         for( i_y = p_vout->render.i_height ; i_y-- ; )
260         {
261             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
262             {
263                 SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
264             }
265             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
266             {
267                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
268             }
269             p_y += i_source_margin;
270             p_u += i_source_margin_c;
271             p_v += i_source_margin_c;
272             p_line += i_dest_margin;
273         }
274     }
275     else {
276         /* use slower SSE2 unaligned fetch and store */
277         for( i_y = p_vout->render.i_height ; i_y-- ; )
278         {
279             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
280             {
281                 SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
282             }
283             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
284             {
285                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
286             }
287             p_y += i_source_margin;
288             p_u += i_source_margin_c;
289             p_v += i_source_margin_c;
290             p_line += i_dest_margin;
291         }
292     }
293     SSE2_END;
294
295 #else
296
297     for( i_y = p_vout->render.i_height ; i_y-- ; )
298     {
299         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
300         {
301 #if defined (MODULE_NAME_IS_i422_yuy2)
302             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
303             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
304             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
305             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
306 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
307             MMX_CALL( MMX_YUV422_YVYU );
308 #endif
309         }
310         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
311         {
312             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
313         }
314         p_y += i_source_margin;
315         p_u += i_source_margin_c;
316         p_v += i_source_margin_c;
317         p_line += i_dest_margin;
318     }
319 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
320     MMX_END;
321 #endif
322
323 #endif
324 }
325
326 /*****************************************************************************
327  * I422_UYVY: planar YUV 4:2:2 to packed UYVY 4:2:2
328  *****************************************************************************/
329 static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
330                                               picture_t *p_dest )
331 {
332     uint8_t *p_line = p_dest->p->p_pixels;
333     uint8_t *p_y = p_source->Y_PIXELS;
334     uint8_t *p_u = p_source->U_PIXELS;
335     uint8_t *p_v = p_source->V_PIXELS;
336
337     int i_x, i_y;
338
339     const int i_source_margin = p_source->p[0].i_pitch
340                                  - p_source->p[0].i_visible_pitch;
341     const int i_source_margin_c = p_source->p[1].i_pitch
342                                  - p_source->p[1].i_visible_pitch;
343     const int i_dest_margin = p_dest->p->i_pitch
344                                - p_dest->p->i_visible_pitch;
345
346 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
347
348     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
349         ((intptr_t)p_line|(intptr_t)p_y))) )
350     {
351         /* use faster SSE2 aligned fetch and store */
352         for( i_y = p_vout->render.i_height ; i_y-- ; )
353         {
354             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
355             {
356                 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
357             }
358             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
359             {
360                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
361             }
362             p_y += i_source_margin;
363             p_u += i_source_margin_c;
364             p_v += i_source_margin_c;
365             p_line += i_dest_margin;
366         }
367     }
368     else {
369         /* use slower SSE2 unaligned fetch and store */
370         for( i_y = p_vout->render.i_height ; i_y-- ; )
371         {
372             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
373             {
374                 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
375             }
376             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
377             {
378                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
379             }
380             p_y += i_source_margin;
381             p_u += i_source_margin_c;
382             p_v += i_source_margin_c;
383             p_line += i_dest_margin;
384         }
385     }
386     SSE2_END;
387
388 #else
389
390     for( i_y = p_vout->render.i_height ; i_y-- ; )
391     {
392         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
393         {
394 #if defined (MODULE_NAME_IS_i422_yuy2)
395             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
396             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
397             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
398             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
399 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
400             MMX_CALL( MMX_YUV422_UYVY );
401 #endif
402         }
403         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
404         {
405             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
406         }
407         p_y += i_source_margin;
408         p_u += i_source_margin_c;
409         p_v += i_source_margin_c;
410         p_line += i_dest_margin;
411     }
412 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
413     MMX_END;
414 #endif
415
416 #endif
417 }
418
419 /*****************************************************************************
420  * I422_IUYV: planar YUV 4:2:2 to interleaved packed IUYV 4:2:2
421  *****************************************************************************/
422 static void I422_IUYV( vout_thread_t *p_vout, picture_t *p_source,
423                                               picture_t *p_dest )
424 {
425     VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
426     /* FIXME: TODO ! */
427     msg_Err( p_vout, "I422_IUYV unimplemented, please harass <sam@zoy.org>" );
428 }
429
430 /*****************************************************************************
431  * I422_cyuv: planar YUV 4:2:2 to upside-down packed UYVY 4:2:2
432  *****************************************************************************/
433 static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
434                                               picture_t *p_dest )
435 {
436     uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
437     uint8_t *p_y = p_source->Y_PIXELS;
438     uint8_t *p_u = p_source->U_PIXELS;
439     uint8_t *p_v = p_source->V_PIXELS;
440
441     int i_x, i_y;
442
443     const int i_source_margin = p_source->p[0].i_pitch
444                                  - p_source->p[0].i_visible_pitch;
445     const int i_source_margin_c = p_source->p[1].i_pitch
446                                  - p_source->p[1].i_visible_pitch;
447     const int i_dest_margin = p_dest->p->i_pitch
448                                - p_dest->p->i_visible_pitch;
449
450 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
451
452     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
453         ((intptr_t)p_line|(intptr_t)p_y))) )
454     {
455         /* use faster SSE2 aligned fetch and store */
456         for( i_y = p_vout->render.i_height ; i_y-- ; )
457         {
458             p_line -= 2 * p_dest->p->i_pitch;
459
460             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
461             {
462                 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
463             }
464             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
465             {
466                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
467             }
468             p_y += i_source_margin;
469             p_u += i_source_margin_c;
470             p_v += i_source_margin_c;
471             p_line += i_dest_margin;
472         }
473     }
474     else {
475         /* use slower SSE2 unaligned fetch and store */
476         for( i_y = p_vout->render.i_height ; i_y-- ; )
477         {
478             p_line -= 2 * p_dest->p->i_pitch;
479
480             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
481             {
482                 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
483             }
484             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
485             {
486                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
487             }
488             p_y += i_source_margin;
489             p_u += i_source_margin_c;
490             p_v += i_source_margin_c;
491             p_line += i_dest_margin;
492         }
493     }
494     SSE2_END;
495
496 #else
497
498     for( i_y = p_vout->render.i_height ; i_y-- ; )
499     {
500         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
501         {
502             p_line -= 2 * p_dest->p->i_pitch;
503
504 #if defined (MODULE_NAME_IS_i422_yuy2)
505             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
506             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
507             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
508             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
509 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
510             MMX_CALL( MMX_YUV422_UYVY );
511 #endif
512         }
513         p_y += i_source_margin;
514         p_u += i_source_margin_c;
515         p_v += i_source_margin_c;
516         p_line += i_dest_margin;
517     }
518 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
519     MMX_END;
520 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
521     SSE2_END;
522 #endif
523
524 #endif
525 }
526
/*****************************************************************************
 * I422_Y211: planar YUV 4:2:2 to packed YUYV 2:1:1
 *****************************************************************************
 * Only built in the plain C variant. The destination is written from its
 * first row downwards. Every row starts from its own base pointer, advanced
 * by the plane pitch, so row padding in the source or the destination does
 * not shift later rows.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i422_yuy2)
static void I422_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    /* Start of the current row in each plane */
    uint8_t *p_line_row = p_dest->p->p_pixels;
    uint8_t *p_y_row = p_source->Y_PIXELS;
    uint8_t *p_u_row = p_source->U_PIXELS;
    uint8_t *p_v_row = p_source->V_PIXELS;

    int i_x, i_y;

    for( i_y = p_vout->render.i_height ; i_y-- ; )
    {
        /* Working pointers, advanced by the conversion macro */
        uint8_t *p_line = p_line_row;
        uint8_t *p_y = p_y_row;
        uint8_t *p_u = p_u_row;
        uint8_t *p_v = p_v_row;

        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
        {
            C_YUV422_Y211( p_line, p_y, p_u, p_v );
            C_YUV422_Y211( p_line, p_y, p_u, p_v );
        }
        /* The loop above issues two macro calls per 8 pixels, so one call is
         * taken to cover 4 pixels (TODO confirm against i422_yuy2.h); finish
         * a trailing group of 4 the same way. */
        for( i_x = ( p_vout->render.i_width % 8 ) / 4 ; i_x-- ; )
        {
            C_YUV422_Y211( p_line, p_y, p_u, p_v );
        }

        /* Both chroma planes are stepped with the pitch of plane 1, as the
         * other converters of this file do for their chroma margin. */
        p_line_row += p_dest->p->i_pitch;
        p_y_row += p_source->p[0].i_pitch;
        p_u_row += p_source->p[1].i_pitch;
        p_v_row += p_source->p[1].i_pitch;
    }
}
#endif