]> git.sesse.net Git - vlc/blob - modules/video_chroma/i422_yuy2.c
video chromas: finalize SSE2 improvements
[vlc] / modules / video_chroma / i422_yuy2.c
1 /*****************************************************************************
2  * i422_yuy2.c : YUV to YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *          Damien Fouilleul <damienf@videolan.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  * 
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include <string.h>                                            /* strerror() */
29 #include <stdlib.h>                                      /* malloc(), free() */
30
31 #include <vlc/vlc.h>
32 #include <vlc_vout.h>
33
34 #include "i422_yuy2.h"
35
36 #define SRC_FOURCC  "I422"
37 #if defined (MODULE_NAME_IS_i422_yuy2)
38 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
39 #else
40 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
41 #endif
42
43 /*****************************************************************************
44  * Local and extern prototypes.
45  *****************************************************************************/
46 static int  Activate ( vlc_object_t * );
47
48 static void I422_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
49 static void I422_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
50 static void I422_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
51 static void I422_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
52 static void I422_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
53 #if defined (MODULE_NAME_IS_i422_yuy2)
54 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
55 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
56 static void I422_YV12           ( vout_thread_t *, picture_t *, picture_t * );
57 #endif
58
59 /*****************************************************************************
60  * Module descriptor
61  *****************************************************************************/
62 vlc_module_begin();
63 #if defined (MODULE_NAME_IS_i422_yuy2)
64     set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
65     set_capability( "chroma", 80 );
66 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
67     set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
68     set_capability( "chroma", 100 );
69     add_requirement( MMX );
70 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
71     set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
72     set_capability( "chroma", 120 );
73     add_requirement( MMX );
74 #endif
75     set_callbacks( Activate, NULL );
76 vlc_module_end();
77
78 /*****************************************************************************
79  * Activate: allocate a chroma function
80  *****************************************************************************
81  * This function allocates and initializes a chroma function
82  *****************************************************************************/
83 static int Activate( vlc_object_t *p_this )
84 {
85     vout_thread_t *p_vout = (vout_thread_t *)p_this;
86
87     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
88     {
89         return -1;
90     }
91
92     switch( p_vout->render.i_chroma )
93     {
94         case VLC_FOURCC('I','4','2','2'):
95             switch( p_vout->output.i_chroma )
96             {
97                 case VLC_FOURCC('Y','U','Y','2'):
98                 case VLC_FOURCC('Y','U','N','V'):
99                     p_vout->chroma.pf_convert = I422_YUY2;
100                     break;
101
102                 case VLC_FOURCC('Y','V','Y','U'):
103                     p_vout->chroma.pf_convert = I422_YVYU;
104                     break;
105
106                 case VLC_FOURCC('U','Y','V','Y'):
107                 case VLC_FOURCC('U','Y','N','V'):
108                 case VLC_FOURCC('Y','4','2','2'):
109                     p_vout->chroma.pf_convert = I422_UYVY;
110                     break;
111
112                 case VLC_FOURCC('I','U','Y','V'):
113                     p_vout->chroma.pf_convert = I422_IUYV;
114                     break;
115
116                 case VLC_FOURCC('c','y','u','v'):
117                     p_vout->chroma.pf_convert = I422_cyuv;
118                     break;
119
120 #if defined (MODULE_NAME_IS_i422_yuy2)
121                 case VLC_FOURCC('Y','2','1','1'):
122                     p_vout->chroma.pf_convert = I422_Y211;
123                     break;
124
125                 case VLC_FOURCC('Y','V','1','2'):
126                     p_vout->chroma.pf_convert = I422_YV12;
127                     break;
128 #endif
129
130                 default:
131                     return -1;
132             }
133             break;
134
135         default:
136             return -1;
137     }
138     return 0;
139 }
140
141 /* Following functions are local */
142
143 /*****************************************************************************
144  * I422_YUY2: planar YUV 4:2:2 to packed YUY2 4:2:2
145  *****************************************************************************/
146 static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
147                                               picture_t *p_dest )
148 {
149     uint8_t *p_line = p_dest->p->p_pixels;
150     uint8_t *p_y = p_source->Y_PIXELS;
151     uint8_t *p_u = p_source->U_PIXELS;
152     uint8_t *p_v = p_source->V_PIXELS;
153
154     int i_x, i_y;
155
156     const int i_source_margin = p_source->p[0].i_pitch
157                                  - p_source->p[0].i_visible_pitch;
158     const int i_source_margin_c = p_source->p[1].i_pitch
159                                  - p_source->p[1].i_visible_pitch;
160     const int i_dest_margin = p_dest->p->i_pitch
161                                - p_dest->p->i_visible_pitch;
162
163 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
164
165     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
166         ((int)p_line|(int)p_y))) )
167     {
168         /* use faster SSE2 aligned fetch and store */
169         for( i_y = p_vout->render.i_height ; i_y-- ; )
170         {
171             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
172             {
173                 SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
174             }
175             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
176             {
177                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
178             }
179             p_y += i_source_margin;
180             p_u += i_source_margin_c;
181             p_v += i_source_margin_c;
182             p_line += i_dest_margin;
183         }
184     }
185     else {
186         /* use slower SSE2 unaligned fetch and store */
187         for( i_y = p_vout->render.i_height ; i_y-- ; )
188         {
189             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
190             {
191                 SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
192             }
193             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
194             {
195                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
196             }
197             p_y += i_source_margin;
198             p_u += i_source_margin_c;
199             p_v += i_source_margin_c;
200             p_line += i_dest_margin;
201         }
202     }
203     SSE2_END;
204
205 #else
206
207     for( i_y = p_vout->render.i_height ; i_y-- ; )
208     {
209         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
210         {
211 #if defined (MODULE_NAME_IS_i422_yuy2)
212             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
213             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
214             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
215             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
216 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
217             MMX_CALL( MMX_YUV422_YUYV );
218 #endif
219         }
220         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
221         {
222             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
223         }
224         p_y += i_source_margin;
225         p_u += i_source_margin_c;
226         p_v += i_source_margin_c;
227         p_line += i_dest_margin;
228     }
229 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
230     MMX_END;
231 #endif
232
233 #endif
234 }
235
236 /*****************************************************************************
237  * I422_YVYU: planar YUV 4:2:2 to packed YVYU 4:2:2
238  *****************************************************************************/
239 static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
240                                               picture_t *p_dest )
241 {
242     uint8_t *p_line = p_dest->p->p_pixels;
243     uint8_t *p_y = p_source->Y_PIXELS;
244     uint8_t *p_u = p_source->U_PIXELS;
245     uint8_t *p_v = p_source->V_PIXELS;
246
247     int i_x, i_y;
248
249     const int i_source_margin = p_source->p[0].i_pitch
250                                  - p_source->p[0].i_visible_pitch;
251     const int i_source_margin_c = p_source->p[1].i_pitch
252                                  - p_source->p[1].i_visible_pitch;
253     const int i_dest_margin = p_dest->p->i_pitch
254                                - p_dest->p->i_visible_pitch;
255
256 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
257
258     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
259         ((int)p_line|(int)p_y))) )
260     {
261         /* use faster SSE2 aligned fetch and store */
262         for( i_y = p_vout->render.i_height ; i_y-- ; )
263         {
264             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
265             {
266                 SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
267             }
268             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
269             {
270                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
271             }
272             p_y += i_source_margin;
273             p_u += i_source_margin_c;
274             p_v += i_source_margin_c;
275             p_line += i_dest_margin;
276         }
277     }
278     else {
279         /* use slower SSE2 unaligned fetch and store */
280         for( i_y = p_vout->render.i_height ; i_y-- ; )
281         {
282             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
283             {
284                 SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
285             }
286             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
287             {
288                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
289             }
290             p_y += i_source_margin;
291             p_u += i_source_margin_c;
292             p_v += i_source_margin_c;
293             p_line += i_dest_margin;
294         }
295     }
296     SSE2_END;
297
298 #else
299
300     for( i_y = p_vout->render.i_height ; i_y-- ; )
301     {
302         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
303         {
304 #if defined (MODULE_NAME_IS_i422_yuy2)
305             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
306             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
307             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
308             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
309 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
310             MMX_CALL( MMX_YUV422_YVYU );
311 #endif
312         }
313         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
314         {
315             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
316         }
317         p_y += i_source_margin;
318         p_u += i_source_margin_c;
319         p_v += i_source_margin_c;
320         p_line += i_dest_margin;
321     }
322 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
323     MMX_END;
324 #endif
325
326 #endif
327 }
328
329 /*****************************************************************************
330  * I422_UYVY: planar YUV 4:2:2 to packed UYVY 4:2:2
331  *****************************************************************************/
332 static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
333                                               picture_t *p_dest )
334 {
335     uint8_t *p_line = p_dest->p->p_pixels;
336     uint8_t *p_y = p_source->Y_PIXELS;
337     uint8_t *p_u = p_source->U_PIXELS;
338     uint8_t *p_v = p_source->V_PIXELS;
339
340     int i_x, i_y;
341
342     const int i_source_margin = p_source->p[0].i_pitch
343                                  - p_source->p[0].i_visible_pitch;
344     const int i_source_margin_c = p_source->p[1].i_pitch
345                                  - p_source->p[1].i_visible_pitch;
346     const int i_dest_margin = p_dest->p->i_pitch
347                                - p_dest->p->i_visible_pitch;
348
349 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
350
351     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
352         ((int)p_line|(int)p_y))) )
353     {
354         /* use faster SSE2 aligned fetch and store */
355         for( i_y = p_vout->render.i_height ; i_y-- ; )
356         {
357             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
358             {
359                 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
360             }
361             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
362             {
363                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
364             }
365             p_y += i_source_margin;
366             p_u += i_source_margin_c;
367             p_v += i_source_margin_c;
368             p_line += i_dest_margin;
369         }
370     }
371     else {
372         /* use slower SSE2 unaligned fetch and store */
373         for( i_y = p_vout->render.i_height ; i_y-- ; )
374         {
375             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
376             {
377                 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
378             }
379             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
380             {
381                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
382             }
383             p_y += i_source_margin;
384             p_u += i_source_margin_c;
385             p_v += i_source_margin_c;
386             p_line += i_dest_margin;
387         }
388     }
389     SSE2_END;
390
391 #else
392
393     for( i_y = p_vout->render.i_height ; i_y-- ; )
394     {
395         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
396         {
397 #if defined (MODULE_NAME_IS_i422_yuy2)
398             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
399             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
400             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
401             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
402 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
403             MMX_CALL( MMX_YUV422_UYVY );
404 #endif
405         }
406         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
407         {
408             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
409         }
410         p_y += i_source_margin;
411         p_u += i_source_margin_c;
412         p_v += i_source_margin_c;
413         p_line += i_dest_margin;
414     }
415 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
416     MMX_END;
417 #endif
418
419 #endif
420 }
421
422 /*****************************************************************************
423  * I422_IUYV: planar YUV 4:2:2 to interleaved packed IUYV 4:2:2
424  *****************************************************************************/
425 static void I422_IUYV( vout_thread_t *p_vout, picture_t *p_source,
426                                               picture_t *p_dest )
427 {
428     /* FIXME: TODO ! */
429     msg_Err( p_vout, "I422_IUYV unimplemented, please harass <sam@zoy.org>" );
430 }
431
432 /*****************************************************************************
433  * I422_cyuv: planar YUV 4:2:2 to upside-down packed UYVY 4:2:2
434  *****************************************************************************/
435 static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
436                                               picture_t *p_dest )
437 {
438     uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
439     uint8_t *p_y = p_source->Y_PIXELS;
440     uint8_t *p_u = p_source->U_PIXELS;
441     uint8_t *p_v = p_source->V_PIXELS;
442
443     int i_x, i_y;
444
445     const int i_source_margin = p_source->p[0].i_pitch
446                                  - p_source->p[0].i_visible_pitch;
447     const int i_source_margin_c = p_source->p[1].i_pitch
448                                  - p_source->p[1].i_visible_pitch;
449     const int i_dest_margin = p_dest->p->i_pitch
450                                - p_dest->p->i_visible_pitch;
451
452 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
453
454     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
455         ((int)p_line|(int)p_y))) )
456     {
457         /* use faster SSE2 aligned fetch and store */
458         for( i_y = p_vout->render.i_height ; i_y-- ; )
459         {
460             p_line -= 2 * p_dest->p->i_pitch;
461
462             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
463             {
464                 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
465             }
466             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
467             {
468                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
469             }
470             p_y += i_source_margin;
471             p_u += i_source_margin_c;
472             p_v += i_source_margin_c;
473             p_line += i_dest_margin;
474         }
475     }
476     else {
477         /* use slower SSE2 unaligned fetch and store */
478         for( i_y = p_vout->render.i_height ; i_y-- ; )
479         {
480             p_line -= 2 * p_dest->p->i_pitch;
481
482             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
483             {
484                 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
485             }
486             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
487             {
488                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
489             }
490             p_y += i_source_margin;
491             p_u += i_source_margin_c;
492             p_v += i_source_margin_c;
493             p_line += i_dest_margin;
494         }
495     }
496     SSE2_END;
497
498 #else
499
500     for( i_y = p_vout->render.i_height ; i_y-- ; )
501     {
502         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
503         {
504             p_line -= 2 * p_dest->p->i_pitch;
505
506 #if defined (MODULE_NAME_IS_i422_yuy2)
507             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
508             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
509             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
510             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
511 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
512             MMX_CALL( MMX_YUV422_UYVY );
513 #endif
514         }
515         p_y += i_source_margin;
516         p_u += i_source_margin_c;
517         p_v += i_source_margin_c;
518         p_line += i_dest_margin;
519     }
520 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
521     MMX_END;
522 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
523     SSE2_END;
524 #endif
525
526 #endif
527 }
528
529 /*****************************************************************************
530  * I422_Y211: planar YUV 4:2:2 to packed YUYV 2:1:1
531  *****************************************************************************/
532 #if defined (MODULE_NAME_IS_i422_yuy2)
533 static void I422_Y211( vout_thread_t *p_vout, picture_t *p_source,
534                                               picture_t *p_dest )
535 {
536     uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
537     uint8_t *p_y = p_source->Y_PIXELS;
538     uint8_t *p_u = p_source->U_PIXELS;
539     uint8_t *p_v = p_source->V_PIXELS;
540
541     int i_x, i_y;
542
543     for( i_y = p_vout->render.i_height ; i_y-- ; )
544     {
545         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
546         {
547             C_YUV422_Y211( p_line, p_y, p_u, p_v );
548             C_YUV422_Y211( p_line, p_y, p_u, p_v );
549         }
550     }
551 }
552 #endif
553
554
555 /*****************************************************************************
556  * I422_YV12: planar YUV 4:2:2 to planar YV12
557  *****************************************************************************/
558 #if defined (MODULE_NAME_IS_i422_yuy2)
559 static void I422_YV12( vout_thread_t *p_vout, picture_t *p_source,
560                                               picture_t *p_dest )
561 {
562     uint16_t i_dpy = p_dest->p[Y_PLANE].i_pitch;
563     uint16_t i_spy = p_source->p[Y_PLANE].i_pitch;
564     uint16_t i_dpuv = p_dest->p[U_PLANE].i_pitch;
565     uint16_t i_spuv = p_source->p[U_PLANE].i_pitch;
566     uint16_t i_width = p_vout->render.i_width;
567     uint16_t i_y = p_vout->render.i_height;
568     uint8_t *p_dy = p_dest->Y_PIXELS + (i_y-1)*i_dpy;
569     uint8_t *p_y = p_source->Y_PIXELS + (i_y-1)*i_spy;
570     uint8_t *p_du = p_dest->U_PIXELS + (i_y/2-1)*i_dpuv;
571     uint8_t *p_u = p_source->U_PIXELS + (i_y-1)*i_spuv;
572     uint8_t *p_dv = p_dest->V_PIXELS + (i_y/2-1)*i_dpuv;
573     uint8_t *p_v = p_source->V_PIXELS + (i_y-1)*i_spuv;
574     i_y /= 2;
575
576     for ( ; i_y--; )
577     {
578         memcpy(p_dy, p_y, i_width); p_dy -= i_dpy; p_y -= i_spy;
579         memcpy(p_dy, p_y, i_width); p_dy -= i_dpy; p_y -= i_spy;
580         memcpy(p_du, p_u, i_width/2); p_du -= i_dpuv; p_u -= 2*i_spuv;
581         memcpy(p_dv, p_v, i_width/2); p_dv -= i_dpuv; p_v -= 2*i_spuv;
582     }
583
584 }
585 #endif