]> git.sesse.net Git - vlc/blob - modules/video_chroma/i422_yuy2.c
i422_yuy2: SSE2 improvements
[vlc] / modules / video_chroma / i422_yuy2.c
1 /*****************************************************************************
2  * i422_yuy2.c : YUV to YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *          Damien Fouilleul <damienf@videolan.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  * 
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
#include <stdint.h>                                  /* uint8_t, uintptr_t */
#include <string.h>                                            /* strerror() */
#include <stdlib.h>                                      /* malloc(), free() */

#include <vlc/vlc.h>
#include <vlc_vout.h>

#include "i422_yuy2.h"
35
36 #define SRC_FOURCC  "I422"
37 #if defined (MODULE_NAME_IS_i422_yuy2)
38 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
39 #else
40 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
41 #endif
42
43 /*****************************************************************************
44  * Local and extern prototypes.
45  *****************************************************************************/
46 static int  Activate ( vlc_object_t * );
47
48 static void I422_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
49 static void I422_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
50 static void I422_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
51 static void I422_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
52 static void I422_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
53 #if defined (MODULE_NAME_IS_i422_yuy2)
54 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
55 static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
56 static void I422_YV12           ( vout_thread_t *, picture_t *, picture_t * );
57 #endif
58
59 /*****************************************************************************
60  * Module descriptor
61  *****************************************************************************/
/* One descriptor per build flavour (plain C, MMX, SSE2). The flavours differ
 * by their description, their "chroma" capability score (80 / 100 / 120) and
 * the CPU feature they declare as a requirement. */
vlc_module_begin();
#if defined (MODULE_NAME_IS_i422_yuy2)
    set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 80 );
#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
    set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 100 );
    add_requirement( MMX );
#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
    set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 120 );
    /* This flavour issues SSE2 code (SSE2_CALL), so it must require SSE2:
     * requiring only MMX would let it be selected on CPUs without SSE2. */
    add_requirement( SSE2 );
#endif
    set_callbacks( Activate, NULL );
vlc_module_end();
77
78 /*****************************************************************************
79  * Activate: allocate a chroma function
80  *****************************************************************************
81  * This function allocates and initializes a chroma function
82  *****************************************************************************/
83 static int Activate( vlc_object_t *p_this )
84 {
85     vout_thread_t *p_vout = (vout_thread_t *)p_this;
86
87     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
88     {
89         return -1;
90     }
91
92     switch( p_vout->render.i_chroma )
93     {
94         case VLC_FOURCC('I','4','2','2'):
95             switch( p_vout->output.i_chroma )
96             {
97                 case VLC_FOURCC('Y','U','Y','2'):
98                 case VLC_FOURCC('Y','U','N','V'):
99                     p_vout->chroma.pf_convert = I422_YUY2;
100                     break;
101
102                 case VLC_FOURCC('Y','V','Y','U'):
103                     p_vout->chroma.pf_convert = I422_YVYU;
104                     break;
105
106                 case VLC_FOURCC('U','Y','V','Y'):
107                 case VLC_FOURCC('U','Y','N','V'):
108                 case VLC_FOURCC('Y','4','2','2'):
109                     p_vout->chroma.pf_convert = I422_UYVY;
110                     break;
111
112                 case VLC_FOURCC('I','U','Y','V'):
113                     p_vout->chroma.pf_convert = I422_IUYV;
114                     break;
115
116                 case VLC_FOURCC('c','y','u','v'):
117                     p_vout->chroma.pf_convert = I422_cyuv;
118                     break;
119
120 #if defined (MODULE_NAME_IS_i422_yuy2)
121                 case VLC_FOURCC('Y','2','1','1'):
122                     p_vout->chroma.pf_convert = I422_Y211;
123                     break;
124
125                 case VLC_FOURCC('Y','V','1','2'):
126                     p_vout->chroma.pf_convert = I422_YV12;
127                     break;
128 #endif
129
130                 default:
131                     return -1;
132             }
133             break;
134
135         default:
136             return -1;
137     }
138     
139     return 0; 
140 }
141
142 /* Following functions are local */
143
144 /*****************************************************************************
145  * I422_YUY2: planar YUV 4:2:2 to packed YUY2 4:2:2
146  *****************************************************************************/
147 static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
148                                               picture_t *p_dest )
149 {
150     uint8_t *p_line = p_dest->p->p_pixels;
151     uint8_t *p_y = p_source->Y_PIXELS;
152     uint8_t *p_u = p_source->U_PIXELS;
153     uint8_t *p_v = p_source->V_PIXELS;
154
155     int i_x, i_y;
156
157     const int i_source_margin = p_source->p[0].i_pitch
158                                  - p_source->p[0].i_visible_pitch;
159     const int i_source_margin_c = p_source->p[1].i_pitch
160                                  - p_source->p[1].i_visible_pitch;
161     const int i_dest_margin = p_dest->p->i_pitch
162                                - p_dest->p->i_visible_pitch;
163
164 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
165
166     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
167         ((int)p_line|(int)p_y))) )
168     {
169         /* use faster SSE2 aligned fetch and store */
170         for( i_y = p_vout->render.i_height ; i_y-- ; )
171         {
172             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
173             {
174                 SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
175             }
176             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
177             {
178                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
179             }
180             p_y += i_source_margin;
181             p_u += i_source_margin_c;
182             p_v += i_source_margin_c;
183             p_line += i_dest_margin;
184         }
185     }
186     else {
187         /* use slower SSE2 unaligned fetch and store */
188         for( i_y = p_vout->render.i_height ; i_y-- ; )
189         {
190             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
191             {
192                 SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
193             }
194             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
195             {
196                 C_YUV422_YUYV( p_line, p_y, p_u, p_v );
197             }
198             p_y += i_source_margin;
199             p_u += i_source_margin_c;
200             p_v += i_source_margin_c;
201             p_line += i_dest_margin;
202         }
203     }
204     SSE2_END;
205
206 #else
207
208     for( i_y = p_vout->render.i_height ; i_y-- ; )
209     {
210         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
211         {
212 #if defined (MODULE_NAME_IS_i422_yuy2)
213             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
214             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
215             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
216             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
217 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
218             MMX_CALL( MMX_YUV422_YUYV );
219 #endif
220         }
221         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
222         {
223             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
224         }
225         p_y += i_source_margin;
226         p_u += i_source_margin_c;
227         p_v += i_source_margin_c;
228         p_line += i_dest_margin;
229     }
230 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
231     MMX_END;
232 #endif
233
234 #endif
235 }
236
237 /*****************************************************************************
238  * I422_YVYU: planar YUV 4:2:2 to packed YVYU 4:2:2
239  *****************************************************************************/
240 static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
241                                               picture_t *p_dest )
242 {
243     uint8_t *p_line = p_dest->p->p_pixels;
244     uint8_t *p_y = p_source->Y_PIXELS;
245     uint8_t *p_u = p_source->U_PIXELS;
246     uint8_t *p_v = p_source->V_PIXELS;
247
248     int i_x, i_y;
249
250     const int i_source_margin = p_source->p[0].i_pitch
251                                  - p_source->p[0].i_visible_pitch;
252     const int i_source_margin_c = p_source->p[1].i_pitch
253                                  - p_source->p[1].i_visible_pitch;
254     const int i_dest_margin = p_dest->p->i_pitch
255                                - p_dest->p->i_visible_pitch;
256
257 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
258
259     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
260         ((int)p_line|(int)p_y))) )
261     {
262         /* use faster SSE2 aligned fetch and store */
263         for( i_y = p_vout->render.i_height ; i_y-- ; )
264         {
265             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
266             {
267                 SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
268             }
269             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
270             {
271                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
272             }
273             p_y += i_source_margin;
274             p_u += i_source_margin_c;
275             p_v += i_source_margin_c;
276             p_line += i_dest_margin;
277         }
278     }
279     else {
280         /* use slower SSE2 unaligned fetch and store */
281         for( i_y = p_vout->render.i_height ; i_y-- ; )
282         {
283             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
284             {
285                 SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
286             }
287             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
288             {
289                 C_YUV422_YVYU( p_line, p_y, p_u, p_v );
290             }
291             p_y += i_source_margin;
292             p_u += i_source_margin_c;
293             p_v += i_source_margin_c;
294             p_line += i_dest_margin;
295         }
296     }
297     SSE2_END;
298
299 #else
300
301     for( i_y = p_vout->render.i_height ; i_y-- ; )
302     {
303         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
304         {
305 #if defined (MODULE_NAME_IS_i422_yuy2)
306             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
307             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
308             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
309             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
310 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
311             MMX_CALL( MMX_YUV422_YVYU );
312 #endif
313         }
314         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
315         {
316             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
317         }
318         p_y += i_source_margin;
319         p_u += i_source_margin_c;
320         p_v += i_source_margin_c;
321         p_line += i_dest_margin;
322     }
323 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
324     MMX_END;
325 #endif
326
327 #endif
328 }
329
330 /*****************************************************************************
331  * I422_UYVY: planar YUV 4:2:2 to packed UYVY 4:2:2
332  *****************************************************************************/
333 static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
334                                               picture_t *p_dest )
335 {
336     uint8_t *p_line = p_dest->p->p_pixels;
337     uint8_t *p_y = p_source->Y_PIXELS;
338     uint8_t *p_u = p_source->U_PIXELS;
339     uint8_t *p_v = p_source->V_PIXELS;
340
341     int i_x, i_y;
342
343     const int i_source_margin = p_source->p[0].i_pitch
344                                  - p_source->p[0].i_visible_pitch;
345     const int i_source_margin_c = p_source->p[1].i_pitch
346                                  - p_source->p[1].i_visible_pitch;
347     const int i_dest_margin = p_dest->p->i_pitch
348                                - p_dest->p->i_visible_pitch;
349
350 #if defined (MODULE_NAME_IS_i422_yuy2_sse2)
351
352     if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
353         ((int)p_line|(int)p_y))) )
354     {
355         /* use faster SSE2 aligned fetch and store */
356         for( i_y = p_vout->render.i_height ; i_y-- ; )
357         {
358             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
359             {
360                 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
361             }
362             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
363             {
364                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
365             }
366             p_y += i_source_margin;
367             p_u += i_source_margin_c;
368             p_v += i_source_margin_c;
369             p_line += i_dest_margin;
370         }
371     }
372     else {
373         /* use slower SSE2 unaligned fetch and store */
374         for( i_y = p_vout->render.i_height ; i_y-- ; )
375         {
376             for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
377             {
378                 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
379             }
380             for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
381             {
382                 C_YUV422_UYVY( p_line, p_y, p_u, p_v );
383             }
384             p_y += i_source_margin;
385             p_u += i_source_margin_c;
386             p_v += i_source_margin_c;
387             p_line += i_dest_margin;
388         }
389     }
390     SSE2_END;
391
392 #else
393
394     for( i_y = p_vout->render.i_height ; i_y-- ; )
395     {
396         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
397         {
398 #if defined (MODULE_NAME_IS_i422_yuy2)
399             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
400             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
401             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
402             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
403 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
404             MMX_CALL( MMX_YUV422_UYVY );
405 #endif
406         }
407         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
408         {
409             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
410         }
411         p_y += i_source_margin;
412         p_u += i_source_margin_c;
413         p_v += i_source_margin_c;
414         p_line += i_dest_margin;
415     }
416 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
417     MMX_END;
418 #endif
419
420 #endif
421 }
422
423 /*****************************************************************************
424  * I422_IUYV: planar YUV 4:2:2 to interleaved packed IUYV 4:2:2
425  *****************************************************************************/
426 static void I422_IUYV( vout_thread_t *p_vout, picture_t *p_source,
427                                               picture_t *p_dest )
428 {
429     /* FIXME: TODO ! */
430     msg_Err( p_vout, "I422_IUYV unimplemented, please harass <sam@zoy.org>" );
431 }
432
433 /*****************************************************************************
434  * I422_cyuv: planar YUV 4:2:2 to upside-down packed UYVY 4:2:2
435  *****************************************************************************/
436 static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
437                                               picture_t *p_dest )
438 {
439     uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
440     uint8_t *p_y = p_source->Y_PIXELS;
441     uint8_t *p_u = p_source->U_PIXELS;
442     uint8_t *p_v = p_source->V_PIXELS;
443
444     int i_x, i_y;
445
446     for( i_y = p_vout->render.i_height ; i_y-- ; )
447     {
448         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
449         {
450             p_line -= 2 * p_dest->p->i_pitch;
451
452 #if defined (MODULE_NAME_IS_i422_yuy2)
453             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
454             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
455             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
456             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
457 #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
458             MMX_CALL( MMX_YUV422_UYVY );
459 #endif
460         }
461     }
462 #if defined (MODULE_NAME_IS_i422_yuy2_mmx)
463     MMX_END;
464 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
465     SSE2_END;
466 #endif
467 }
468
/*****************************************************************************
 * I422_Y211: planar YUV 4:2:2 to packed YUYV 2:1:1
 *****************************************************************************
 * p_vout:   provides the picture size (render.i_width x render.i_height)
 * p_source: planar picture, read through Y_PIXELS, U_PIXELS and V_PIXELS
 * p_dest:   packed picture, written through its first plane
 *
 * Fixes over the previous version:
 *  - the destination pointer started one past the last visible line and was
 *    then written forwards, i.e. outside the visible picture;
 *  - plane pitches were ignored, so lines drifted whenever a pitch differed
 *    from the amount of data handled per line.
 * Every line is now addressed from the start of its plane using the pitch.
 *
 * NOTE(review): pixels beyond the last full group of 8 on a line are still
 * not converted; C_YUV422_Y211 is defined in i422_yuy2.h and its per-call
 * pixel count should be confirmed before adding a tail loop.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i422_yuy2)
static void I422_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    uint8_t *p_line;
    uint8_t *p_y;
    uint8_t *p_u;
    uint8_t *p_v;

    int i_x, i_y;

    const int i_height = p_vout->render.i_height;
    const int i_dest_pitch = p_dest->p->i_pitch;
    const int i_source_pitch = p_source->p[Y_PLANE].i_pitch;
    const int i_source_pitch_c = p_source->p[U_PLANE].i_pitch;

    for( i_y = 0 ; i_y < i_height ; i_y++ )
    {
        /* Restart every pointer at the beginning of line i_y */
        p_line = p_dest->p->p_pixels + i_y * i_dest_pitch;
        p_y = p_source->Y_PIXELS + i_y * i_source_pitch;
        p_u = p_source->U_PIXELS + i_y * i_source_pitch_c;
        p_v = p_source->V_PIXELS + i_y * i_source_pitch_c;

        /* Two macro calls per group of 8 source pixels */
        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
        {
            C_YUV422_Y211( p_line, p_y, p_u, p_v );
            C_YUV422_Y211( p_line, p_y, p_u, p_v );
        }
    }
}
#endif
493
494
/*****************************************************************************
 * I422_YV12: planar YUV 4:2:2 to planar YV12
 *****************************************************************************
 * p_vout:   provides the picture size (render.i_width x render.i_height)
 * p_source: planar 4:2:2 picture
 * p_dest:   planar picture with half-height chroma planes
 *
 * Luma lines are copied unchanged. Chroma is halved vertically by keeping
 * one source chroma line out of two (the lower line of each pair). Lines are
 * processed from the bottom of the picture upwards, as before.
 *
 * Pitches and dimensions are kept in int: the previous uint16_t locals
 * silently truncated any value of 65536 or more. Lines are addressed by
 * index so that no pointer is ever stepped before the start of a plane.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i422_yuy2)
static void I422_YV12( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    const int i_dpy = p_dest->p[Y_PLANE].i_pitch;
    const int i_spy = p_source->p[Y_PLANE].i_pitch;
    const int i_dpuv = p_dest->p[U_PLANE].i_pitch;
    const int i_spuv = p_source->p[U_PLANE].i_pitch;
    const int i_width = p_vout->render.i_width;
    const int i_height = p_vout->render.i_height;
    const int i_pairs = i_height / 2;
    int i_pair;

    for( i_pair = 0 ; i_pair < i_pairs ; i_pair++ )
    {
        /* Lower luma line of the pair, counted from the bottom line */
        const int i_line = i_height - 1 - 2 * i_pair;
        /* Destination chroma line fed by this pair */
        const int i_cline = i_pairs - 1 - i_pair;

        memcpy( p_dest->Y_PIXELS + i_line * i_dpy,
                p_source->Y_PIXELS + i_line * i_spy, i_width );
        memcpy( p_dest->Y_PIXELS + ( i_line - 1 ) * i_dpy,
                p_source->Y_PIXELS + ( i_line - 1 ) * i_spy, i_width );

        /* Chroma planes are half as wide as the luma plane */
        memcpy( p_dest->U_PIXELS + i_cline * i_dpuv,
                p_source->U_PIXELS + i_line * i_spuv, i_width / 2 );
        memcpy( p_dest->V_PIXELS + i_cline * i_dpuv,
                p_source->V_PIXELS + i_line * i_spuv, i_width / 2 );
    }
}
#endif