1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
27 #include <string.h> /* strerror() */
28 #include <stdlib.h> /* malloc(), free() */
33 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
37 #include "i420_yuy2.h"
/* Source chromas named in the module description of every build flavour. */
39 #define SRC_FOURCC "I420,IYUV,YV12"
/* Destination chromas named in the module description, per build flavour:
 * the plain C build lists every format, the MMX build leaves out Y211, and
 * the AltiVec build additionally leaves out IUYV and cyuv.  These lists
 * mirror the converters that each flavour declares and dispatches to. */
41 #if defined (MODULE_NAME_IS_i420_yuy2)
42 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
43 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
44 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
45 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
46 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
49 /*****************************************************************************
50 * Local and extern prototypes.
51 *****************************************************************************/
/* Module activation callback; it is the function handed to set_callbacks(). */
52 static int Activate ( vlc_object_t * );
/* Converters declared for every build flavour.  Each one takes the vout
 * thread, the source picture and the destination picture, in that order. */
54 static void I420_YUY2 ( vout_thread_t *, picture_t *, picture_t * );
55 static void I420_YVYU ( vout_thread_t *, picture_t *, picture_t * );
56 static void I420_UYVY ( vout_thread_t *, picture_t *, picture_t * );
/* Converters that the AltiVec build does not provide. */
57 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
58 static void I420_IUYV ( vout_thread_t *, picture_t *, picture_t * );
59 static void I420_cyuv ( vout_thread_t *, picture_t *, picture_t * );
/* Converter that only the plain C build provides. */
61 #if defined (MODULE_NAME_IS_i420_yuy2)
62 static void I420_Y211 ( vout_thread_t *, picture_t *, picture_t * );
/* 64-bit constants that exist only in the MMX build.  They are assigned in
 * the module descriptor (0x00ff00ff00ff00ff and 0x0000000080808080).
 * Presumably they are consumed by the MMX macros pulled in from
 * i420_yuy2.h -- no use is visible in this excerpt, confirm there. */
65 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
66 static uint64_t i_00ffw;
67 static uint64_t i_80w;
70 /*****************************************************************************
72 *****************************************************************************/
/* Module descriptor body: one description/capability set per build flavour.
 * All flavours register the "chroma" capability; the plain C build passes 80
 * and the MMX and AltiVec builds pass 100 (presumably a priority score, so
 * the accelerated builds win when usable -- confirm against the module API).
 * The accelerated builds also declare the CPU feature they require. */
74 #if defined (MODULE_NAME_IS_i420_yuy2)
75 set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
76 set_capability( "chroma", 80 );
77 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
78 set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
79 set_capability( "chroma", 100 );
80 add_requirement( MMX );
81 /* Initialize MMX-specific constants: i_00ffw holds 0x00ff in each 16-bit
 * word; i_80w holds 0x80 in each of the four low bytes only.
 * NOTE(review): the upper 32 bits of i_80w are zero -- confirm that the MMX
 * macros only rely on the low half. */
82 i_00ffw = 0x00ff00ff00ff00ffULL;
83 i_80w = 0x0000000080808080ULL;
84 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
86 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
87 set_capability( "chroma", 100 );
88 add_requirement( ALTIVEC );
/* Activate is the open callback; no close callback is registered (NULL). */
90 set_callbacks( Activate, NULL );
93 /*****************************************************************************
94 * Activate: allocate a chroma function
95 *****************************************************************************
96 * This function allocates and initializes a chroma function
97 *****************************************************************************/
/*
 * Selects the conversion routine that matches the vout's render (source) and
 * output (destination) chroma and stores it in p_vout->chroma.pf_convert.
 *
 * p_this is used as a vout_thread_t.  The values returned on success and on
 * failure are not visible in this excerpt.
 */
98 static int Activate( vlc_object_t *p_this )
100 vout_thread_t *p_vout = (vout_thread_t *)p_this;
/* '&' binds tighter than '||', so this tests "width is odd OR height is
 * odd".  The branch body is not visible here; presumably the module refuses
 * odd dimensions because the converters work on 2x2 pixel groups -- confirm. */
102 if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
/* Outer switch: source chroma.  The three planar 4:2:0 FOURCCs share the
 * same destination dispatch. */
107 switch( p_vout->render.i_chroma )
109 case VLC_FOURCC('Y','V','1','2'):
110 case VLC_FOURCC('I','4','2','0'):
111 case VLC_FOURCC('I','Y','U','V'):
/* Inner switch: destination chroma.  FOURCCs grouped under one assignment
 * are served by the same converter. */
112 switch( p_vout->output.i_chroma )
114 case VLC_FOURCC('Y','U','Y','2'):
115 case VLC_FOURCC('Y','U','N','V'):
116 p_vout->chroma.pf_convert = I420_YUY2;
119 case VLC_FOURCC('Y','V','Y','U'):
120 p_vout->chroma.pf_convert = I420_YVYU;
123 case VLC_FOURCC('U','Y','V','Y'):
124 case VLC_FOURCC('U','Y','N','V'):
125 case VLC_FOURCC('Y','4','2','2'):
126 p_vout->chroma.pf_convert = I420_UYVY;
/* IUYV and cyuv are not available in the AltiVec build. */
129 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
130 case VLC_FOURCC('I','U','Y','V'):
131 p_vout->chroma.pf_convert = I420_IUYV;
134 case VLC_FOURCC('c','y','u','v'):
135 p_vout->chroma.pf_convert = I420_cyuv;
/* Y211 is only available in the plain C build. */
139 #if defined (MODULE_NAME_IS_i420_yuy2)
140 case VLC_FOURCC('Y','2','1','1'):
141 p_vout->chroma.pf_convert = I420_Y211;
157 /* Following functions are local */
159 /*****************************************************************************
160 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
161 *****************************************************************************/
/*
 * Converts the planar 4:2:0 picture p_source into the packed picture p_dest
 * (luma byte first, then a chroma byte), producing two destination lines per
 * chroma line.  p_vout supplies the render width and height that drive the
 * loops.
 *
 * p_line1/p_line2 walk two consecutive destination lines and p_y1/p_y2 two
 * consecutive luma lines.  Only the second pointer of each pair is
 * initialised at declaration; the per-iteration setup of the first one is
 * not visible in this excerpt (presumably p_line1 = p_line2 and p_y1 = p_y2
 * before the second pointer is advanced by one pitch -- confirm).
 */
162 static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
165 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
166 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
167 uint8_t *p_u = p_source->U_PIXELS;
168 uint8_t *p_v = p_source->V_PIXELS;
172 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step to the next pair of lines: the visible statements advance p_line2 and
 * p_y2 by one full pitch each. */
173 #define VEC_NEXT_LINES( ) \
175 p_line2 += p_dest->p->i_pitch; \
177 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes from each chroma plane and advance both chroma pointers. */
179 #define VEC_LOAD_UV( ) \
180 u_vec = vec_ld( 0, p_u ); p_u += 16; \
181 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleave U with V using merge function `a` (vec_mergeh or vec_mergel
 * selects which half of the loaded chroma is used), then for each of the two
 * lines load 16 luma bytes, interleave them luma-first with the chroma pairs
 * and store 32 output bytes.  The same chroma vector serves both lines. */
183 #define VEC_MERGE( a ) \
184 uv_vec = a( u_vec, v_vec ); \
185 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
186 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
187 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
188 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
189 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
190 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
192 vector unsigned char u_vec;
193 vector unsigned char v_vec;
194 vector unsigned char uv_vec;
195 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
197 if( !( ( p_vout->render.i_width % 32 ) |
198 ( p_vout->render.i_height % 2 ) ) )
200 /* Width is a multiple of 32, we take 2 lines at a time */
201 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
/* Each iteration emits 32 pixels on both lines: one 16-byte chroma load
 * feeds a high-half merge and a low-half merge. */
204 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
207 VEC_MERGE( vec_mergeh );
208 VEC_MERGE( vec_mergel );
212 else if( !( ( p_vout->render.i_width % 16 ) |
213 ( p_vout->render.i_height % 4 ) ) )
215 /* Width is only a multiple of 16, we take 4 lines at a time */
216 for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
218 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
220 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
223 VEC_MERGE( vec_mergeh );
224 VEC_MERGE( vec_mergel );
227 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
229 VEC_MERGE( vec_mergeh );
231 /* Line 3 and 4, pixels 0 to 16 */
233 VEC_MERGE( vec_mergel );
235 /* Line 3 and 4, pixels 16 to ( width ) */
236 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
239 VEC_MERGE( vec_mergeh );
240 VEC_MERGE( vec_mergel );
246 /* Neither vector layout fits these dimensions: use the C version */
247 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane, the chroma planes and the destination.
 * They are added to the pointers after every processed pair of lines. */
252 const int i_source_margin = p_source->p[0].i_pitch
253 - p_source->p[0].i_visible_pitch;
254 const int i_source_margin_c = p_source->p[1].i_pitch
255 - p_source->p[1].i_visible_pitch;
256 const int i_dest_margin = p_dest->p->i_pitch
257 - p_dest->p->i_visible_pitch;
/* One iteration per pair of lines. */
259 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
262 p_line2 += p_dest->p->i_pitch;
265 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Non-MMX builds iterate once per pair of pixels (loop body not visible in
 * this excerpt). */
267 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
268 for( i_x = p_vout->render.i_width / 2 ; i_x-- ; )
/* MMX build: 8 pixels per MMX call, then the leftover pixels of a width that
 * is not a multiple of 8 are handled one pair at a time. */
273 for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
275 MMX_CALL( MMX_YUV420_YUYV );
277 for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of each line. */
283 p_y1 += i_source_margin;
284 p_y2 += i_source_margin;
285 p_u += i_source_margin_c;
286 p_v += i_source_margin_c;
287 p_line1 += i_dest_margin;
288 p_line2 += i_dest_margin;
291 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
296 /*****************************************************************************
297 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
298 *****************************************************************************/
/*
 * Converts the planar 4:2:0 picture p_source into the packed picture p_dest
 * with luma first and the chroma pair in V-then-U order, producing two
 * destination lines per chroma line.  p_vout supplies the render width and
 * height that drive the loops.
 *
 * Only p_line2 and p_y2 are initialised at declaration; the per-iteration
 * setup of p_line1 and p_y1 is not visible in this excerpt (presumably they
 * take the previous value of p_line2 / p_y2 -- confirm).
 */
299 static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
302 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
303 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
304 uint8_t *p_u = p_source->U_PIXELS;
305 uint8_t *p_v = p_source->V_PIXELS;
309 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step to the next pair of lines: the visible statements advance p_line2 and
 * p_y2 by one full pitch each. */
310 #define VEC_NEXT_LINES( ) \
312 p_line2 += p_dest->p->i_pitch; \
314 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes from each chroma plane and advance both chroma pointers. */
316 #define VEC_LOAD_UV( ) \
317 u_vec = vec_ld( 0, p_u ); p_u += 16; \
318 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Same scheme as the other packed converters, except that V is merged before
 * U: `a` picks the chroma half, then luma is interleaved luma-first with the
 * V/U pairs for both lines (32 output bytes per line). */
320 #define VEC_MERGE( a ) \
321 vu_vec = a( v_vec, u_vec ); \
322 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
323 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
324 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
325 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
326 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
327 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
329 vector unsigned char u_vec;
330 vector unsigned char v_vec;
331 vector unsigned char vu_vec;
332 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
334 if( !( ( p_vout->render.i_width % 32 ) |
335 ( p_vout->render.i_height % 2 ) ) )
337 /* Width is a multiple of 32, we take 2 lines at a time */
338 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
/* Each iteration emits 32 pixels on both lines. */
341 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
344 VEC_MERGE( vec_mergeh );
345 VEC_MERGE( vec_mergel );
349 else if( !( ( p_vout->render.i_width % 16 ) |
350 ( p_vout->render.i_height % 4 ) ) )
352 /* Width is only a multiple of 16, we take 4 lines at a time */
353 for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
355 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
357 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
360 VEC_MERGE( vec_mergeh );
361 VEC_MERGE( vec_mergel );
364 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
366 VEC_MERGE( vec_mergeh );
368 /* Line 3 and 4, pixels 0 to 16 */
370 VEC_MERGE( vec_mergel );
372 /* Line 3 and 4, pixels 16 to ( width ) */
373 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
376 VEC_MERGE( vec_mergeh );
377 VEC_MERGE( vec_mergel );
383 /* Neither vector layout fits these dimensions: use the C version */
384 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane, the chroma planes and the destination. */
389 const int i_source_margin = p_source->p[0].i_pitch
390 - p_source->p[0].i_visible_pitch;
391 const int i_source_margin_c = p_source->p[1].i_pitch
392 - p_source->p[1].i_visible_pitch;
393 const int i_dest_margin = p_dest->p->i_pitch
394 - p_dest->p->i_visible_pitch;
/* One iteration per pair of lines. */
396 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
399 p_line2 += p_dest->p->i_pitch;
402 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Eight pixels per iteration in both the C and the MMX variant.
 * NOTE(review): no loop for the ( width % 8 ) leftover pixels is visible in
 * this function, unlike I420_YUY2 and I420_UYVY -- confirm whether widths
 * that are not a multiple of 8 leave the right edge unconverted. */
404 for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
406 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
412 MMX_CALL( MMX_YUV420_YVYU );
/* Skip the padding at the end of each line. */
416 p_y1 += i_source_margin;
417 p_y2 += i_source_margin;
418 p_u += i_source_margin_c;
419 p_v += i_source_margin_c;
420 p_line1 += i_dest_margin;
421 p_line2 += i_dest_margin;
423 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
428 /*****************************************************************************
429 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
430 *****************************************************************************/
/*
 * Converts the planar 4:2:0 picture p_source into the packed picture p_dest
 * with the chroma byte first and the luma byte second, producing two
 * destination lines per chroma line.  p_vout supplies the render width and
 * height that drive the loops.
 *
 * Only p_line2 and p_y2 are initialised at declaration; the per-iteration
 * setup of p_line1 and p_y1 is not visible in this excerpt (presumably they
 * take the previous value of p_line2 / p_y2 -- confirm).
 */
431 static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
434 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
435 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
436 uint8_t *p_u = p_source->U_PIXELS;
437 uint8_t *p_v = p_source->V_PIXELS;
441 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step to the next pair of lines: the visible statements advance p_line2 and
 * p_y2 by one full pitch each. */
442 #define VEC_NEXT_LINES( ) \
444 p_line2 += p_dest->p->i_pitch; \
446 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes from each chroma plane and advance both chroma pointers. */
448 #define VEC_LOAD_UV( ) \
449 u_vec = vec_ld( 0, p_u ); p_u += 16; \
450 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* `a` picks the chroma half and interleaves U with V; the result is then
 * interleaved chroma-first with 16 luma bytes for each of the two lines
 * (32 output bytes per line). */
452 #define VEC_MERGE( a ) \
453 uv_vec = a( u_vec, v_vec ); \
454 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
455 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
456 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
457 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
458 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
459 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
461 vector unsigned char u_vec;
462 vector unsigned char v_vec;
463 vector unsigned char uv_vec;
464 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
466 if( !( ( p_vout->render.i_width % 32 ) |
467 ( p_vout->render.i_height % 2 ) ) )
469 /* Width is a multiple of 32, we take 2 lines at a time */
470 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
/* Each iteration emits 32 pixels on both lines. */
473 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
476 VEC_MERGE( vec_mergeh );
477 VEC_MERGE( vec_mergel );
481 else if( !( ( p_vout->render.i_width % 16 ) |
482 ( p_vout->render.i_height % 4 ) ) )
484 /* Width is only a multiple of 16, we take 4 lines at a time */
485 for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
487 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
489 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
492 VEC_MERGE( vec_mergeh );
493 VEC_MERGE( vec_mergel );
496 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
498 VEC_MERGE( vec_mergeh );
500 /* Line 3 and 4, pixels 0 to 16 */
502 VEC_MERGE( vec_mergel );
504 /* Line 3 and 4, pixels 16 to ( width ) */
505 for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
508 VEC_MERGE( vec_mergeh );
509 VEC_MERGE( vec_mergel );
515 /* Neither vector layout fits these dimensions: use the C version */
516 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane, the chroma planes and the destination. */
521 const int i_source_margin = p_source->p[0].i_pitch
522 - p_source->p[0].i_visible_pitch;
523 const int i_source_margin_c = p_source->p[1].i_pitch
524 - p_source->p[1].i_visible_pitch;
525 const int i_dest_margin = p_dest->p->i_pitch
526 - p_dest->p->i_visible_pitch;
/* One iteration per pair of lines. */
528 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
531 p_line2 += p_dest->p->i_pitch;
534 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Eight pixels per iteration in both the C and the MMX variant. */
536 for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
538 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
544 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover pixels of a width that is not a multiple of 8, one pair at a
 * time (loop body not visible in this excerpt). */
547 for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x--; )
/* Skip the padding at the end of each line. */
552 p_y1 += i_source_margin;
553 p_y2 += i_source_margin;
554 p_u += i_source_margin_c;
555 p_v += i_source_margin_c;
556 p_line1 += i_dest_margin;
557 p_line2 += i_dest_margin;
/* The MMX build issues "emms" once the conversion loops are done.
 * NOTE(review): this is the only converter in which an emms is visible in
 * this excerpt -- confirm whether the other MMX paths need one as well. */
560 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
561 __asm__ __volatile__("emms" :: );
564 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
569 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
570 /*****************************************************************************
571 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
572 *****************************************************************************/
/* Placeholder converter: the only visible statement logs an error through
 * msg_Err() saying the conversion is unimplemented.  No write to the
 * destination picture is visible in this excerpt. */
573 static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source,
577 msg_Err( p_vout, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
580 /*****************************************************************************
581 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
582 *****************************************************************************/
/*
 * Converts the planar 4:2:0 picture p_source into p_dest using the same
 * pixel macros as the UYVY converter (MMX_YUV420_UYVY in the MMX build), but
 * walking the destination from the bottom upwards.  p_vout supplies the
 * render width and height that drive the loops.
 */
583 static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
/* Both destination pointers start past the last visible line: p_line1 at row
 * ( i_visible_lines + 1 ) and p_line2 at row i_visible_lines.  The loop
 * below moves them back by three pitches before each pair of lines.
 * NOTE(review): after the first "-= 3 * pitch" p_line1 sits on row
 * ( i_visible_lines - 2 ) and p_line2 on row ( i_visible_lines - 3 ).  If
 * the inner loop plus i_dest_margin advances each pointer by exactly one
 * pitch (not visible here), the last iteration would put p_line2 one row
 * before p_pixels and the bottom row would never be written -- verify
 * against the pixel macros in i420_yuy2.h. */
586 uint8_t *p_line1 = p_dest->p->p_pixels +
587 p_dest->p->i_visible_lines * p_dest->p->i_pitch
588 + p_dest->p->i_pitch;
589 uint8_t *p_line2 = p_dest->p->p_pixels +
590 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
/* Only p_y2 is initialised here; the setup of p_y1 is not visible in this
 * excerpt (presumably p_y1 = p_y2 at the top of each iteration). */
591 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
592 uint8_t *p_u = p_source->U_PIXELS;
593 uint8_t *p_v = p_source->V_PIXELS;
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane, the chroma planes and the destination. */
597 const int i_source_margin = p_source->p[0].i_pitch
598 - p_source->p[0].i_visible_pitch;
599 const int i_source_margin_c = p_source->p[1].i_pitch
600 - p_source->p[1].i_visible_pitch;
601 const int i_dest_margin = p_dest->p->i_pitch
602 - p_dest->p->i_visible_pitch;
/* One iteration per pair of source lines. */
604 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
/* Step the destination upwards before writing. */
606 p_line1 -= 3 * p_dest->p->i_pitch;
607 p_line2 -= 3 * p_dest->p->i_pitch;
610 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Eight pixels per iteration.  No loop for the ( width % 8 ) leftover pixels
 * is visible in this function. */
612 for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
614 #if defined (MODULE_NAME_IS_i420_yuy2)
620 MMX_CALL( MMX_YUV420_UYVY );
/* Skip the padding at the end of each line. */
624 p_y1 += i_source_margin;
625 p_y2 += i_source_margin;
626 p_u += i_source_margin_c;
627 p_v += i_source_margin_c;
628 p_line1 += i_dest_margin;
629 p_line2 += i_dest_margin;
632 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
634 /*****************************************************************************
635 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
636 *****************************************************************************/
637 #if defined (MODULE_NAME_IS_i420_yuy2)
/*
 * Converts the planar 4:2:0 picture p_source into the packed Y211 picture
 * p_dest.  Compiled only in the plain C build.  p_vout supplies the render
 * width and height that drive the loops.
 *
 * Only p_line2 and p_y2 are initialised at declaration; the per-iteration
 * setup of p_line1 and p_y1, the body of the inner loop and the end of the
 * function are not visible in this excerpt.
 */
638 static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source,
641 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
642 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
643 uint8_t *p_u = p_source->U_PIXELS;
644 uint8_t *p_v = p_source->V_PIXELS;
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane, the chroma planes and the destination. */
648 const int i_source_margin = p_source->p[0].i_pitch
649 - p_source->p[0].i_visible_pitch;
650 const int i_source_margin_c = p_source->p[1].i_pitch
651 - p_source->p[1].i_visible_pitch;
652 const int i_dest_margin = p_dest->p->i_pitch
653 - p_dest->p->i_visible_pitch;
/* One iteration per pair of source lines. */
655 for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
658 p_line2 += p_dest->p->i_pitch;
661 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Eight source pixels per iteration.  No loop for the ( width % 8 ) leftover
 * pixels is visible in this function. */
663 for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
/* Skip the padding at the end of each line. */
669 p_y1 += i_source_margin;
670 p_y2 += i_source_margin;
671 p_u += i_source_margin_c;
672 p_v += i_source_margin_c;
673 p_line1 += i_dest_margin;
674 p_line2 += i_dest_margin;