1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
/* Source chroma list; concatenated into the module description strings. */
44 #define SRC_FOURCC "I420,IYUV,YV12"
/* Destination chroma list, one per build variant. Only the plain C build
 * lists Y211; the AltiVec build lists neither IUYV nor cyuv. */
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
48 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
49 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
50 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
51 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
52 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
56 /*****************************************************************************
57 * Local and extern prototypes.
58 *****************************************************************************/
/* Module activation callback (registered through set_callbacks). */
59 static int Activate ( vlc_object_t * );
/* Conversions declared for every build variant: the conversion routine
 * itself, and the matching *_Filter entry point that Activate stores in
 * pf_video_filter. */
61 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
62 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
63 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
64 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
66 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
/* IUYV and cyuv are declared for every variant except AltiVec. */
67 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
68 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
69 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
70 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
71 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
/* Y211 is declared for the plain C build only. */
73 #if defined (MODULE_NAME_IS_i420_yuy2)
74 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
75 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
78 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
79 /* Initialize MMX-specific constants */
/* 0x00ff in each of the four 16-bit words of the quadword. */
80 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
/* 0x80 in each of the four low bytes; the four high bytes are zero.
 * NOTE(review): neither constant is referenced in the visible code;
 * presumably they are consumed by the MMX macros in i420_yuy2.h - confirm. */
81 static const uint64_t i_80w = 0x0000000080808080ULL;
84 /*****************************************************************************
 * Per-variant module registration: description string, "video filter2"
 * capability score (80 for plain C, 160 for MMX, 250 for SSE2 and AltiVec)
 * and the activation callback.
86 *****************************************************************************/
88 #if defined (MODULE_NAME_IS_i420_yuy2)
89 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
90 set_capability( "video filter2", 80 )
91 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
92 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
93 set_capability( "video filter2", 160 )
94 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
95 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
96 set_capability( "video filter2", 250 )
97 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
99 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
100 set_capability( "video filter2", 250 )
/* The AltiVec variant additionally declares an ALTIVEC requirement. */
101 add_requirement( ALTIVEC )
/* Activate is the open callback; no close callback is registered. */
103 set_callbacks( Activate, NULL )
106 /*****************************************************************************
107 * Activate: allocate a chroma function
108 *****************************************************************************
109 * This function allocates and initializes a chroma function
 *
 * In the visible code this means: check the input/output dimensions, then
 * store the *_Filter entry point that matches the output chroma in
 * p_filter->pf_video_filter.
 *
 * p_this: the object being activated; it is used as a filter_t.
110 *****************************************************************************/
111 static int Activate( vlc_object_t *p_this )
113 filter_t *p_filter = (filter_t *)p_this;
/* Test for an odd input width or height. The action taken is not visible
 * here; presumably the filter refuses such formats - confirm. */
115 if( p_filter->fmt_in.video.i_width & 1
116 || p_filter->fmt_in.video.i_height & 1 )
/* Test for input and output dimensions that differ (action not visible;
 * presumably also a refusal, since the visible conversions never scale). */
121 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
122 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
/* Outer switch: input chroma. Inner switch: output chroma, which selects
 * the conversion entry point. Most case labels are not visible here. */
125 switch( p_filter->fmt_in.video.i_chroma )
129 switch( p_filter->fmt_out.video.i_chroma )
132 p_filter->pf_video_filter = I420_YUY2_Filter;
136 p_filter->pf_video_filter = I420_YVYU_Filter;
140 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV and cyuv outputs: every variant except AltiVec. */
142 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
143 case VLC_FOURCC('I','U','Y','V'):
144 p_filter->pf_video_filter = I420_IUYV_Filter;
148 p_filter->pf_video_filter = I420_cyuv_Filter;
/* Y211 output: plain C build only. */
152 #if defined (MODULE_NAME_IS_i420_yuy2)
154 p_filter->pf_video_filter = I420_Y211_Filter;
/* Read the CPU time-stamp counter with the x86 "rdtsc" instruction.
 * NOTE(review): the "=A" output constraint denotes the edx:eax pair only on
 * 32-bit x86. On x86-64 GCC may pick either rax or rdx for a 64-bit operand,
 * so the value would be wrong there - confirm this helper is only built for
 * 32-bit targets (its guard and return statement are not visible here). */
171 static inline unsigned long long read_cycles(void)
173 unsigned long long v;
174 __asm__ __volatile__("rdtsc" : "=A" (v): );
180 /* Following functions are local */
/* One VIDEO_FILTER_WRAPPER instantiation per conversion routine, under the
 * same build-variant conditions as the prototypes.
 * NOTE(review): the macro is defined outside this file; presumably it
 * generates the I420_xxx_Filter entry points that Activate installs -
 * confirm against its definition. */
182 VIDEO_FILTER_WRAPPER( I420_YUY2 )
183 VIDEO_FILTER_WRAPPER( I420_YVYU )
184 VIDEO_FILTER_WRAPPER( I420_UYVY )
185 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
186 VIDEO_FILTER_WRAPPER( I420_IUYV )
187 VIDEO_FILTER_WRAPPER( I420_cyuv )
189 #if defined (MODULE_NAME_IS_i420_yuy2)
190 VIDEO_FILTER_WRAPPER( I420_Y211 )
193 /*****************************************************************************
194 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
 *
 * p_filter: supplies the frame size (fmt_in.video.i_width / i_height).
 * p_source: planar input, read through Y_PIXELS, U_PIXELS and V_PIXELS.
 * p_dest:   packed output, written through its plane p_dest->p.
 *
 * Every code path handles two picture lines per outer iteration
 * (p_y1/p_y2 in, p_line1/p_line2 out) while walking p_u and p_v.
195 *****************************************************************************/
196 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* Only the second pointer of each pair is initialised here; the per-pair
 * assignment of p_line1 / p_y1 is not visible (presumably p_line1 = p_line2
 * and p_y1 = p_y2 before the increments below - confirm). */
199 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
200 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
201 uint8_t *p_u = p_source->U_PIXELS;
202 uint8_t *p_v = p_source->V_PIXELS;
206 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output line and second luma line by one pitch. */
207 #define VEC_NEXT_LINES( ) \
209 p_line2 += p_dest->p->i_pitch; \
211 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both pointers 16 bytes. */
213 #define VEC_LOAD_UV( ) \
214 u_vec = vec_ld( 0, p_u ); p_u += 16; \
215 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* a is vec_mergeh or vec_mergel and picks which half of the loaded U/V
 * vectors gets interleaved (U first). The interleaved chroma is then merged
 * with 16 luma bytes of each line, luma first, and stored as 2 x 16 bytes
 * per output line; both lines share the same chroma vector. */
217 #define VEC_MERGE( a ) \
218 uv_vec = a( u_vec, v_vec ); \
219 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
220 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
221 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
222 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
223 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
224 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
226 vector unsigned char u_vec;
227 vector unsigned char v_vec;
228 vector unsigned char uv_vec;
229 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
231 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
232 ( p_filter->fmt_in.video.i_height % 2 ) ) )
234 /* Width is a multiple of 32, we take 2 lines at a time */
235 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
238 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
/* Two merges per 32 luma pixels: high then low half of the U/V vectors. */
241 VEC_MERGE( vec_mergeh );
242 VEC_MERGE( vec_mergel );
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
246 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
247 ( p_filter->fmt_in.video.i_height % 4 ) ) )
249 /* Width is only a multiple of 16, we take 4 lines at a time */
250 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
252 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
254 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
257 VEC_MERGE( vec_mergeh );
258 VEC_MERGE( vec_mergel );
261 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
263 VEC_MERGE( vec_mergeh );
265 /* Line 3 and 4, pixels 0 to 16 */
267 VEC_MERGE( vec_mergel );
269 /* Line 3 and 4, pixels 16 to ( width ) */
270 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
273 VEC_MERGE( vec_mergeh );
274 VEC_MERGE( vec_mergel );
280 /* Neither vector path applies: use the C version */
281 #undef VEC_NEXT_LINES
/* Per-line padding in bytes (pitch minus visible pitch) for the luma plane,
 * the chroma planes (taken from plane 1) and the destination plane. */
286 const int i_source_margin = p_source->p[0].i_pitch
287 - p_source->p[0].i_visible_pitch;
288 const int i_source_margin_c = p_source->p[1].i_pitch
289 - p_source->p[1].i_visible_pitch;
290 const int i_dest_margin = p_dest->p->i_pitch
291 - p_dest->p->i_visible_pitch;
/* C / MMX path (everything except the SSE2 build). */
293 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
294 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
297 p_line2 += p_dest->p->i_pitch;
300 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 8 pixels per iteration (loop body not visible for plain C). */
302 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
303 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
311 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
313 MMX_CALL( MMX_YUV420_YUYV );
/* Tail loop: the width % 8 leftover pixels, counted in pairs. */
316 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of every source and destination line. */
321 p_y1 += i_source_margin;
322 p_y2 += i_source_margin;
323 p_u += i_source_margin_c;
324 p_v += i_source_margin_c;
325 p_line1 += i_dest_margin;
326 p_line2 += i_dest_margin;
329 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
330 /* re-enable FPU registers */
334 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
338 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
340 ** SSE2 128 bits fetch/store instructions are faster
341 ** if memory access is 16 bytes aligned
/* Aligned path only when both pitches and both start pointers are
 * multiples of 16 (low four bits of their bitwise OR are zero). */
344 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
345 ((intptr_t)p_line2|(intptr_t)p_y2))) )
347 /* use faster SSE2 aligned fetch and store */
348 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
351 p_line2 += p_dest->p->i_pitch;
354 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 16 pixels per iteration. */
356 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
358 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
/* Tail loop: the width % 16 leftover pixels, counted in pairs. */
360 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
365 p_y1 += i_source_margin;
366 p_y2 += i_source_margin;
367 p_u += i_source_margin_c;
368 p_v += i_source_margin_c;
369 p_line1 += i_dest_margin;
370 p_line2 += i_dest_margin;
375 /* use slower SSE2 unaligned fetch and store */
376 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
379 p_line2 += p_dest->p->i_pitch;
382 p_y2 += p_source->p[Y_PLANE].i_pitch;
384 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
386 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
388 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
393 p_y1 += i_source_margin;
394 p_y2 += i_source_margin;
395 p_u += i_source_margin_c;
396 p_v += i_source_margin_c;
397 p_line1 += i_dest_margin;
398 p_line2 += i_dest_margin;
401 /* make sure all SSE2 stores are visible thereafter */
404 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
407 /*****************************************************************************
408 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
 *
 * p_filter: supplies the frame size (fmt_in.video.i_width / i_height).
 * p_source: planar input, read through Y_PIXELS, U_PIXELS and V_PIXELS.
 * p_dest:   packed output, written through its plane p_dest->p.
 *
 * Same loop structure as I420_YUY2; the visible differences are the V/U
 * merge order in the AltiVec macro and the *_YVYU MMX/SSE2 macro names.
409 *****************************************************************************/
410 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* Only the second pointer of each pair is initialised here; the per-pair
 * assignment of p_line1 / p_y1 is not visible (presumably p_line1 = p_line2
 * and p_y1 = p_y2 before the increments below - confirm). */
413 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
414 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
415 uint8_t *p_u = p_source->U_PIXELS;
416 uint8_t *p_v = p_source->V_PIXELS;
420 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output line and second luma line by one pitch. */
421 #define VEC_NEXT_LINES( ) \
423 p_line2 += p_dest->p->i_pitch; \
425 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both pointers 16 bytes. */
427 #define VEC_LOAD_UV( ) \
428 u_vec = vec_ld( 0, p_u ); p_u += 16; \
429 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* As in I420_YUY2 but the chroma vector is built V first (v_vec, u_vec);
 * luma is still the first operand of the final merges. */
431 #define VEC_MERGE( a ) \
432 vu_vec = a( v_vec, u_vec ); \
433 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
434 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
435 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
436 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
437 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
438 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
440 vector unsigned char u_vec;
441 vector unsigned char v_vec;
442 vector unsigned char vu_vec;
443 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
445 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
446 ( p_filter->fmt_in.video.i_height % 2 ) ) )
448 /* Width is a multiple of 32, we take 2 lines at a time */
449 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
452 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
455 VEC_MERGE( vec_mergeh );
456 VEC_MERGE( vec_mergel );
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
460 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
461 ( p_filter->fmt_in.video.i_height % 4 ) ) )
463 /* Width is only a multiple of 16, we take 4 lines at a time */
464 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
466 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
468 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
471 VEC_MERGE( vec_mergeh );
472 VEC_MERGE( vec_mergel );
475 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
477 VEC_MERGE( vec_mergeh );
479 /* Line 3 and 4, pixels 0 to 16 */
481 VEC_MERGE( vec_mergel );
483 /* Line 3 and 4, pixels 16 to ( width ) */
484 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
487 VEC_MERGE( vec_mergeh );
488 VEC_MERGE( vec_mergel );
494 /* Neither vector path applies: use the C version */
495 #undef VEC_NEXT_LINES
/* Per-line padding in bytes (pitch minus visible pitch) for the luma plane,
 * the chroma planes (taken from plane 1) and the destination plane. */
500 const int i_source_margin = p_source->p[0].i_pitch
501 - p_source->p[0].i_visible_pitch;
502 const int i_source_margin_c = p_source->p[1].i_pitch
503 - p_source->p[1].i_visible_pitch;
504 const int i_dest_margin = p_dest->p->i_pitch
505 - p_dest->p->i_visible_pitch;
/* C / MMX path (everything except the SSE2 build). */
507 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
508 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
511 p_line2 += p_dest->p->i_pitch;
514 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 8 pixels per iteration. */
516 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
518 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
524 MMX_CALL( MMX_YUV420_YVYU );
/* Tail loop: the width % 8 leftover pixels, counted in pairs. */
527 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of every source and destination line. */
532 p_y1 += i_source_margin;
533 p_y2 += i_source_margin;
534 p_u += i_source_margin_c;
535 p_v += i_source_margin_c;
536 p_line1 += i_dest_margin;
537 p_line2 += i_dest_margin;
540 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
541 /* re-enable FPU registers */
545 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
549 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
551 ** SSE2 128 bits fetch/store instructions are faster
552 ** if memory access is 16 bytes aligned
/* Aligned path only when both pitches and both start pointers are
 * multiples of 16 (low four bits of their bitwise OR are zero). */
554 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
555 ((intptr_t)p_line2|(intptr_t)p_y2))) )
557 /* use faster SSE2 aligned fetch and store */
558 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
561 p_line2 += p_dest->p->i_pitch;
564 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 16 pixels per iteration. */
566 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
568 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
/* Tail loop: the width % 16 leftover pixels, counted in pairs. */
570 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
575 p_y1 += i_source_margin;
576 p_y2 += i_source_margin;
577 p_u += i_source_margin_c;
578 p_v += i_source_margin_c;
579 p_line1 += i_dest_margin;
580 p_line2 += i_dest_margin;
585 /* use slower SSE2 unaligned fetch and store */
586 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
589 p_line2 += p_dest->p->i_pitch;
592 p_y2 += p_source->p[Y_PLANE].i_pitch;
594 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
596 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
598 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
603 p_y1 += i_source_margin;
604 p_y2 += i_source_margin;
605 p_u += i_source_margin_c;
606 p_v += i_source_margin_c;
607 p_line1 += i_dest_margin;
608 p_line2 += i_dest_margin;
611 /* make sure all SSE2 stores are visible thereafter */
613 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
616 /*****************************************************************************
617 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
 *
 * p_filter: supplies the frame size (fmt_in.video.i_width / i_height).
 * p_source: planar input, read through Y_PIXELS, U_PIXELS and V_PIXELS.
 * p_dest:   packed output, written through its plane p_dest->p.
 *
 * Same loop structure as I420_YUY2; the visible differences are the
 * chroma-first merge in the AltiVec macro and the *_UYVY MMX/SSE2 macro
 * names.
618 *****************************************************************************/
619 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* Only the second pointer of each pair is initialised here; the per-pair
 * assignment of p_line1 / p_y1 is not visible (presumably p_line1 = p_line2
 * and p_y1 = p_y2 before the increments below - confirm). */
622 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
623 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
624 uint8_t *p_u = p_source->U_PIXELS;
625 uint8_t *p_v = p_source->V_PIXELS;
629 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output line and second luma line by one pitch. */
630 #define VEC_NEXT_LINES( ) \
632 p_line2 += p_dest->p->i_pitch; \
634 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both pointers 16 bytes. */
636 #define VEC_LOAD_UV( ) \
637 u_vec = vec_ld( 0, p_u ); p_u += 16; \
638 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* As in I420_YUY2 (chroma built U first), but the final merges take the
 * chroma vector as first operand and luma as second. */
640 #define VEC_MERGE( a ) \
641 uv_vec = a( u_vec, v_vec ); \
642 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
643 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
644 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
645 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
646 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
647 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
649 vector unsigned char u_vec;
650 vector unsigned char v_vec;
651 vector unsigned char uv_vec;
652 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
654 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
655 ( p_filter->fmt_in.video.i_height % 2 ) ) )
657 /* Width is a multiple of 32, we take 2 lines at a time */
658 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
661 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
664 VEC_MERGE( vec_mergeh );
665 VEC_MERGE( vec_mergel );
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
669 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
670 ( p_filter->fmt_in.video.i_height % 4 ) ) )
672 /* Width is only a multiple of 16, we take 4 lines at a time */
673 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
675 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
677 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
680 VEC_MERGE( vec_mergeh );
681 VEC_MERGE( vec_mergel );
684 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
686 VEC_MERGE( vec_mergeh );
688 /* Line 3 and 4, pixels 0 to 16 */
690 VEC_MERGE( vec_mergel );
692 /* Line 3 and 4, pixels 16 to ( width ) */
693 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
696 VEC_MERGE( vec_mergeh );
697 VEC_MERGE( vec_mergel );
703 /* Neither vector path applies: use the C version */
704 #undef VEC_NEXT_LINES
/* Per-line padding in bytes (pitch minus visible pitch) for the luma plane,
 * the chroma planes (taken from plane 1) and the destination plane. */
709 const int i_source_margin = p_source->p[0].i_pitch
710 - p_source->p[0].i_visible_pitch;
711 const int i_source_margin_c = p_source->p[1].i_pitch
712 - p_source->p[1].i_visible_pitch;
713 const int i_dest_margin = p_dest->p->i_pitch
714 - p_dest->p->i_visible_pitch;
/* C / MMX path (everything except the SSE2 build). */
716 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
717 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
720 p_line2 += p_dest->p->i_pitch;
723 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 8 pixels per iteration. */
725 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
727 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
733 MMX_CALL( MMX_YUV420_UYVY );
/* Tail loop: the width % 8 leftover pixels, counted in pairs. */
736 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the padding at the end of every source and destination line. */
741 p_y1 += i_source_margin;
742 p_y2 += i_source_margin;
743 p_u += i_source_margin_c;
744 p_v += i_source_margin_c;
745 p_line1 += i_dest_margin;
746 p_line2 += i_dest_margin;
749 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
750 /* re-enable FPU registers */
754 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
758 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
760 ** SSE2 128 bits fetch/store instructions are faster
761 ** if memory access is 16 bytes aligned
/* Aligned path only when both pitches and both start pointers are
 * multiples of 16 (low four bits of their bitwise OR are zero). */
763 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
764 ((intptr_t)p_line2|(intptr_t)p_y2))) )
766 /* use faster SSE2 aligned fetch and store */
767 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
770 p_line2 += p_dest->p->i_pitch;
773 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 16 pixels per iteration. */
775 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
777 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Tail loop: the width % 16 leftover pixels, counted in pairs. */
779 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
784 p_y1 += i_source_margin;
785 p_y2 += i_source_margin;
786 p_u += i_source_margin_c;
787 p_v += i_source_margin_c;
788 p_line1 += i_dest_margin;
789 p_line2 += i_dest_margin;
794 /* use slower SSE2 unaligned fetch and store */
795 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
798 p_line2 += p_dest->p->i_pitch;
801 p_y2 += p_source->p[Y_PLANE].i_pitch;
803 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
805 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
807 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
812 p_y1 += i_source_margin;
813 p_y2 += i_source_margin;
814 p_u += i_source_margin_c;
815 p_v += i_source_margin_c;
816 p_line1 += i_dest_margin;
817 p_line2 += i_dest_margin;
820 /* make sure all SSE2 stores are visible thereafter */
822 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
825 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
826 /*****************************************************************************
827 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
 *
 * Not implemented: the visible body marks both pictures as unused and only
 * logs an error through msg_Err; nothing is written to p_dest.
828 *****************************************************************************/
829 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
/* Silence unused-parameter warnings; neither picture is touched. */
832 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
834 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
837 /*****************************************************************************
838 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
 *
 * p_filter: supplies the frame size (fmt_in.video.i_width / i_height).
 * p_source: planar input, read through Y_PIXELS, U_PIXELS and V_PIXELS.
 * p_dest:   packed output, written through its plane p_dest->p.
 *
 * Uses the same UYVY MMX/SSE2 macros as I420_UYVY; the output pointers
 * start at the end of the destination plane instead of the beginning.
839 *****************************************************************************/
840 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
/* p_line2 starts i_visible_lines pitches past p_pixels (one line beyond the
 * last visible line) and p_line1 one further pitch beyond that. */
843 uint8_t *p_line1 = p_dest->p->p_pixels +
844 p_dest->p->i_visible_lines * p_dest->p->i_pitch
845 + p_dest->p->i_pitch;
846 uint8_t *p_line2 = p_dest->p->p_pixels +
847 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
848 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
849 uint8_t *p_u = p_source->U_PIXELS;
850 uint8_t *p_v = p_source->V_PIXELS;
/* Per-line padding in bytes (pitch minus visible pitch) for the luma plane,
 * the chroma planes (taken from plane 1) and the destination plane. */
854 const int i_source_margin = p_source->p[0].i_pitch
855 - p_source->p[0].i_visible_pitch;
856 const int i_source_margin_c = p_source->p[1].i_pitch
857 - p_source->p[1].i_visible_pitch;
858 const int i_dest_margin = p_dest->p->i_pitch
859 - p_dest->p->i_visible_pitch;
/* C / MMX path (everything except the SSE2 build). */
861 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
862 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* Both output pointers step back three pitches before each line pair.
 * NOTE(review): assuming each row pass advances them by one visible pitch
 * and the margin below adds the rest of the pitch, the net step is two
 * lines upward per iteration - confirm, the row bodies are not visible. */
864 p_line1 -= 3 * p_dest->p->i_pitch;
865 p_line2 -= 3 * p_dest->p->i_pitch;
868 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 8 pixels per iteration. */
870 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
872 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
878 MMX_CALL( MMX_YUV420_UYVY );
/* Tail loop: the width % 8 leftover pixels, counted in pairs. */
881 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of every source and destination line. */
886 p_y1 += i_source_margin;
887 p_y2 += i_source_margin;
888 p_u += i_source_margin_c;
889 p_v += i_source_margin_c;
890 p_line1 += i_dest_margin;
891 p_line2 += i_dest_margin;
894 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
895 /* re-enable FPU registers */
899 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
901 ** SSE2 128 bits fetch/store instructions are faster
902 ** if memory access is 16 bytes aligned
/* Aligned path only when both pitches and both start pointers are
 * multiples of 16 (low four bits of their bitwise OR are zero). */
904 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
905 ((intptr_t)p_line2|(intptr_t)p_y2))) )
907 /* use faster SSE2 aligned fetch and store */
908 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): unlike the non-SSE2 branch, which steps back 3 pitches,
 * this branch moves p_line2 forward although it was initialised past the
 * last visible line. That looks inconsistent with the "upside-down" output
 * and may write beyond the plane - confirm against the complete source. */
911 p_line2 += p_dest->p->i_pitch;
914 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: 16 pixels per iteration. */
916 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
918 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Tail loop: the width % 16 leftover pixels, counted in pairs. */
920 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
925 p_y1 += i_source_margin;
926 p_y2 += i_source_margin;
927 p_u += i_source_margin_c;
928 p_v += i_source_margin_c;
929 p_line1 += i_dest_margin;
930 p_line2 += i_dest_margin;
935 /* use slower SSE2 unaligned fetch and store */
936 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): same forward step as the aligned branch - see above. */
939 p_line2 += p_dest->p->i_pitch;
942 p_y2 += p_source->p[Y_PLANE].i_pitch;
944 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
946 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
948 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
953 p_y1 += i_source_margin;
954 p_y2 += i_source_margin;
955 p_u += i_source_margin_c;
956 p_v += i_source_margin_c;
957 p_line1 += i_dest_margin;
958 p_line2 += i_dest_margin;
961 /* make sure all SSE2 stores are visible thereafter */
963 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
965 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
967 /*****************************************************************************
968 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
 *
 * Compiled for the plain C build only.
 * p_filter: supplies the frame size (fmt_in.video.i_width / i_height).
 * p_source: planar input, read through Y_PIXELS, U_PIXELS and V_PIXELS.
 * p_dest:   packed output, written through its plane p_dest->p.
969 *****************************************************************************/
970 #if defined (MODULE_NAME_IS_i420_yuy2)
971 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
/* Only the second pointer of each pair is initialised here; the per-pair
 * assignment of p_line1 / p_y1 is not visible (presumably p_line1 = p_line2
 * and p_y1 = p_y2 before the increments below - confirm). */
974 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
975 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
976 uint8_t *p_u = p_source->U_PIXELS;
977 uint8_t *p_v = p_source->V_PIXELS;
/* Per-line padding in bytes (pitch minus visible pitch) for the luma plane,
 * the chroma planes (taken from plane 1) and the destination plane. */
981 const int i_source_margin = p_source->p[0].i_pitch
982 - p_source->p[0].i_visible_pitch;
983 const int i_source_margin_c = p_source->p[1].i_pitch
984 - p_source->p[1].i_visible_pitch;
985 const int i_dest_margin = p_dest->p->i_pitch
986 - p_dest->p->i_visible_pitch;
/* Two picture lines per outer iteration. */
988 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
991 p_line2 += p_dest->p->i_pitch;
994 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 8 pixels per inner iteration; no tail loop for width % 8 is visible. */
996 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* Skip the padding at the end of every source and destination line. */
1002 p_y1 += i_source_margin;
1003 p_y2 += i_source_margin;
1004 p_u += i_source_margin_c;
1005 p_v += i_source_margin_c;
1006 p_line1 += i_dest_margin;
1007 p_line2 += i_dest_margin;