1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
44 #define SRC_FOURCC "I420,IYUV,YV12"
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
48 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
49 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
50 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
51 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
52 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
56 /*****************************************************************************
57 * Local and extern prototypes.
 *
 * Every prototype below is static.  Each conversion comes as a pair: a
 * worker taking ( filter, picture, picture ) and returning nothing, and a
 * *_Filter function taking ( filter, picture ) and returning a picture.
 * Activate() stores the *_Filter functions in pf_video_filter.
58 *****************************************************************************/
59 static int Activate ( vlc_object_t * );
/* Conversions declared for every build variant. */
61 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
62 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
63 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
64 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
66 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
/* Conversions that are not declared for the AltiVec variant. */
67 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
68 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
69 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
70 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
71 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
/* Y211 is declared only for the variant named plain i420_yuy2. */
73 #if defined (MODULE_NAME_IS_i420_yuy2)
74 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
75 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
78 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
79 /* Initialize MMX-specific constants */
/* i_00ffw: the 16-bit pattern 0x00ff repeated in all four words of a
 * 64-bit value. */
80 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
/* i_80w: 0x80 in each of the four low bytes, upper 32 bits zero.
 * NOTE(review): only the low half is filled although the value is 64 bits
 * wide; no use of this constant is visible in this listing, so confirm the
 * pattern against the code that consumes it. */
81 static const uint64_t i_80w = 0x0000000080808080ULL;
84 /*****************************************************************************
 * Module descriptor
 *
 * One branch per build variant.  Each visible branch sets a description
 * built from SRC_FOURCC and DEST_FOURCC, registers the "video filter2"
 * capability with a per-variant number (80, 160, 250, 250 -- presumably the
 * module score; confirm against the set_capability definition), and defines
 * CPU_CAPABILITY, which Activate() tests against vlc_CPU().
86 *****************************************************************************/
88 #if defined (MODULE_NAME_IS_i420_yuy2)
89 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
90 set_capability( "video filter2", 80 )
/* The plain variant asks for no CPU feature bit. */
91 # define CPU_CAPABILITY 0
92 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
93 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
94 set_capability( "video filter2", 160 )
95 # define CPU_CAPABILITY CPU_CAPABILITY_MMX
96 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
97 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
98 set_capability( "video filter2", 250 )
99 # define CPU_CAPABILITY CPU_CAPABILITY_SSE2
100 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* NOTE(review): this branch uses _() and ends with ';' whereas the three
 * branches above use N_() and no ';'.  The line that opens this call is not
 * visible in this listing; confirm whether the difference is intended. */
102 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
103 set_capability( "video filter2", 250 )
104 # define CPU_CAPABILITY CPU_CAPABILITY_ALTIVEC
/* Activate is the first callback; NULL is passed as the second one. */
106 set_callbacks( Activate, NULL )
109 /*****************************************************************************
110 * Activate: allocate a chroma function
111 *****************************************************************************
112 * This function allocates and initializes a chroma function
 *
 * p_this is cast to the filter_t being configured.  The visible code tests
 * the CPU capability, the parity of the input size and the equality of the
 * input and output sizes, then stores the *_Filter function matching the
 * output chroma in p_filter->pf_video_filter.  The statements executed when
 * a test fails, the case labels and the return statements are not visible
 * in this listing.
113 *****************************************************************************/
114 static int Activate( vlc_object_t *p_this )
116 filter_t *p_filter = (filter_t *)p_this;
/* NOTE(review): for the plain variant CPU_CAPABILITY is defined as 0, which
 * makes this condition always true.  Confirm that a preprocessor guard not
 * visible here excludes the test for that build. */
119 if( !(vlc_CPU() & CPU_CAPABILITY) )
/* True when the input width or the input height is odd. */
122 if( p_filter->fmt_in.video.i_width & 1
123 || p_filter->fmt_in.video.i_height & 1 )
/* True when input and output differ in width or height, i.e. when the
 * conversion would also have to rescale. */
128 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
129 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
/* Outer switch: input chroma.  Inner switch: output chroma, which picks the
 * conversion callback. */
132 switch( p_filter->fmt_in.video.i_chroma )
136 switch( p_filter->fmt_out.video.i_chroma )
139 p_filter->pf_video_filter = I420_YUY2_Filter;
143 p_filter->pf_video_filter = I420_YVYU_Filter;
147 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV and cyuv outputs are not offered by the AltiVec variant. */
149 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
150 case VLC_FOURCC('I','U','Y','V'):
151 p_filter->pf_video_filter = I420_IUYV_Filter;
155 p_filter->pf_video_filter = I420_cyuv_Filter;
/* Y211 output is offered by the plain variant only. */
159 #if defined (MODULE_NAME_IS_i420_yuy2)
161 p_filter->pf_video_filter = I420_Y211_Filter;
/* read_cycles: executes the x86 rdtsc instruction and receives its result
 * in the local v.  The return statement is not visible in this listing.
 * NOTE(review): the "=A" output constraint names the edx:eax register pair
 * only when the operand is a double word for the target, i.e. on 32-bit
 * x86; on x86-64 it may bind a single register and lose half of the
 * counter.  Confirm which targets build this helper. */
178 static inline unsigned long long read_cycles(void)
180 unsigned long long v;
181 __asm__ __volatile__("rdtsc" : "=A" (v): );
187 /* Following functions are local */
/* One VIDEO_FILTER_WRAPPER invocation per conversion worker, under the same
 * build conditions as the matching prototypes.  The macro is defined outside
 * this file; presumably it generates the I420_*_Filter functions declared
 * above -- confirm against its definition. */
189 VIDEO_FILTER_WRAPPER( I420_YUY2 )
190 VIDEO_FILTER_WRAPPER( I420_YVYU )
191 VIDEO_FILTER_WRAPPER( I420_UYVY )
192 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
193 VIDEO_FILTER_WRAPPER( I420_IUYV )
194 VIDEO_FILTER_WRAPPER( I420_cyuv )
196 #if defined (MODULE_NAME_IS_i420_yuy2)
197 VIDEO_FILTER_WRAPPER( I420_Y211 )
200 /*****************************************************************************
201 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
 *
 * Reads the Y, U and V planes of p_source and writes the single packed
 * plane p_dest->p; the picture size comes from p_filter->fmt_in.video.
 * Every outer-loop iteration handles a pair of lines through p_y1/p_y2
 * (luma) and p_line1/p_line2 (destination).  The MODULE_NAME_IS_* macros
 * select the AltiVec, MMX, SSE2 or plain variant of the loops at compile
 * time.
202 *****************************************************************************/
203 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* p_line1 and p_y1 are declared without an initialiser; p_line2 and p_y2
 * start at the first byte of the destination plane and of the luma plane. */
206 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
207 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
208 uint8_t *p_u = p_source->U_PIXELS;
209 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
213 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: the visible statements move p_line2 and p_y2 forward by
 * one pitch of their plane. */
214 #define VEC_NEXT_LINES( ) \
216 p_line2 += p_dest->p->i_pitch; \
218 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from each chroma plane and advance both
 * chroma pointers by 16 bytes. */
220 #define VEC_LOAD_UV( ) \
221 u_vec = vec_ld( 0, p_u ); p_u += 16; \
222 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine u_vec and v_vec with the merge operation a (U
 * operand first), then, for each of the two current lines, load 16 luma
 * bytes and store two vectors merging luma (first operand) with that
 * chroma.  All four pointers advance as they are used. */
224 #define VEC_MERGE( a ) \
225 uv_vec = a( u_vec, v_vec ); \
226 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
227 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
228 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
229 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
230 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
231 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
233 vector unsigned char u_vec;
234 vector unsigned char v_vec;
235 vector unsigned char uv_vec;
236 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
238 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
239 ( p_filter->fmt_in.video.i_height % 2 ) ) )
241 /* Width is a multiple of 32, we take 2 lines at a time */
242 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* One inner iteration per 32 pixels: two VEC_MERGE calls of 16 luma bytes
 * each. */
245 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
248 VEC_MERGE( vec_mergeh );
249 VEC_MERGE( vec_mergel );
253 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
255 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
256 ( p_filter->fmt_in.video.i_height % 4 ) ) )
258 /* Width is only a multiple of 16, we take 4 lines at a time */
259 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
261 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
263 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
266 VEC_MERGE( vec_mergeh );
267 VEC_MERGE( vec_mergel );
270 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
272 VEC_MERGE( vec_mergeh );
274 /* Line 3 and 4, pixels 0 to 16 */
276 VEC_MERGE( vec_mergel );
278 /* Line 3 and 4, pixels 16 to ( width ) */
279 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
282 VEC_MERGE( vec_mergeh );
283 VEC_MERGE( vec_mergel );
290 /* Otherwise fall back to the C version */
291 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line: luma plane, chroma (measured on plane 1) and destination. */
296 const int i_source_margin = p_source->p[0].i_pitch
297 - p_source->p[0].i_visible_pitch;
298 const int i_source_margin_c = p_source->p[1].i_pitch
299 - p_source->p[1].i_visible_pitch;
300 const int i_dest_margin = p_dest->p->i_pitch
301 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: two lines per outer iteration, 8 pixels per
 * inner iteration, then the remaining ( width % 8 ) pixels two at a time. */
303 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
304 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
307 p_line2 += p_dest->p->i_pitch;
310 p_y2 += p_source->p[Y_PLANE].i_pitch;
312 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
313 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
321 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
323 MMX_CALL( MMX_YUV420_YUYV );
326 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding that follows the visible part of each line. */
331 p_y1 += i_source_margin;
332 p_y2 += i_source_margin;
333 p_u += i_source_margin_c;
334 p_v += i_source_margin_c;
335 p_line1 += i_dest_margin;
336 p_line2 += i_dest_margin;
339 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
340 /* re-enable FPU registers */
344 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
348 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
350 ** SSE2 128 bits fetch/store instructions are faster
351 ** if memory access is 16 bytes aligned
/* The aligned loop is chosen only when the luma pitch, the destination
 * pitch and both start pointers are multiples of 16. */
354 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
355 ((intptr_t)p_line2|(intptr_t)p_y2))) )
357 /* use faster SSE2 aligned fetch and store */
358 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
361 p_line2 += p_dest->p->i_pitch;
364 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 16 pixels per SSE2 call, then the remaining ( width % 16 ) pixels two at
 * a time. */
366 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
368 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
370 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
375 p_y1 += i_source_margin;
376 p_y2 += i_source_margin;
377 p_u += i_source_margin_c;
378 p_v += i_source_margin_c;
379 p_line1 += i_dest_margin;
380 p_line2 += i_dest_margin;
385 /* use slower SSE2 unaligned fetch and store */
386 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
389 p_line2 += p_dest->p->i_pitch;
392 p_y2 += p_source->p[Y_PLANE].i_pitch;
394 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
396 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
398 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
403 p_y1 += i_source_margin;
404 p_y2 += i_source_margin;
405 p_u += i_source_margin_c;
406 p_v += i_source_margin_c;
407 p_line1 += i_dest_margin;
408 p_line2 += i_dest_margin;
411 /* make sure all SSE2 stores are visible thereafter */
414 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
417 /*****************************************************************************
418 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
 *
 * Same structure as the YUYV conversion: the Y, U and V planes of p_source
 * are read, the single packed plane p_dest->p is written, and a pair of
 * lines is handled per outer-loop iteration.  The visible difference is the
 * chroma order (V before U) and the MMX/SSE2 macro names.
419 *****************************************************************************/
420 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* p_line1 and p_y1 are declared without an initialiser; p_line2 and p_y2
 * start at the first byte of the destination plane and of the luma plane. */
423 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
424 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
425 uint8_t *p_u = p_source->U_PIXELS;
426 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
430 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: the visible statements move p_line2 and p_y2 forward by
 * one pitch of their plane. */
431 #define VEC_NEXT_LINES( ) \
433 p_line2 += p_dest->p->i_pitch; \
435 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from each chroma plane and advance both
 * chroma pointers by 16 bytes. */
437 #define VEC_LOAD_UV( ) \
438 u_vec = vec_ld( 0, p_u ); p_u += 16; \
439 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine v_vec and u_vec with the merge operation a (V
 * operand first), then, for each of the two current lines, load 16 luma
 * bytes and store two vectors merging luma (first operand) with that
 * chroma. */
441 #define VEC_MERGE( a ) \
442 vu_vec = a( v_vec, u_vec ); \
443 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
444 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
445 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
446 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
447 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
448 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
450 vector unsigned char u_vec;
451 vector unsigned char v_vec;
452 vector unsigned char vu_vec;
453 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
455 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
456 ( p_filter->fmt_in.video.i_height % 2 ) ) )
458 /* Width is a multiple of 32, we take 2 lines at a time */
459 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* One inner iteration per 32 pixels: two VEC_MERGE calls of 16 luma bytes
 * each. */
462 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
465 VEC_MERGE( vec_mergeh );
466 VEC_MERGE( vec_mergel );
470 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
471 ( p_filter->fmt_in.video.i_height % 4 ) ) )
473 /* Width is only a multiple of 16, we take 4 lines at a time */
474 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
476 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
478 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
481 VEC_MERGE( vec_mergeh );
482 VEC_MERGE( vec_mergel );
485 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
487 VEC_MERGE( vec_mergeh );
489 /* Line 3 and 4, pixels 0 to 16 */
491 VEC_MERGE( vec_mergel );
493 /* Line 3 and 4, pixels 16 to ( width ) */
494 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
497 VEC_MERGE( vec_mergeh );
498 VEC_MERGE( vec_mergel );
504 /* Otherwise fall back to the C version */
505 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line: luma plane, chroma (measured on plane 1) and destination. */
510 const int i_source_margin = p_source->p[0].i_pitch
511 - p_source->p[0].i_visible_pitch;
512 const int i_source_margin_c = p_source->p[1].i_pitch
513 - p_source->p[1].i_visible_pitch;
514 const int i_dest_margin = p_dest->p->i_pitch
515 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: two lines per outer iteration, 8 pixels per
 * inner iteration, then the remaining ( width % 8 ) pixels two at a time. */
517 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
518 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
521 p_line2 += p_dest->p->i_pitch;
524 p_y2 += p_source->p[Y_PLANE].i_pitch;
526 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
528 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
534 MMX_CALL( MMX_YUV420_YVYU );
537 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding that follows the visible part of each line. */
542 p_y1 += i_source_margin;
543 p_y2 += i_source_margin;
544 p_u += i_source_margin_c;
545 p_v += i_source_margin_c;
546 p_line1 += i_dest_margin;
547 p_line2 += i_dest_margin;
550 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
551 /* re-enable FPU registers */
555 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
559 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
561 ** SSE2 128 bits fetch/store instructions are faster
562 ** if memory access is 16 bytes aligned
/* The aligned loop is chosen only when the luma pitch, the destination
 * pitch and both start pointers are multiples of 16. */
564 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
565 ((intptr_t)p_line2|(intptr_t)p_y2))) )
567 /* use faster SSE2 aligned fetch and store */
568 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
571 p_line2 += p_dest->p->i_pitch;
574 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 16 pixels per SSE2 call, then the remaining ( width % 16 ) pixels two at
 * a time. */
576 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
578 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
580 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
585 p_y1 += i_source_margin;
586 p_y2 += i_source_margin;
587 p_u += i_source_margin_c;
588 p_v += i_source_margin_c;
589 p_line1 += i_dest_margin;
590 p_line2 += i_dest_margin;
595 /* use slower SSE2 unaligned fetch and store */
596 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
599 p_line2 += p_dest->p->i_pitch;
602 p_y2 += p_source->p[Y_PLANE].i_pitch;
604 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
606 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
608 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
613 p_y1 += i_source_margin;
614 p_y2 += i_source_margin;
615 p_u += i_source_margin_c;
616 p_v += i_source_margin_c;
617 p_line1 += i_dest_margin;
618 p_line2 += i_dest_margin;
621 /* make sure all SSE2 stores are visible thereafter */
623 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
626 /*****************************************************************************
627 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
 *
 * Same structure as the YUYV conversion: the Y, U and V planes of p_source
 * are read, the single packed plane p_dest->p is written, and a pair of
 * lines is handled per outer-loop iteration.  The visible difference is
 * that chroma is the first operand of the final merges, and the MMX/SSE2
 * macro names.
628 *****************************************************************************/
629 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* p_line1 and p_y1 are declared without an initialiser; p_line2 and p_y2
 * start at the first byte of the destination plane and of the luma plane. */
632 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
633 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
634 uint8_t *p_u = p_source->U_PIXELS;
635 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
639 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: the visible statements move p_line2 and p_y2 forward by
 * one pitch of their plane. */
640 #define VEC_NEXT_LINES( ) \
642 p_line2 += p_dest->p->i_pitch; \
644 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from each chroma plane and advance both
 * chroma pointers by 16 bytes. */
646 #define VEC_LOAD_UV( ) \
647 u_vec = vec_ld( 0, p_u ); p_u += 16; \
648 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine u_vec and v_vec with the merge operation a (U
 * operand first), then, for each of the two current lines, load 16 luma
 * bytes and store two vectors merging that chroma (first operand) with
 * luma. */
650 #define VEC_MERGE( a ) \
651 uv_vec = a( u_vec, v_vec ); \
652 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
653 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
654 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
655 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
656 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
657 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
659 vector unsigned char u_vec;
660 vector unsigned char v_vec;
661 vector unsigned char uv_vec;
662 vector unsigned char y_vec;
/* The bitwise OR of the two remainders is zero only when both are zero. */
664 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
665 ( p_filter->fmt_in.video.i_height % 2 ) ) )
667 /* Width is a multiple of 32, we take 2 lines at a time */
668 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* One inner iteration per 32 pixels: two VEC_MERGE calls of 16 luma bytes
 * each. */
671 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
674 VEC_MERGE( vec_mergeh );
675 VEC_MERGE( vec_mergel );
679 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
680 ( p_filter->fmt_in.video.i_height % 4 ) ) )
682 /* Width is only a multiple of 16, we take 4 lines at a time */
683 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
685 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
687 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
690 VEC_MERGE( vec_mergeh );
691 VEC_MERGE( vec_mergel );
694 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
696 VEC_MERGE( vec_mergeh );
698 /* Line 3 and 4, pixels 0 to 16 */
700 VEC_MERGE( vec_mergel );
702 /* Line 3 and 4, pixels 16 to ( width ) */
703 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
706 VEC_MERGE( vec_mergeh );
707 VEC_MERGE( vec_mergel );
713 /* Otherwise fall back to the C version */
714 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a line and the start of the
 * next line: luma plane, chroma (measured on plane 1) and destination. */
719 const int i_source_margin = p_source->p[0].i_pitch
720 - p_source->p[0].i_visible_pitch;
721 const int i_source_margin_c = p_source->p[1].i_pitch
722 - p_source->p[1].i_visible_pitch;
723 const int i_dest_margin = p_dest->p->i_pitch
724 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: two lines per outer iteration, 8 pixels per
 * inner iteration, then the remaining ( width % 8 ) pixels two at a time. */
726 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
727 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
730 p_line2 += p_dest->p->i_pitch;
733 p_y2 += p_source->p[Y_PLANE].i_pitch;
735 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
737 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
743 MMX_CALL( MMX_YUV420_UYVY );
746 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the padding that follows the visible part of each line. */
751 p_y1 += i_source_margin;
752 p_y2 += i_source_margin;
753 p_u += i_source_margin_c;
754 p_v += i_source_margin_c;
755 p_line1 += i_dest_margin;
756 p_line2 += i_dest_margin;
759 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
760 /* re-enable FPU registers */
764 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
768 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
770 ** SSE2 128 bits fetch/store instructions are faster
771 ** if memory access is 16 bytes aligned
/* The aligned loop is chosen only when the luma pitch, the destination
 * pitch and both start pointers are multiples of 16. */
773 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
774 ((intptr_t)p_line2|(intptr_t)p_y2))) )
776 /* use faster SSE2 aligned fetch and store */
777 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
780 p_line2 += p_dest->p->i_pitch;
783 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 16 pixels per SSE2 call, then the remaining ( width % 16 ) pixels two at
 * a time. */
785 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
787 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
789 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
794 p_y1 += i_source_margin;
795 p_y2 += i_source_margin;
796 p_u += i_source_margin_c;
797 p_v += i_source_margin_c;
798 p_line1 += i_dest_margin;
799 p_line2 += i_dest_margin;
804 /* use slower SSE2 unaligned fetch and store */
805 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
808 p_line2 += p_dest->p->i_pitch;
811 p_y2 += p_source->p[Y_PLANE].i_pitch;
813 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
815 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
817 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
822 p_y1 += i_source_margin;
823 p_y2 += i_source_margin;
824 p_u += i_source_margin_c;
825 p_v += i_source_margin_c;
826 p_line1 += i_dest_margin;
827 p_line2 += i_dest_margin;
830 /* make sure all SSE2 stores are visible thereafter */
832 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
835 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
836 /*****************************************************************************
837 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
 *
 * Not implemented: the visible body marks both pictures as unused and logs
 * an error through p_filter.  No pixel is converted by the visible lines.
838 *****************************************************************************/
839 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
842 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
844 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
847 /*****************************************************************************
848 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
 *
 * Reads the Y, U and V planes of p_source from the top and writes the
 * packed plane p_dest->p starting from its bottom; the picture size comes
 * from p_filter->fmt_in.video.  The MMX and SSE2 loops reuse the UYVY
 * macros.
849 *****************************************************************************/
850 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
/* Both destination pointers start past the last visible line: p_line2 one
 * line after it, p_line1 two lines after it. */
853 uint8_t *p_line1 = p_dest->p->p_pixels +
854 p_dest->p->i_visible_lines * p_dest->p->i_pitch
855 + p_dest->p->i_pitch;
856 uint8_t *p_line2 = p_dest->p->p_pixels +
857 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
858 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
859 uint8_t *p_u = p_source->U_PIXELS;
860 uint8_t *p_v = p_source->V_PIXELS;
/* Bytes between the end of the visible part of a line and the start of the
 * next line: luma plane, chroma (measured on plane 1) and destination. */
864 const int i_source_margin = p_source->p[0].i_pitch
865 - p_source->p[0].i_visible_pitch;
866 const int i_source_margin_c = p_source->p[1].i_pitch
867 - p_source->p[1].i_visible_pitch;
868 const int i_dest_margin = p_dest->p->i_pitch
869 - p_dest->p->i_visible_pitch;
871 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
872 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* Step both destination pointers three lines back before each pair.
 * NOTE(review): assuming the inner loops plus i_dest_margin move each
 * pointer forward by exactly one pitch, the first pair lands on lines
 * ( visible_lines - 2 ) and ( visible_lines - 3 ), so the last visible line
 * is never written and the final pair reaches one line before the buffer
 * when the height equals i_visible_lines.  Confirm against the lines not
 * visible in this listing. */
874 p_line1 -= 3 * p_dest->p->i_pitch;
875 p_line2 -= 3 * p_dest->p->i_pitch;
878 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 8 pixels per inner iteration, then the remaining ( width % 8 ) pixels two
 * at a time. */
880 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
882 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
888 MMX_CALL( MMX_YUV420_UYVY );
891 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding that follows the visible part of each line. */
896 p_y1 += i_source_margin;
897 p_y2 += i_source_margin;
898 p_u += i_source_margin_c;
899 p_v += i_source_margin_c;
900 p_line1 += i_dest_margin;
901 p_line2 += i_dest_margin;
904 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
905 /* re-enable FPU registers */
909 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
911 ** SSE2 128 bits fetch/store instructions are faster
912 ** if memory access is 16 bytes aligned
/* The aligned loop is chosen only when the luma pitch, the destination
 * pitch and both start pointers are multiples of 16. */
914 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
915 ((intptr_t)p_line2|(intptr_t)p_y2))) )
917 /* use faster SSE2 aligned fetch and store */
918 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): unlike the non-SSE2 loop, which steps the destination
 * pointers backwards, this loop moves p_line2 forward although it starts
 * past the last visible line.  That looks like it writes beyond the
 * destination plane and does not flip the picture; confirm against the
 * lines not visible in this listing. */
921 p_line2 += p_dest->p->i_pitch;
924 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 16 pixels per SSE2 call, then the remaining ( width % 16 ) pixels two at
 * a time. */
926 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
928 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
930 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
935 p_y1 += i_source_margin;
936 p_y2 += i_source_margin;
937 p_u += i_source_margin_c;
938 p_v += i_source_margin_c;
939 p_line1 += i_dest_margin;
940 p_line2 += i_dest_margin;
945 /* use slower SSE2 unaligned fetch and store */
946 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): same forward step as in the aligned loop; see the concern
 * raised there. */
949 p_line2 += p_dest->p->i_pitch;
952 p_y2 += p_source->p[Y_PLANE].i_pitch;
954 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
956 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
958 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
963 p_y1 += i_source_margin;
964 p_y2 += i_source_margin;
965 p_u += i_source_margin_c;
966 p_v += i_source_margin_c;
967 p_line1 += i_dest_margin;
968 p_line2 += i_dest_margin;
971 /* make sure all SSE2 stores are visible thereafter */
973 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
975 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
977 /*****************************************************************************
978 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
979 *****************************************************************************/
980 #if defined (MODULE_NAME_IS_i420_yuy2)
981 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
984 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
985 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
986 uint8_t *p_u = p_source->U_PIXELS;
987 uint8_t *p_v = p_source->V_PIXELS;
991 const int i_source_margin = p_source->p[0].i_pitch
992 - p_source->p[0].i_visible_pitch;
993 const int i_source_margin_c = p_source->p[1].i_pitch
994 - p_source->p[1].i_visible_pitch;
995 const int i_dest_margin = p_dest->p->i_pitch
996 - p_dest->p->i_visible_pitch;
998 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
1001 p_line2 += p_dest->p->i_pitch;
1004 p_y2 += p_source->p[Y_PLANE].i_pitch;
1006 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
1012 p_y1 += i_source_margin;
1013 p_y2 += i_source_margin;
1014 p_u += i_source_margin_c;
1015 p_v += i_source_margin_c;
1016 p_line1 += i_dest_margin;
1017 p_line2 += i_dest_margin;