1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
/* Chroma lists that are pasted into the module description strings.
 * SRC_FOURCC is identical for every build; DEST_FOURCC (and VLC_TARGET for
 * the x86 SIMD builds) depends on which MODULE_NAME_IS_* variant is compiled.
 * NOTE(review): SRC_FOURCC names YV12, but the YV12 case in Activate() is
 * commented out with a FIXME, so the description may over-advertise. */
44 #define SRC_FOURCC "I420,IYUV,YV12"
/* Plain C build: the only variant whose list includes Y211. */
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,Y211"
/* MMX build: same list without Y211. */
49 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
50 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV"
51 # define VLC_TARGET VLC_MMX
/* SSE2 build: same list as MMX.
 * NOTE(review): VLC_TARGET is set to VLC_SSE here, not to an SSE2-specific
 * name -- confirm that this is the intended target attribute. */
52 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV"
54 # define VLC_TARGET VLC_SSE
/* AltiVec build: neither IUYV nor Y211 is listed. */
55 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
56 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
60 /*****************************************************************************
61 * Local and extern prototypes.
 *
 * Activate is the callback handed to set_callbacks() in the module
 * descriptor. Every conversion comes as a pair: a worker taking
 * (filter, source picture, destination picture) and returning nothing, and
 * a *_Filter entry point taking (filter, picture) and returning a picture.
 * The *_Filter entry points are what Activate stores in pf_video_filter.
62 *****************************************************************************/
63 static int Activate ( vlc_object_t * );
65 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
66 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
67 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
68 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
69 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
70 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
/* IUYV is declared for every build except AltiVec. */
71 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
72 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
73 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
/* Y211 is declared for the plain C build only. */
75 #if defined (MODULE_NAME_IS_i420_yuy2)
76 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
77 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
80 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
81 /* Initialize MMX-specific constants */
/* i_00ffw: 0x00ff in each of the four 16-bit words of a 64-bit value.
 * i_80w:   0x80 in each of the four low bytes, upper 32 bits zero.
 * Neither constant is referenced by name in this file; presumably they are
 * consumed by the MMX macros from i420_yuy2.h -- confirm. */
82 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
83 static const uint64_t i_80w = 0x0000000080808080ULL;
86 /*****************************************************************************
 * Module descriptor.
 *
 * One branch per build variant. Each branch sets a description built from
 * SRC_FOURCC / DEST_FOURCC, registers the "video filter2" capability with a
 * variant-specific score (80 for plain C, 160 for MMX, 250 for SSE2 and
 * AltiVec) and defines vlc_CPU_capable(), the check Activate() performs
 * first. Activate is registered as the open callback; the second
 * set_callbacks() argument is NULL.
88 *****************************************************************************/
/* Plain C: vlc_CPU_capable() is always true. */
90 #if defined (MODULE_NAME_IS_i420_yuy2)
91 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
92 set_capability( "video filter2", 80 )
93 # define vlc_CPU_capable() (true)
94 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
95 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
96 set_capability( "video filter2", 160 )
97 # define vlc_CPU_capable() vlc_CPU_MMX()
98 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
99 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
100 set_capability( "video filter2", 250 )
101 # define vlc_CPU_capable() vlc_CPU_SSE2()
/* NOTE(review): this branch wraps the string in _() and ends with ';',
 * whereas the other three branches use N_() and no ';' -- confirm whether
 * the AltiVec description should follow the same form. */
102 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
104 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
105 set_capability( "video filter2", 250 )
106 # define vlc_CPU_capable() vlc_CPU_ALTIVEC()
108 set_callbacks( Activate, NULL )
111 /*****************************************************************************
112 * Activate: select the conversion callback for this filter instance
113 *****************************************************************************
114 * p_this is cast to filter_t. The function checks CPU support and the
 * picture geometry, then stores in p_filter->pf_video_filter the *_Filter
 * entry point that matches the output chroma. Nothing is allocated here.
115 *****************************************************************************/
116 static int Activate( vlc_object_t *p_this )
118 filter_t *p_filter = (filter_t *)p_this;
/* Variant-specific CPU check; the macro is defined next to the module
 * descriptor and is constant true for the plain C build. */
120 if( !vlc_CPU_capable() )
/* Tests whether the input width or height is odd. */
122 if( p_filter->fmt_in.video.i_width & 1
123 || p_filter->fmt_in.video.i_height & 1 )
/* Tests whether input and output differ in width, height or orientation. */
128 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
129 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height
130 || p_filter->fmt_in.video.orientation != p_filter->fmt_out.video.orientation )
/* Outer dispatch on the input chroma. The YV12 case is disabled: the
 * FIXME below says the filters would have to swap U and V first. */
133 switch( p_filter->fmt_in.video.i_chroma )
135 // case VLC_CODEC_YV12: FIXME invert U and V in the filters :)
/* Inner dispatch on the output chroma: one callback per packed format. */
137 switch( p_filter->fmt_out.video.i_chroma )
140 p_filter->pf_video_filter = I420_YUY2_Filter;
144 p_filter->pf_video_filter = I420_YVYU_Filter;
148 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV is offered by every build except AltiVec.
 * NOTE(review): I420_IUYV only logs "unimplemented", so selecting it here
 * accepts a conversion that is never performed. */
150 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
151 case VLC_FOURCC('I','U','Y','V'):
152 p_filter->pf_video_filter = I420_IUYV_Filter;
/* Y211 is offered by the plain C build only. */
156 #if defined (MODULE_NAME_IS_i420_yuy2)
158 p_filter->pf_video_filter = I420_Y211_Filter;
/* read_cycles: executes the x86 "rdtsc" instruction and receives its result
 * in a 64-bit variable through the "=A" output constraint.
 * NOTE(review): GCC documents the "A" constraint as the edx:eax register
 * pair only for 32-bit x86. On x86-64 a 64-bit operand is placed in a single
 * register, so the upper half of the counter would be lost; the GCC manual
 * recommends separate "=a" and "=d" outputs there. Confirm the target
 * before using this helper. */
175 static inline unsigned long long read_cycles(void)
177 unsigned long long v;
178 __asm__ __volatile__("rdtsc" : "=A" (v): );
184 /* Following functions are local */
/* One VIDEO_FILTER_WRAPPER instantiation per conversion worker, guarded by
 * the same build conditions as the prototypes. The macro is defined outside
 * this file; presumably it expands to the matching *_Filter entry point
 * declared above -- confirm against its definition. */
186 VIDEO_FILTER_WRAPPER( I420_YUY2 )
187 VIDEO_FILTER_WRAPPER( I420_YVYU )
188 VIDEO_FILTER_WRAPPER( I420_UYVY )
189 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
190 VIDEO_FILTER_WRAPPER( I420_IUYV )
192 #if defined (MODULE_NAME_IS_i420_yuy2)
193 VIDEO_FILTER_WRAPPER( I420_Y211 )
196 /*****************************************************************************
197 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
 *
 * p_filter supplies the picture width and height (fmt_in.video), p_source
 * is the planar input and p_dest the packed output. The picture is walked
 * two rows at a time with one implementation per build variant:
 * AltiVec vectors, plain C / MMX, or SSE2 (aligned or unaligned).
198 *****************************************************************************/
200 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* Cursors: p_line2 starts at the first output byte, p_y2 at the first luma
 * byte, p_u / p_v at the first byte of each chroma plane. */
203 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
204 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
205 uint8_t *p_u = p_source->U_PIXELS;
206 uint8_t *p_v = p_source->V_PIXELS;
210 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Row stepping: moves the second-row output and luma cursors down by one
 * pitch of their respective planes. */
211 #define VEC_NEXT_LINES( ) \
213 p_line2 += p_dest->p->i_pitch; \
215 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one 16-byte vector of U and one of V, advancing both cursors. */
217 #define VEC_LOAD_UV( ) \
218 u_vec = vec_ld( 0, p_u ); p_u += 16; \
219 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* `a` is vec_mergeh or vec_mergel and picks which half of the U/V vectors
 * is interleaved into uv_vec. That chroma vector is then merged with 16
 * luma bytes from each of the two rows (luma first), giving two 16-byte
 * stores per row. */
221 #define VEC_MERGE( a ) \
222 uv_vec = a( u_vec, v_vec ); \
223 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
224 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
225 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
226 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
227 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
228 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
230 vector unsigned char u_vec;
231 vector unsigned char v_vec;
232 vector unsigned char uv_vec;
233 vector unsigned char y_vec;
/* Both remainders are OR-ed, so the branch is taken only when the width is
 * a multiple of 32 and the height is even. */
235 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
236 ( p_filter->fmt_in.video.i_height % 2 ) ) )
238 /* Width is a multiple of 32, we take 2 lines at a time */
239 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
242 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
245 VEC_MERGE( vec_mergeh );
246 VEC_MERGE( vec_mergel );
250 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
/* Taken when the width is a multiple of 16 and the height a multiple of 4;
 * the #warning above flags this path as broken. */
252 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
253 ( p_filter->fmt_in.video.i_height % 4 ) ) )
255 /* Width is only a multiple of 16, we take 4 lines at a time */
256 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
258 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
260 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
263 VEC_MERGE( vec_mergeh );
264 VEC_MERGE( vec_mergel );
267 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
269 VEC_MERGE( vec_mergeh );
271 /* Line 3 and 4, pixels 0 to 16 */
273 VEC_MERGE( vec_mergel );
275 /* Line 3 and 4, pixels 16 to ( width ) */
276 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
279 VEC_MERGE( vec_mergeh );
280 VEC_MERGE( vec_mergel );
287 /* Crap, use the C version */
288 #undef VEC_NEXT_LINES
/* Per-row padding in bytes (pitch minus visible pitch) for the luma plane,
 * the first chroma plane and the packed output. */
293 const int i_source_margin = p_source->p[0].i_pitch
294 - p_source->p[0].i_visible_pitch;
295 const int i_source_margin_c = p_source->p[1].i_pitch
296 - p_source->p[1].i_visible_pitch;
297 const int i_dest_margin = p_dest->p->i_pitch
298 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: one iteration per pair of rows. */
300 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
301 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
304 p_line2 += p_dest->p->i_pitch;
307 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 8 pixels, in a C and an MMX flavour. */
309 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
310 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
318 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
320 MMX_CALL( MMX_YUV420_YUYV );
/* Pixels left over when the width is not a multiple of 8, two at a time. */
323 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of the rows just processed. */
328 p_y1 += i_source_margin;
329 p_y2 += i_source_margin;
330 p_u += i_source_margin_c;
331 p_v += i_source_margin_c;
332 p_line1 += i_dest_margin;
333 p_line2 += i_dest_margin;
336 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
337 /* re-enable FPU registers */
341 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
345 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
347 ** SSE2 128 bits fetch/store instructions are faster
348 ** if memory access is 16 bytes aligned
/* The aligned variant is chosen only when the luma pitch, the output pitch
 * and both start pointers are multiples of 16.
 * NOTE(review): the chroma pointers and chroma pitch are not part of this
 * test -- confirm the aligned macro does not require them to be aligned. */
351 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
352 ((intptr_t)p_line2|(intptr_t)p_y2))) )
354 /* use faster SSE2 aligned fetch and store */
355 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
358 p_line2 += p_dest->p->i_pitch;
361 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 16 pixels, then the leftover two at a time. */
363 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
365 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
367 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
372 p_y1 += i_source_margin;
373 p_y2 += i_source_margin;
374 p_u += i_source_margin_c;
375 p_v += i_source_margin_c;
376 p_line1 += i_dest_margin;
377 p_line2 += i_dest_margin;
382 /* use slower SSE2 unaligned fetch and store */
383 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
386 p_line2 += p_dest->p->i_pitch;
389 p_y2 += p_source->p[Y_PLANE].i_pitch;
391 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
393 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
395 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
400 p_y1 += i_source_margin;
401 p_y2 += i_source_margin;
402 p_u += i_source_margin_c;
403 p_v += i_source_margin_c;
404 p_line1 += i_dest_margin;
405 p_line2 += i_dest_margin;
408 /* make sure all SSE2 stores are visible thereafter */
411 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
414 /*****************************************************************************
415 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
 *
 * Same structure as I420_YUY2; the AltiVec path differs in merging V before
 * U, and the MMX / SSE2 paths use the *_YVYU macros. p_filter supplies the
 * picture size, p_source is the planar input, p_dest the packed output.
416 *****************************************************************************/
418 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* Cursors: p_line2 starts at the first output byte, p_y2 at the first luma
 * byte, p_u / p_v at the first byte of each chroma plane. */
421 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
422 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
423 uint8_t *p_u = p_source->U_PIXELS;
424 uint8_t *p_v = p_source->V_PIXELS;
428 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Row stepping: moves the second-row output and luma cursors down by one
 * pitch of their respective planes. */
429 #define VEC_NEXT_LINES( ) \
431 p_line2 += p_dest->p->i_pitch; \
433 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one 16-byte vector of U and one of V, advancing both cursors. */
435 #define VEC_LOAD_UV( ) \
436 u_vec = vec_ld( 0, p_u ); p_u += 16; \
437 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* `a` is vec_mergeh or vec_mergel. V is passed first here, so vu_vec holds
 * V/U pairs; it is then merged with 16 luma bytes from each of the two rows
 * (luma first), giving two 16-byte stores per row. */
439 #define VEC_MERGE( a ) \
440 vu_vec = a( v_vec, u_vec ); \
441 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
442 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
443 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
444 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
445 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
446 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
448 vector unsigned char u_vec;
449 vector unsigned char v_vec;
450 vector unsigned char vu_vec;
451 vector unsigned char y_vec;
/* Both remainders are OR-ed, so the branch is taken only when the width is
 * a multiple of 32 and the height is even. */
453 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
454 ( p_filter->fmt_in.video.i_height % 2 ) ) )
456 /* Width is a multiple of 32, we take 2 lines at a time */
457 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
460 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
463 VEC_MERGE( vec_mergeh );
464 VEC_MERGE( vec_mergel );
/* Taken when the width is a multiple of 16 and the height a multiple of 4.
 * NOTE(review): I420_YUY2 carries a #warning that its equivalent path is
 * broken on AltiVec; no such warning is present here -- confirm whether the
 * same problem applies. */
468 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
469 ( p_filter->fmt_in.video.i_height % 4 ) ) )
471 /* Width is only a multiple of 16, we take 4 lines at a time */
472 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
474 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
476 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
479 VEC_MERGE( vec_mergeh );
480 VEC_MERGE( vec_mergel );
483 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
485 VEC_MERGE( vec_mergeh );
487 /* Line 3 and 4, pixels 0 to 16 */
489 VEC_MERGE( vec_mergel );
491 /* Line 3 and 4, pixels 16 to ( width ) */
492 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
495 VEC_MERGE( vec_mergeh );
496 VEC_MERGE( vec_mergel );
502 /* Crap, use the C version */
503 #undef VEC_NEXT_LINES
/* Per-row padding in bytes (pitch minus visible pitch) for the luma plane,
 * the first chroma plane and the packed output. */
508 const int i_source_margin = p_source->p[0].i_pitch
509 - p_source->p[0].i_visible_pitch;
510 const int i_source_margin_c = p_source->p[1].i_pitch
511 - p_source->p[1].i_visible_pitch;
512 const int i_dest_margin = p_dest->p->i_pitch
513 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: one iteration per pair of rows. */
515 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
516 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
519 p_line2 += p_dest->p->i_pitch;
522 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 8 pixels; the body is chosen per build. */
524 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
526 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
532 MMX_CALL( MMX_YUV420_YVYU );
/* Pixels left over when the width is not a multiple of 8, two at a time. */
535 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of the rows just processed. */
540 p_y1 += i_source_margin;
541 p_y2 += i_source_margin;
542 p_u += i_source_margin_c;
543 p_v += i_source_margin_c;
544 p_line1 += i_dest_margin;
545 p_line2 += i_dest_margin;
548 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
549 /* re-enable FPU registers */
553 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
557 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
559 ** SSE2 128 bits fetch/store instructions are faster
560 ** if memory access is 16 bytes aligned
/* The aligned variant is chosen only when the luma pitch, the output pitch
 * and both start pointers are multiples of 16.
 * NOTE(review): the chroma pointers and chroma pitch are not part of this
 * test -- confirm the aligned macro does not require them to be aligned. */
562 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
563 ((intptr_t)p_line2|(intptr_t)p_y2))) )
565 /* use faster SSE2 aligned fetch and store */
566 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
569 p_line2 += p_dest->p->i_pitch;
572 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 16 pixels, then the leftover two at a time. */
574 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
576 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
578 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
583 p_y1 += i_source_margin;
584 p_y2 += i_source_margin;
585 p_u += i_source_margin_c;
586 p_v += i_source_margin_c;
587 p_line1 += i_dest_margin;
588 p_line2 += i_dest_margin;
593 /* use slower SSE2 unaligned fetch and store */
594 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
597 p_line2 += p_dest->p->i_pitch;
600 p_y2 += p_source->p[Y_PLANE].i_pitch;
602 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
604 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
606 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
611 p_y1 += i_source_margin;
612 p_y2 += i_source_margin;
613 p_u += i_source_margin_c;
614 p_v += i_source_margin_c;
615 p_line1 += i_dest_margin;
616 p_line2 += i_dest_margin;
619 /* make sure all SSE2 stores are visible thereafter */
621 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
624 /*****************************************************************************
625 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
 *
 * Same structure as I420_YUY2; the AltiVec path differs in placing the
 * chroma vector before the luma vector in the final merge, and the MMX /
 * SSE2 paths use the *_UYVY macros. p_filter supplies the picture size,
 * p_source is the planar input, p_dest the packed output.
626 *****************************************************************************/
628 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* Cursors: p_line2 starts at the first output byte, p_y2 at the first luma
 * byte, p_u / p_v at the first byte of each chroma plane. */
631 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
632 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
633 uint8_t *p_u = p_source->U_PIXELS;
634 uint8_t *p_v = p_source->V_PIXELS;
638 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Row stepping: moves the second-row output and luma cursors down by one
 * pitch of their respective planes. */
639 #define VEC_NEXT_LINES( ) \
641 p_line2 += p_dest->p->i_pitch; \
643 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one 16-byte vector of U and one of V, advancing both cursors. */
645 #define VEC_LOAD_UV( ) \
646 u_vec = vec_ld( 0, p_u ); p_u += 16; \
647 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* `a` is vec_mergeh or vec_mergel and picks which half of the U/V vectors
 * is interleaved into uv_vec. Unlike the YUYV variant, uv_vec is the first
 * merge operand, so each chroma byte precedes its luma byte in the output. */
649 #define VEC_MERGE( a ) \
650 uv_vec = a( u_vec, v_vec ); \
651 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
652 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
653 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
654 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
655 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
656 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
658 vector unsigned char u_vec;
659 vector unsigned char v_vec;
660 vector unsigned char uv_vec;
661 vector unsigned char y_vec;
/* Both remainders are OR-ed, so the branch is taken only when the width is
 * a multiple of 32 and the height is even. */
663 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
664 ( p_filter->fmt_in.video.i_height % 2 ) ) )
666 /* Width is a multiple of 32, we take 2 lines at a time */
667 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
670 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
673 VEC_MERGE( vec_mergeh );
674 VEC_MERGE( vec_mergel );
/* Taken when the width is a multiple of 16 and the height a multiple of 4.
 * NOTE(review): I420_YUY2 carries a #warning that its equivalent path is
 * broken on AltiVec; no such warning is present here -- confirm whether the
 * same problem applies. */
678 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
679 ( p_filter->fmt_in.video.i_height % 4 ) ) )
681 /* Width is only a multiple of 16, we take 4 lines at a time */
682 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
684 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
686 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
689 VEC_MERGE( vec_mergeh );
690 VEC_MERGE( vec_mergel );
693 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
695 VEC_MERGE( vec_mergeh );
697 /* Line 3 and 4, pixels 0 to 16 */
699 VEC_MERGE( vec_mergel );
701 /* Line 3 and 4, pixels 16 to ( width ) */
702 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
705 VEC_MERGE( vec_mergeh );
706 VEC_MERGE( vec_mergel );
712 /* Crap, use the C version */
713 #undef VEC_NEXT_LINES
/* Per-row padding in bytes (pitch minus visible pitch) for the luma plane,
 * the first chroma plane and the packed output. */
718 const int i_source_margin = p_source->p[0].i_pitch
719 - p_source->p[0].i_visible_pitch;
720 const int i_source_margin_c = p_source->p[1].i_pitch
721 - p_source->p[1].i_visible_pitch;
722 const int i_dest_margin = p_dest->p->i_pitch
723 - p_dest->p->i_visible_pitch;
/* Every build except SSE2: one iteration per pair of rows. */
725 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
726 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
729 p_line2 += p_dest->p->i_pitch;
732 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 8 pixels; the body is chosen per build. */
734 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
736 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
742 MMX_CALL( MMX_YUV420_UYVY );
/* Pixels left over when the width is not a multiple of 8, two at a time. */
745 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the padding at the end of the rows just processed. */
750 p_y1 += i_source_margin;
751 p_y2 += i_source_margin;
752 p_u += i_source_margin_c;
753 p_v += i_source_margin_c;
754 p_line1 += i_dest_margin;
755 p_line2 += i_dest_margin;
758 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
759 /* re-enable FPU registers */
763 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
767 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
769 ** SSE2 128 bits fetch/store instructions are faster
770 ** if memory access is 16 bytes aligned
/* The aligned variant is chosen only when the luma pitch, the output pitch
 * and both start pointers are multiples of 16.
 * NOTE(review): the chroma pointers and chroma pitch are not part of this
 * test -- confirm the aligned macro does not require them to be aligned. */
772 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
773 ((intptr_t)p_line2|(intptr_t)p_y2))) )
775 /* use faster SSE2 aligned fetch and store */
776 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
779 p_line2 += p_dest->p->i_pitch;
782 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop in steps of 16 pixels, then the leftover two at a time. */
784 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
786 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
788 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
793 p_y1 += i_source_margin;
794 p_y2 += i_source_margin;
795 p_u += i_source_margin_c;
796 p_v += i_source_margin_c;
797 p_line1 += i_dest_margin;
798 p_line2 += i_dest_margin;
803 /* use slower SSE2 unaligned fetch and store */
804 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
807 p_line2 += p_dest->p->i_pitch;
810 p_y2 += p_source->p[Y_PLANE].i_pitch;
812 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
814 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
816 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
821 p_y1 += i_source_margin;
822 p_y2 += i_source_margin;
823 p_u += i_source_margin_c;
824 p_v += i_source_margin_c;
825 p_line1 += i_dest_margin;
826 p_line2 += i_dest_margin;
829 /* make sure all SSE2 stores are visible thereafter */
831 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
834 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
835 /*****************************************************************************
836 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
 *
 * Placeholder: both pictures are marked unused and the only action is an
 * error message logged through msg_Err on p_filter.
 * NOTE(review): Activate() still installs I420_IUYV_Filter for the IUYV
 * output chroma, so that conversion is accepted but never performed.
837 *****************************************************************************/
838 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
841 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
843 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
845 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
847 /*****************************************************************************
848 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
 *
 * Compiled for the plain C build only. p_filter supplies the picture size,
 * p_source is the planar input, p_dest the packed output. The picture is
 * walked two rows at a time in steps of 8 pixels.
849 *****************************************************************************/
850 #if defined (MODULE_NAME_IS_i420_yuy2)
851 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
/* Cursors: p_line2 starts at the first output byte, p_y2 at the first luma
 * byte, p_u / p_v at the first byte of each chroma plane. */
854 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
855 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
856 uint8_t *p_u = p_source->U_PIXELS;
857 uint8_t *p_v = p_source->V_PIXELS;
/* Per-row padding in bytes (pitch minus visible pitch) for the luma plane,
 * the first chroma plane and the packed output. */
861 const int i_source_margin = p_source->p[0].i_pitch
862 - p_source->p[0].i_visible_pitch;
863 const int i_source_margin_c = p_source->p[1].i_pitch
864 - p_source->p[1].i_visible_pitch;
865 const int i_dest_margin = p_dest->p->i_pitch
866 - p_dest->p->i_visible_pitch;
/* One iteration per pair of rows. */
868 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
871 p_line2 += p_dest->p->i_pitch;
874 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* NOTE(review): the other converters follow this loop with a second loop
 * for the width % 8 leftover pixels; none appears here -- confirm that
 * widths which are not a multiple of 8 are handled. */
876 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* Skip the padding at the end of the rows just processed. */
882 p_y1 += i_source_margin;
883 p_y2 += i_source_margin;
884 p_u += i_source_margin_c;
885 p_v += i_source_margin_c;
886 p_line1 += i_dest_margin;
887 p_line2 += i_dest_margin;