1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
37 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
41 #include "i420_yuy2.h"
/* Source chroma list quoted in the description string of every variant. */
43 #define SRC_FOURCC "I420,IYUV,YV12"
/* Destination chroma list, chosen by the module variant being compiled:
 * only the plain C variant lists Y211, and the AltiVec variant lists
 * neither IUYV nor cyuv. */
45 #if defined (MODULE_NAME_IS_i420_yuy2)
46 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
47 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
48 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
49 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
50 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
51 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
52 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
55 /*****************************************************************************
56 * Local and extern prototypes.
57 *****************************************************************************/
/* Function handed to set_callbacks() in the module descriptor. */
58 static int Activate ( vlc_object_t * );
/* Converters present in every variant.  Each void converter takes the filter
 * and two pictures; each *_Filter counterpart takes the filter and one
 * picture and returns a picture. */
60 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
61 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
62 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
63 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
64 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
/* IUYV and cyuv converters are not declared for the AltiVec variant. */
66 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
67 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
68 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
69 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
70 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
/* The Y211 converter is declared for the plain C variant only. */
72 #if defined (MODULE_NAME_IS_i420_yuy2)
73 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
74 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
77 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
78 /* Initialize MMX-specific constants */
/* i_00ffw holds 0x00ff in each of its four 16-bit words; i_80w holds 0x80 in
 * each of its four low bytes and zero in the four high bytes.
 * NOTE(review): no user of these constants is visible in this part of the
 * file -- presumably the MMX macros from i420_yuy2.h; confirm. */
79 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
80 static const uint64_t i_80w = 0x0000000080808080ULL;
83 /*****************************************************************************
85 *****************************************************************************/
/* Module descriptor.  Each variant passes its own description string and
 * registers the "video filter2" capability; the second set_capability()
 * argument is 80 for plain C, 160 for MMX and 250 for SSE2 and AltiVec.
 * All variants pass Activate and NULL to set_callbacks().
 * NOTE(review): the second set_capability() argument is presumably the score
 * used to prefer one variant over another -- confirm against the VLC plugin
 * API.  The statement that opens the descriptor is not visible here. */
87 #if defined (MODULE_NAME_IS_i420_yuy2)
88 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
89 set_capability( "video filter2", 80 )
90 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
91 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
92 set_capability( "video filter2", 160 )
93 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
94 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
95 set_capability( "video filter2", 250 )
96 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
98 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
99 set_capability( "video filter2", 250 )
101 set_callbacks( Activate, NULL )
104 /*****************************************************************************
105 * Activate: allocate a chroma function
106 *****************************************************************************
107 * This function allocates and initializes a chroma function
108 *****************************************************************************/
/* Activate: pick the conversion callback for one filter instance.
 *
 * p_this is cast to filter_t *.  Two geometry tests come first (odd input
 * width or height; input and output dimensions that differ), then nested
 * switches on the input and output chroma store the matching *_Filter
 * function in p_filter->pf_video_filter.
 * NOTE(review): the statements executed when a test fails, most case labels
 * and the return statements are not visible in this excerpt. */
109 static int Activate( vlc_object_t *p_this )
111 filter_t *p_filter = (filter_t *)p_this;
/* Test for an odd input width or height. */
113 if( p_filter->fmt_in.video.i_width & 1
114 || p_filter->fmt_in.video.i_height & 1 )
/* Test for input and output dimensions that differ. */
119 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
120 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
/* Dispatch on the input chroma first, then on the output chroma. */
123 switch( p_filter->fmt_in.video.i_chroma )
127 switch( p_filter->fmt_out.video.i_chroma )
130 p_filter->pf_video_filter = I420_YUY2_Filter;
134 p_filter->pf_video_filter = I420_YVYU_Filter;
138 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV and cyuv outputs are compiled out of the AltiVec variant. */
140 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
141 case VLC_FOURCC('I','U','Y','V'):
142 p_filter->pf_video_filter = I420_IUYV_Filter;
146 p_filter->pf_video_filter = I420_cyuv_Filter;
/* Y211 output is only selectable in the plain C variant. */
150 #if defined (MODULE_NAME_IS_i420_yuy2)
152 p_filter->pf_video_filter = I420_Y211_Filter;
/* read_cycles: execute the "rdtsc" instruction through inline assembly and
 * receive its result in the unsigned long long local v, bound with the "=A"
 * output constraint.
 * NOTE(review): the return statement is not visible in this excerpt.
 * NOTE(review): GCC describes "A" as the a/d register pair for double-word
 * results on 32-bit x86; on x86-64 a 64-bit operand presumably gets a single
 * register, which would lose half of the counter -- confirm the targets this
 * helper is built for. */
169 static inline unsigned long long read_cycles(void)
171 unsigned long long v;
172 __asm__ __volatile__("rdtsc" : "=A" (v): );
178 /* Following functions are local */
/* Instantiate VIDEO_FILTER_WRAPPER() once per converter compiled into this
 * variant, under the same preprocessor conditions as the prototypes.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably emits the <name>_Filter functions that Activate installs --
 * confirm. */
180 VIDEO_FILTER_WRAPPER( I420_YUY2 )
181 VIDEO_FILTER_WRAPPER( I420_YVYU )
182 VIDEO_FILTER_WRAPPER( I420_UYVY )
/* Not instantiated for the AltiVec variant. */
183 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
184 VIDEO_FILTER_WRAPPER( I420_IUYV )
185 VIDEO_FILTER_WRAPPER( I420_cyuv )
/* Instantiated for the plain C variant only. */
187 #if defined (MODULE_NAME_IS_i420_yuy2)
188 VIDEO_FILTER_WRAPPER( I420_Y211 )
191 /*****************************************************************************
192 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
193 *****************************************************************************/
/* I420_YUY2: read the Y, U and V planes of p_source and write the single
 * packed plane of p_dest, handling two picture lines per outer iteration.
 *
 * The body is shared by several build variants:
 *  - AltiVec: a vector loop when width % 32 == 0 and height % 2 == 0, a
 *    second vector loop (four lines per iteration) when width % 16 == 0 and
 *    height % 4 == 0, and the C version otherwise;
 *  - plain C / MMX: a loop running width / 8 times per line pair followed by
 *    a tail loop running ( width % 8 ) / 2 times;
 *  - SSE2: a loop running width / 16 times followed by a tail loop running
 *    ( width % 16 ) / 2 times, in an aligned or an unaligned flavour chosen
 *    at run time.
 * NOTE(review): the rest of the parameter list (presumably picture_t
 * *p_dest), the braces and several loop bodies are not visible in this
 * excerpt. */
194 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* Output cursors (p_line1, p_line2), luma cursors (p_y1, p_y2) and chroma
 * cursors (p_u, p_v); only the second cursor of each pair is initialised
 * by these declarations. */
197 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
198 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
199 uint8_t *p_u = p_source->U_PIXELS;
200 uint8_t *p_v = p_source->V_PIXELS;
204 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output cursor and the second luma cursor by one pitch. */
205 #define VEC_NEXT_LINES( ) \
207 p_line2 += p_dest->p->i_pitch; \
209 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both cursors by 16 bytes. */
211 #define VEC_LOAD_UV( ) \
212 u_vec = vec_ld( 0, p_u ); p_u += 16; \
213 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Combine u_vec and v_vec with merge function a (vec_mergeh or vec_mergel at
 * the call sites), then for each of the two lines load one luma vector and
 * store its high and low merges with the chroma vector, luma first:
 * 16 luma bytes in, 32 output bytes out per line. */
215 #define VEC_MERGE( a ) \
216 uv_vec = a( u_vec, v_vec ); \
217 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
218 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
219 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
220 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
221 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
222 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
224 vector unsigned char u_vec;
225 vector unsigned char v_vec;
226 vector unsigned char uv_vec;
227 vector unsigned char y_vec;
229 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
230 ( p_filter->fmt_in.video.i_height % 2 ) ) )
232 /* Width is a multiple of 32, we take 2 lines at a time */
233 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
236 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
239 VEC_MERGE( vec_mergeh );
240 VEC_MERGE( vec_mergel );
244 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
246 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
247 ( p_filter->fmt_in.video.i_height % 4 ) ) )
249 /* Width is only a multiple of 16, we take 4 lines at a time */
250 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
252 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
254 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
257 VEC_MERGE( vec_mergeh );
258 VEC_MERGE( vec_mergel );
261 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
263 VEC_MERGE( vec_mergeh );
265 /* Line 3 and 4, pixels 0 to 16 */
267 VEC_MERGE( vec_mergel );
269 /* Line 3 and 4, pixels 16 to ( width ) */
270 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
273 VEC_MERGE( vec_mergeh );
274 VEC_MERGE( vec_mergel );
281 /* Neither vector case applies: use the C version */
282 #undef VEC_NEXT_LINES
/* Per-line padding: full pitch minus visible pitch, for the luma plane
 * (index 0), the first chroma plane (index 1) and the output plane. */
287 const int i_source_margin = p_source->p[0].i_pitch
288 - p_source->p[0].i_visible_pitch;
289 const int i_source_margin_c = p_source->p[1].i_pitch
290 - p_source->p[1].i_visible_pitch;
291 const int i_dest_margin = p_dest->p->i_pitch
292 - p_dest->p->i_visible_pitch;
294 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
295 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
298 p_line2 += p_dest->p->i_pitch;
301 p_y2 += p_source->p[Y_PLANE].i_pitch;
303 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
304 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
312 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
314 MMX_CALL( MMX_YUV420_YUYV );
317 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add the per-line padding to every cursor at the end of the line pair. */
322 p_y1 += i_source_margin;
323 p_y2 += i_source_margin;
324 p_u += i_source_margin_c;
325 p_v += i_source_margin_c;
326 p_line1 += i_dest_margin;
327 p_line2 += i_dest_margin;
330 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
331 /* re-enable FPU registers */
335 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
339 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
341 ** SSE2 128 bits fetch/store instructions are faster
342 ** if memory access is 16 bytes aligned
/* The aligned flavour is taken only when the luma pitch, the output pitch
 * and both start pointers all have their four low bits clear. */
345 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
346 ((intptr_t)p_line2|(intptr_t)p_y2))) )
348 /* use faster SSE2 aligned fetch and store */
349 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
352 p_line2 += p_dest->p->i_pitch;
355 p_y2 += p_source->p[Y_PLANE].i_pitch;
357 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
359 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
361 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
366 p_y1 += i_source_margin;
367 p_y2 += i_source_margin;
368 p_u += i_source_margin_c;
369 p_v += i_source_margin_c;
370 p_line1 += i_dest_margin;
371 p_line2 += i_dest_margin;
376 /* use slower SSE2 unaligned fetch and store */
377 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
380 p_line2 += p_dest->p->i_pitch;
383 p_y2 += p_source->p[Y_PLANE].i_pitch;
385 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
387 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
389 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
394 p_y1 += i_source_margin;
395 p_y2 += i_source_margin;
396 p_u += i_source_margin_c;
397 p_v += i_source_margin_c;
398 p_line1 += i_dest_margin;
399 p_line2 += i_dest_margin;
402 /* make sure all SSE2 stores are visible thereafter */
405 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
408 /*****************************************************************************
409 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
410 *****************************************************************************/
/* I420_YVYU: same structure as the YUYV converter, but the chroma pair is
 * merged V first: the AltiVec macro builds vu_vec from ( v_vec, u_vec ), and
 * the MMX and SSE2 variants invoke the *_YUV420_YVYU* macros.
 *
 * Two picture lines are handled per outer iteration (four in the second
 * AltiVec case).  The plain C / MMX loops run width / 8 times plus a tail of
 * ( width % 8 ) / 2 iterations; the SSE2 loops run width / 16 times plus a
 * tail of ( width % 16 ) / 2 iterations.
 * NOTE(review): the rest of the parameter list (presumably picture_t
 * *p_dest), the braces and several loop bodies are not visible in this
 * excerpt. */
411 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* Output, luma and chroma cursors; only the second cursor of each pair is
 * initialised by these declarations. */
414 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
415 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
416 uint8_t *p_u = p_source->U_PIXELS;
417 uint8_t *p_v = p_source->V_PIXELS;
421 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output cursor and the second luma cursor by one pitch. */
422 #define VEC_NEXT_LINES( ) \
424 p_line2 += p_dest->p->i_pitch; \
426 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both cursors by 16 bytes. */
428 #define VEC_LOAD_UV( ) \
429 u_vec = vec_ld( 0, p_u ); p_u += 16; \
430 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Combine v_vec and u_vec (V first) with merge function a, then for each of
 * the two lines load one luma vector and store its high and low merges with
 * the chroma vector, luma first. */
432 #define VEC_MERGE( a ) \
433 vu_vec = a( v_vec, u_vec ); \
434 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
435 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
436 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
437 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
438 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
439 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
441 vector unsigned char u_vec;
442 vector unsigned char v_vec;
443 vector unsigned char vu_vec;
444 vector unsigned char y_vec;
446 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
447 ( p_filter->fmt_in.video.i_height % 2 ) ) )
449 /* Width is a multiple of 32, we take 2 lines at a time */
450 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
453 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
456 VEC_MERGE( vec_mergeh );
457 VEC_MERGE( vec_mergel );
461 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
462 ( p_filter->fmt_in.video.i_height % 4 ) ) )
464 /* Width is only a multiple of 16, we take 4 lines at a time */
465 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
467 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
469 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
472 VEC_MERGE( vec_mergeh );
473 VEC_MERGE( vec_mergel );
476 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
478 VEC_MERGE( vec_mergeh );
480 /* Line 3 and 4, pixels 0 to 16 */
482 VEC_MERGE( vec_mergel );
484 /* Line 3 and 4, pixels 16 to ( width ) */
485 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
488 VEC_MERGE( vec_mergeh );
489 VEC_MERGE( vec_mergel );
495 /* Neither vector case applies: use the C version */
496 #undef VEC_NEXT_LINES
/* Per-line padding: full pitch minus visible pitch, for the luma plane
 * (index 0), the first chroma plane (index 1) and the output plane. */
501 const int i_source_margin = p_source->p[0].i_pitch
502 - p_source->p[0].i_visible_pitch;
503 const int i_source_margin_c = p_source->p[1].i_pitch
504 - p_source->p[1].i_visible_pitch;
505 const int i_dest_margin = p_dest->p->i_pitch
506 - p_dest->p->i_visible_pitch;
508 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
509 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
512 p_line2 += p_dest->p->i_pitch;
515 p_y2 += p_source->p[Y_PLANE].i_pitch;
517 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
519 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
525 MMX_CALL( MMX_YUV420_YVYU );
528 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add the per-line padding to every cursor at the end of the line pair. */
533 p_y1 += i_source_margin;
534 p_y2 += i_source_margin;
535 p_u += i_source_margin_c;
536 p_v += i_source_margin_c;
537 p_line1 += i_dest_margin;
538 p_line2 += i_dest_margin;
541 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
542 /* re-enable FPU registers */
546 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
550 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
552 ** SSE2 128 bits fetch/store instructions are faster
553 ** if memory access is 16 bytes aligned
/* The aligned flavour is taken only when the luma pitch, the output pitch
 * and both start pointers all have their four low bits clear. */
555 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
556 ((intptr_t)p_line2|(intptr_t)p_y2))) )
558 /* use faster SSE2 aligned fetch and store */
559 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
562 p_line2 += p_dest->p->i_pitch;
565 p_y2 += p_source->p[Y_PLANE].i_pitch;
567 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
569 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
571 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
576 p_y1 += i_source_margin;
577 p_y2 += i_source_margin;
578 p_u += i_source_margin_c;
579 p_v += i_source_margin_c;
580 p_line1 += i_dest_margin;
581 p_line2 += i_dest_margin;
586 /* use slower SSE2 unaligned fetch and store */
587 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
590 p_line2 += p_dest->p->i_pitch;
593 p_y2 += p_source->p[Y_PLANE].i_pitch;
595 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
597 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
599 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
604 p_y1 += i_source_margin;
605 p_y2 += i_source_margin;
606 p_u += i_source_margin_c;
607 p_v += i_source_margin_c;
608 p_line1 += i_dest_margin;
609 p_line2 += i_dest_margin;
612 /* make sure all SSE2 stores are visible thereafter */
614 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
617 /*****************************************************************************
618 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
619 *****************************************************************************/
/* I420_UYVY: same structure as the YUYV converter, but chroma comes first in
 * the output: the AltiVec macro stores merges of ( uv_vec, y_vec ), and the
 * MMX and SSE2 variants invoke the *_YUV420_UYVY* macros.
 *
 * Two picture lines are handled per outer iteration (four in the second
 * AltiVec case).  The plain C / MMX loops run width / 8 times plus a tail of
 * ( width % 8 ) / 2 iterations; the SSE2 loops run width / 16 times plus a
 * tail of ( width % 16 ) / 2 iterations.
 * NOTE(review): the rest of the parameter list (presumably picture_t
 * *p_dest), the braces and several loop bodies are not visible in this
 * excerpt. */
620 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* Output, luma and chroma cursors; only the second cursor of each pair is
 * initialised by these declarations. */
623 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
624 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
625 uint8_t *p_u = p_source->U_PIXELS;
626 uint8_t *p_v = p_source->V_PIXELS;
630 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Advance the second output cursor and the second luma cursor by one pitch. */
631 #define VEC_NEXT_LINES( ) \
633 p_line2 += p_dest->p->i_pitch; \
635 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load one vector from each chroma plane and step both cursors by 16 bytes. */
637 #define VEC_LOAD_UV( ) \
638 u_vec = vec_ld( 0, p_u ); p_u += 16; \
639 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Combine u_vec and v_vec with merge function a, then for each of the two
 * lines load one luma vector and store the high and low merges of the chroma
 * vector with it, chroma first. */
641 #define VEC_MERGE( a ) \
642 uv_vec = a( u_vec, v_vec ); \
643 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
644 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
645 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
646 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
647 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
648 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
650 vector unsigned char u_vec;
651 vector unsigned char v_vec;
652 vector unsigned char uv_vec;
653 vector unsigned char y_vec;
655 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
656 ( p_filter->fmt_in.video.i_height % 2 ) ) )
658 /* Width is a multiple of 32, we take 2 lines at a time */
659 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
662 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
665 VEC_MERGE( vec_mergeh );
666 VEC_MERGE( vec_mergel );
670 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
671 ( p_filter->fmt_in.video.i_height % 4 ) ) )
673 /* Width is only a multiple of 16, we take 4 lines at a time */
674 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
676 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
678 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
681 VEC_MERGE( vec_mergeh );
682 VEC_MERGE( vec_mergel );
685 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
687 VEC_MERGE( vec_mergeh );
689 /* Line 3 and 4, pixels 0 to 16 */
691 VEC_MERGE( vec_mergel );
693 /* Line 3 and 4, pixels 16 to ( width ) */
694 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
697 VEC_MERGE( vec_mergeh );
698 VEC_MERGE( vec_mergel );
704 /* Neither vector case applies: use the C version */
705 #undef VEC_NEXT_LINES
/* Per-line padding: full pitch minus visible pitch, for the luma plane
 * (index 0), the first chroma plane (index 1) and the output plane. */
710 const int i_source_margin = p_source->p[0].i_pitch
711 - p_source->p[0].i_visible_pitch;
712 const int i_source_margin_c = p_source->p[1].i_pitch
713 - p_source->p[1].i_visible_pitch;
714 const int i_dest_margin = p_dest->p->i_pitch
715 - p_dest->p->i_visible_pitch;
717 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
718 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
721 p_line2 += p_dest->p->i_pitch;
724 p_y2 += p_source->p[Y_PLANE].i_pitch;
726 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
728 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
734 MMX_CALL( MMX_YUV420_UYVY );
737 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Add the per-line padding to every cursor at the end of the line pair. */
742 p_y1 += i_source_margin;
743 p_y2 += i_source_margin;
744 p_u += i_source_margin_c;
745 p_v += i_source_margin_c;
746 p_line1 += i_dest_margin;
747 p_line2 += i_dest_margin;
750 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
751 /* re-enable FPU registers */
755 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
759 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
761 ** SSE2 128 bits fetch/store instructions are faster
762 ** if memory access is 16 bytes aligned
/* The aligned flavour is taken only when the luma pitch, the output pitch
 * and both start pointers all have their four low bits clear. */
764 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
765 ((intptr_t)p_line2|(intptr_t)p_y2))) )
767 /* use faster SSE2 aligned fetch and store */
768 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
771 p_line2 += p_dest->p->i_pitch;
774 p_y2 += p_source->p[Y_PLANE].i_pitch;
776 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
778 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
780 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
785 p_y1 += i_source_margin;
786 p_y2 += i_source_margin;
787 p_u += i_source_margin_c;
788 p_v += i_source_margin_c;
789 p_line1 += i_dest_margin;
790 p_line2 += i_dest_margin;
795 /* use slower SSE2 unaligned fetch and store */
796 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
799 p_line2 += p_dest->p->i_pitch;
802 p_y2 += p_source->p[Y_PLANE].i_pitch;
804 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
806 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
808 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
813 p_y1 += i_source_margin;
814 p_y2 += i_source_margin;
815 p_u += i_source_margin_c;
816 p_v += i_source_margin_c;
817 p_line1 += i_dest_margin;
818 p_line2 += i_dest_margin;
821 /* make sure all SSE2 stores are visible thereafter */
823 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
826 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
827 /*****************************************************************************
828 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
829 *****************************************************************************/
/* I420_IUYV: placeholder.  The visible body marks p_source and p_dest as
 * unused and reports through msg_Err() that the conversion is unimplemented;
 * no pixel data is touched by these statements.
 * NOTE(review): the rest of the parameter list and the braces are not visible
 * in this excerpt. */
830 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
833 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
835 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
838 /*****************************************************************************
839 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
840 *****************************************************************************/
/* I420_cyuv: UYVY-style packing (the MMX and SSE2 variants invoke the same
 * *_YUV420_UYVY* macros as the UYVY converter) with output cursors that start
 * at the far end of the destination plane instead of at p_pixels.
 *
 * In the non-SSE2 branch both output cursors are moved back by three pitches
 * before each pair of lines, and the per-line margin is added after it.
 * NOTE(review): the SSE2 branches instead advance p_line2 by +i_pitch, i.e.
 * forwards from a start that already lies beyond the visible lines.  That
 * looks inconsistent with the backwards walk of the non-SSE2 branch and may
 * write outside the plane -- confirm against the complete source.
 * NOTE(review): the start offsets ( visible_lines * pitch, plus one more
 * pitch for p_line1 ) combined with the -3 * pitch step look off by one line
 * -- verify which output rows are actually written.
 * NOTE(review): the rest of the parameter list, the braces and several loop
 * bodies are not visible in this excerpt. */
841 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
/* p_line1 starts one pitch past p_line2, which itself starts
 * i_visible_lines pitches past the beginning of the output plane. */
844 uint8_t *p_line1 = p_dest->p->p_pixels +
845 p_dest->p->i_visible_lines * p_dest->p->i_pitch
846 + p_dest->p->i_pitch;
847 uint8_t *p_line2 = p_dest->p->p_pixels +
848 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
849 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
850 uint8_t *p_u = p_source->U_PIXELS;
851 uint8_t *p_v = p_source->V_PIXELS;
/* Per-line padding: full pitch minus visible pitch, for the luma plane
 * (index 0), the first chroma plane (index 1) and the output plane. */
855 const int i_source_margin = p_source->p[0].i_pitch
856 - p_source->p[0].i_visible_pitch;
857 const int i_source_margin_c = p_source->p[1].i_pitch
858 - p_source->p[1].i_visible_pitch;
859 const int i_dest_margin = p_dest->p->i_pitch
860 - p_dest->p->i_visible_pitch;
862 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
863 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* Move both output cursors three full pitches backwards. */
865 p_line1 -= 3 * p_dest->p->i_pitch;
866 p_line2 -= 3 * p_dest->p->i_pitch;
869 p_y2 += p_source->p[Y_PLANE].i_pitch;
871 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
873 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
879 MMX_CALL( MMX_YUV420_UYVY );
882 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add the per-line padding to every cursor at the end of the line pair. */
887 p_y1 += i_source_margin;
888 p_y2 += i_source_margin;
889 p_u += i_source_margin_c;
890 p_v += i_source_margin_c;
891 p_line1 += i_dest_margin;
892 p_line2 += i_dest_margin;
895 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
896 /* re-enable FPU registers */
900 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
902 ** SSE2 128 bits fetch/store instructions are faster
903 ** if memory access is 16 bytes aligned
/* The aligned flavour is taken only when the luma pitch, the output pitch
 * and both start pointers all have their four low bits clear. */
905 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
906 ((intptr_t)p_line2|(intptr_t)p_y2))) )
908 /* use faster SSE2 aligned fetch and store */
909 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
912 p_line2 += p_dest->p->i_pitch;
915 p_y2 += p_source->p[Y_PLANE].i_pitch;
917 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
919 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
921 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
926 p_y1 += i_source_margin;
927 p_y2 += i_source_margin;
928 p_u += i_source_margin_c;
929 p_v += i_source_margin_c;
930 p_line1 += i_dest_margin;
931 p_line2 += i_dest_margin;
936 /* use slower SSE2 unaligned fetch and store */
937 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
940 p_line2 += p_dest->p->i_pitch;
943 p_y2 += p_source->p[Y_PLANE].i_pitch;
945 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
947 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
949 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
954 p_y1 += i_source_margin;
955 p_y2 += i_source_margin;
956 p_u += i_source_margin_c;
957 p_v += i_source_margin_c;
958 p_line1 += i_dest_margin;
959 p_line2 += i_dest_margin;
962 /* make sure all SSE2 stores are visible thereafter */
964 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
966 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
968 /*****************************************************************************
969 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
970 *****************************************************************************/
971 #if defined (MODULE_NAME_IS_i420_yuy2)
972 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
975 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
976 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
977 uint8_t *p_u = p_source->U_PIXELS;
978 uint8_t *p_v = p_source->V_PIXELS;
982 const int i_source_margin = p_source->p[0].i_pitch
983 - p_source->p[0].i_visible_pitch;
984 const int i_source_margin_c = p_source->p[1].i_pitch
985 - p_source->p[1].i_visible_pitch;
986 const int i_dest_margin = p_dest->p->i_pitch
987 - p_dest->p->i_visible_pitch;
989 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
992 p_line2 += p_dest->p->i_pitch;
995 p_y2 += p_source->p[Y_PLANE].i_pitch;
997 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
1003 p_y1 += i_source_margin;
1004 p_y2 += i_source_margin;
1005 p_u += i_source_margin_c;
1006 p_v += i_source_margin_c;
1007 p_line1 += i_dest_margin;
1008 p_line2 += i_dest_margin;