1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
37 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
41 #include "i420_yuy2.h"
43 #define SRC_FOURCC "I420,IYUV,YV12"
45 #if defined (MODULE_NAME_IS_i420_yuy2)
46 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
47 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
48 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
49 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
50 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
51 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
52 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
55 /*****************************************************************************
56 * Local and extern prototypes.
57 *****************************************************************************/
58 static int Activate ( vlc_object_t * );
60 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
61 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
62 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
63 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
64 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
66 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
67 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
68 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
69 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
70 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
72 #if defined (MODULE_NAME_IS_i420_yuy2)
73 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
74 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
77 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
78 /* MMX-specific constants (only compiled for the i420_yuy2_mmx variant) */
/* 0x00ff repeated in each of the four 16-bit words of the 64-bit value. */
79 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
/* 0x80 in each of the four low bytes; the four high bytes are zero.
 * NOTE(review): no use of either constant is visible in this file; presumably
 * they are referenced by the MMX macros of i420_yuy2.h -- confirm. */
80 static const uint64_t i_80w = 0x0000000080808080ULL;
83 /*****************************************************************************
85 *****************************************************************************/
/* Module descriptor entries.  Each build variant (plain C, MMX, SSE2,
 * AltiVec) advertises its own description string, built from SRC_FOURCC and
 * DEST_FOURCC, and registers the "video filter2" capability with a numeric
 * argument of 80, 160, 250 and 250 respectively.
 * NOTE(review): the number is presumably the module priority -- confirm. */
87 #if defined (MODULE_NAME_IS_i420_yuy2)
88 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
89 set_capability( "video filter2", 80 )
90 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
91 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
92 set_capability( "video filter2", 160 )
93 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
94 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
95 set_capability( "video filter2", 250 )
96 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
98 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
99 set_capability( "video filter2", 250 )
/* Activate is registered as the first callback and NULL as the second.
 * NOTE(review): presumably (open, close), i.e. no close callback -- confirm. */
101 set_callbacks( Activate, NULL )
104 /*****************************************************************************
105 * Activate: select the chroma conversion routine
106 *****************************************************************************
107 * This function checks the picture dimensions and the input/output chroma pair, then installs the matching conversion callback
108 *****************************************************************************/
/* Activate: inspect the filter's input and output video formats and store
 * the conversion callback matching the requested output chroma in
 * p_filter->pf_video_filter.
 *
 * p_this is cast to filter_t *; the fields read are fmt_in.video and
 * fmt_out.video.
 * NOTE(review): the return statements and most case labels are not visible
 * in this excerpt; presumably a VLC status code is returned -- confirm. */
109 static int Activate( vlc_object_t *p_this )
111 filter_t *p_filter = (filter_t *)p_this;
/* Tests whether the input width or height is odd (bit 0 set). */
113 if( p_filter->fmt_in.video.i_width & 1
114 || p_filter->fmt_in.video.i_height & 1 )
/* Tests whether input and output dimensions differ (no scaling here). */
119 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
120 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
/* Dispatch first on the source chroma, then on the destination chroma. */
123 switch( p_filter->fmt_in.video.i_chroma )
127 switch( p_filter->fmt_out.video.i_chroma )
130 p_filter->pf_video_filter = I420_YUY2_Filter;
134 p_filter->pf_video_filter = I420_YVYU_Filter;
138 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV and cyuv outputs are not offered by the AltiVec build. */
140 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
141 case VLC_FOURCC('I','U','Y','V'):
142 p_filter->pf_video_filter = I420_IUYV_Filter;
146 p_filter->pf_video_filter = I420_cyuv_Filter;
/* Y211 output is only offered by the plain C build. */
150 #if defined (MODULE_NAME_IS_i420_yuy2)
152 p_filter->pf_video_filter = I420_Y211_Filter;
/**
 * Read the x86 time-stamp counter.
 *
 * RDTSC returns the counter in EDX:EAX.  The two halves are captured in
 * separate outputs because the "A" constraint only denotes the EDX:EAX pair
 * on 32-bit x86; on x86-64 it selects a single register, so the upper half
 * of the counter would be lost.
 *
 * @return the 64-bit counter value
 */
static inline unsigned long long read_cycles(void)
{
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

    return ((unsigned long long)hi << 32) | lo;
}
178 /* Following functions are local */
/* One VIDEO_FILTER_WRAPPER instantiation per converter defined below.
 * NOTE(review): the macro is defined outside this file; presumably it
 * generates the I420_xxx_Filter functions declared above around the
 * corresponding void I420_xxx workers -- confirm against its definition. */
180 VIDEO_FILTER_WRAPPER( I420_YUY2 )
181 VIDEO_FILTER_WRAPPER( I420_YVYU )
182 VIDEO_FILTER_WRAPPER( I420_UYVY )
/* IUYV and cyuv converters do not exist in the AltiVec build. */
183 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
184 VIDEO_FILTER_WRAPPER( I420_IUYV )
185 VIDEO_FILTER_WRAPPER( I420_cyuv )
/* The Y211 converter only exists in the plain C build. */
187 #if defined (MODULE_NAME_IS_i420_yuy2)
188 VIDEO_FILTER_WRAPPER( I420_Y211 )
191 /*****************************************************************************
192 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
193 *****************************************************************************/
/* I420_YUY2: read the Y, U and V planes of p_source and write the single
 * packed plane of p_dest, handling two picture lines per outer-loop
 * iteration.  The picture size comes from p_filter->fmt_in.video.
 * Which inner loop is compiled depends on the MODULE_NAME_IS_* variant:
 * AltiVec vector loops, MMX_CALL, SSE2_CALL (aligned or unaligned), or the
 * plain C path. */
194 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* Output cursors for the two destination lines being written; p_line2
 * starts at the first byte of the destination plane. */
197 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
/* Input cursors: two luma lines plus the U and V planes. */
198 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
199 uint8_t *p_u = p_source->U_PIXELS;
200 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
204 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Moves p_line2 and p_y2 one pitch further (destination and Y plane). */
205 #define VEC_NEXT_LINES( ) \
207 p_line2 += p_dest->p->i_pitch; \
209 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one vector from p_u and one from p_v, advancing each by 16 bytes. */
211 #define VEC_LOAD_UV( ) \
212 u_vec = vec_ld( 0, p_u ); p_u += 16; \
213 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleaves U with V using merge function a, then interleaves Y (first)
 * with that result for each of the two lines and stores 2 x 16 bytes per
 * line. */
215 #define VEC_MERGE( a ) \
216 uv_vec = a( u_vec, v_vec ); \
217 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
218 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
219 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
220 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
221 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
222 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
224 vector unsigned char u_vec;
225 vector unsigned char v_vec;
226 vector unsigned char uv_vec;
227 vector unsigned char y_vec;
/* First vector path: width divisible by 32 and height divisible by 2. */
229 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
230 ( p_filter->fmt_in.video.i_height % 2 ) ) )
232 /* Width is a multiple of 32, we take 2 lines at a time */
233 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
236 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
239 VEC_MERGE( vec_mergeh );
240 VEC_MERGE( vec_mergel );
/* Second vector path: width divisible by 16 and height divisible by 4. */
244 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
245 ( p_filter->fmt_in.video.i_height % 4 ) ) )
247 /* Width is only a multiple of 16, we take 4 lines at a time */
248 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
250 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
252 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
255 VEC_MERGE( vec_mergeh );
256 VEC_MERGE( vec_mergel );
259 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
261 VEC_MERGE( vec_mergeh );
263 /* Line 3 and 4, pixels 0 to 16 */
265 VEC_MERGE( vec_mergel );
267 /* Line 3 and 4, pixels 16 to ( width ) */
268 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
271 VEC_MERGE( vec_mergeh );
272 VEC_MERGE( vec_mergel );
278 /* Neither vector path applies to these dimensions: use the C version */
279 #undef VEC_NEXT_LINES
/* Bytes to skip at the end of each line: pitch minus visible pitch, for the
 * luma plane (index 0), a chroma plane (index 1) and the destination. */
284 const int i_source_margin = p_source->p[0].i_pitch
285 - p_source->p[0].i_visible_pitch;
286 const int i_source_margin_c = p_source->p[1].i_pitch
287 - p_source->p[1].i_visible_pitch;
288 const int i_dest_margin = p_dest->p->i_pitch
289 - p_dest->p->i_visible_pitch;
/* Every build except SSE2 compiles the loop below. */
291 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
292 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
295 p_line2 += p_dest->p->i_pitch;
298 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main run: one inner iteration per 8 pixels of width. */
300 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
301 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
309 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
311 MMX_CALL( MMX_YUV420_YUYV );
/* Leftover pixels when the width is not a multiple of 8, two at a time. */
314 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of each source and destination line. */
319 p_y1 += i_source_margin;
320 p_y2 += i_source_margin;
321 p_u += i_source_margin_c;
322 p_v += i_source_margin_c;
323 p_line1 += i_dest_margin;
324 p_line2 += i_dest_margin;
327 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
328 /* re-enable FPU registers */
332 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
336 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
338 ** SSE2 128 bits fetch/store instructions are faster
339 ** if memory access is 16 bytes aligned
/* The aligned path is taken only when the Y pitch, the destination pitch
 * and both start pointers all have their low four bits clear. */
342 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
343 ((intptr_t)p_line2|(intptr_t)p_y2))) )
345 /* use faster SSE2 aligned fetch and store */
346 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
349 p_line2 += p_dest->p->i_pitch;
352 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2_CALL per 16 pixels of width. */
354 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
356 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
/* Leftover pixels when the width is not a multiple of 16, two at a time. */
358 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
363 p_y1 += i_source_margin;
364 p_y2 += i_source_margin;
365 p_u += i_source_margin_c;
366 p_v += i_source_margin_c;
367 p_line1 += i_dest_margin;
368 p_line2 += i_dest_margin;
373 /* use slower SSE2 unaligned fetch and store */
374 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
377 p_line2 += p_dest->p->i_pitch;
380 p_y2 += p_source->p[Y_PLANE].i_pitch;
382 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
384 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
386 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
391 p_y1 += i_source_margin;
392 p_y2 += i_source_margin;
393 p_u += i_source_margin_c;
394 p_v += i_source_margin_c;
395 p_line1 += i_dest_margin;
396 p_line2 += i_dest_margin;
399 /* make sure all SSE2 stores are visible thereafter */
402 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
405 /*****************************************************************************
406 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
407 *****************************************************************************/
/* I420_YVYU: read the Y, U and V planes of p_source and write the single
 * packed plane of p_dest, handling two picture lines per outer-loop
 * iteration.  The picture size comes from p_filter->fmt_in.video.
 * Same structure as the other packed converters of this file; the AltiVec
 * merge here interleaves V before U. */
408 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* Output cursors for the two destination lines being written; p_line2
 * starts at the first byte of the destination plane. */
411 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
/* Input cursors: two luma lines plus the U and V planes. */
412 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
413 uint8_t *p_u = p_source->U_PIXELS;
414 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
418 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Moves p_line2 and p_y2 one pitch further (destination and Y plane). */
419 #define VEC_NEXT_LINES( ) \
421 p_line2 += p_dest->p->i_pitch; \
423 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one vector from p_u and one from p_v, advancing each by 16 bytes. */
425 #define VEC_LOAD_UV( ) \
426 u_vec = vec_ld( 0, p_u ); p_u += 16; \
427 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleaves V with U (V first) using merge function a, then interleaves Y
 * (first) with that result for each of the two lines and stores 2 x 16 bytes
 * per line. */
429 #define VEC_MERGE( a ) \
430 vu_vec = a( v_vec, u_vec ); \
431 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
432 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
433 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
434 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
435 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
436 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
438 vector unsigned char u_vec;
439 vector unsigned char v_vec;
440 vector unsigned char vu_vec;
441 vector unsigned char y_vec;
/* First vector path: width divisible by 32 and height divisible by 2. */
443 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
444 ( p_filter->fmt_in.video.i_height % 2 ) ) )
446 /* Width is a multiple of 32, we take 2 lines at a time */
447 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
450 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
453 VEC_MERGE( vec_mergeh );
454 VEC_MERGE( vec_mergel );
/* Second vector path: width divisible by 16 and height divisible by 4. */
458 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
459 ( p_filter->fmt_in.video.i_height % 4 ) ) )
461 /* Width is only a multiple of 16, we take 4 lines at a time */
462 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
464 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
466 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
469 VEC_MERGE( vec_mergeh );
470 VEC_MERGE( vec_mergel );
473 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
475 VEC_MERGE( vec_mergeh );
477 /* Line 3 and 4, pixels 0 to 16 */
479 VEC_MERGE( vec_mergel );
481 /* Line 3 and 4, pixels 16 to ( width ) */
482 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
485 VEC_MERGE( vec_mergeh );
486 VEC_MERGE( vec_mergel );
492 /* Neither vector path applies to these dimensions: use the C version */
493 #undef VEC_NEXT_LINES
/* Bytes to skip at the end of each line: pitch minus visible pitch, for the
 * luma plane (index 0), a chroma plane (index 1) and the destination. */
498 const int i_source_margin = p_source->p[0].i_pitch
499 - p_source->p[0].i_visible_pitch;
500 const int i_source_margin_c = p_source->p[1].i_pitch
501 - p_source->p[1].i_visible_pitch;
502 const int i_dest_margin = p_dest->p->i_pitch
503 - p_dest->p->i_visible_pitch;
/* Every build except SSE2 compiles the loop below. */
505 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
506 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
509 p_line2 += p_dest->p->i_pitch;
512 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main run: one inner iteration per 8 pixels of width. */
514 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
516 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
522 MMX_CALL( MMX_YUV420_YVYU );
/* Leftover pixels when the width is not a multiple of 8, two at a time. */
525 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of each source and destination line. */
530 p_y1 += i_source_margin;
531 p_y2 += i_source_margin;
532 p_u += i_source_margin_c;
533 p_v += i_source_margin_c;
534 p_line1 += i_dest_margin;
535 p_line2 += i_dest_margin;
538 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
539 /* re-enable FPU registers */
543 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
547 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
549 ** SSE2 128 bits fetch/store instructions are faster
550 ** if memory access is 16 bytes aligned
/* The aligned path is taken only when the Y pitch, the destination pitch
 * and both start pointers all have their low four bits clear. */
552 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
553 ((intptr_t)p_line2|(intptr_t)p_y2))) )
555 /* use faster SSE2 aligned fetch and store */
556 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
559 p_line2 += p_dest->p->i_pitch;
562 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2_CALL per 16 pixels of width. */
564 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
566 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
/* Leftover pixels when the width is not a multiple of 16, two at a time. */
568 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
573 p_y1 += i_source_margin;
574 p_y2 += i_source_margin;
575 p_u += i_source_margin_c;
576 p_v += i_source_margin_c;
577 p_line1 += i_dest_margin;
578 p_line2 += i_dest_margin;
583 /* use slower SSE2 unaligned fetch and store */
584 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
587 p_line2 += p_dest->p->i_pitch;
590 p_y2 += p_source->p[Y_PLANE].i_pitch;
592 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
594 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
596 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
601 p_y1 += i_source_margin;
602 p_y2 += i_source_margin;
603 p_u += i_source_margin_c;
604 p_v += i_source_margin_c;
605 p_line1 += i_dest_margin;
606 p_line2 += i_dest_margin;
609 /* make sure all SSE2 stores are visible thereafter */
611 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
614 /*****************************************************************************
615 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
616 *****************************************************************************/
/* I420_UYVY: read the Y, U and V planes of p_source and write the single
 * packed plane of p_dest, handling two picture lines per outer-loop
 * iteration.  The picture size comes from p_filter->fmt_in.video.
 * Same structure as the other packed converters of this file; the AltiVec
 * merge here places the chroma vector before the luma vector. */
617 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* Output cursors for the two destination lines being written; p_line2
 * starts at the first byte of the destination plane. */
620 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
/* Input cursors: two luma lines plus the U and V planes. */
621 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
622 uint8_t *p_u = p_source->U_PIXELS;
623 uint8_t *p_v = p_source->V_PIXELS;
/* AltiVec build: helper macros used by the vector loops below. */
627 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Moves p_line2 and p_y2 one pitch further (destination and Y plane). */
628 #define VEC_NEXT_LINES( ) \
630 p_line2 += p_dest->p->i_pitch; \
632 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Loads one vector from p_u and one from p_v, advancing each by 16 bytes. */
634 #define VEC_LOAD_UV( ) \
635 u_vec = vec_ld( 0, p_u ); p_u += 16; \
636 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleaves U with V using merge function a, then interleaves that result
 * (first) with Y for each of the two lines and stores 2 x 16 bytes per
 * line. */
638 #define VEC_MERGE( a ) \
639 uv_vec = a( u_vec, v_vec ); \
640 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
641 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
642 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
643 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
644 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
645 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
647 vector unsigned char u_vec;
648 vector unsigned char v_vec;
649 vector unsigned char uv_vec;
650 vector unsigned char y_vec;
/* First vector path: width divisible by 32 and height divisible by 2. */
652 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
653 ( p_filter->fmt_in.video.i_height % 2 ) ) )
655 /* Width is a multiple of 32, we take 2 lines at a time */
656 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
659 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
662 VEC_MERGE( vec_mergeh );
663 VEC_MERGE( vec_mergel );
/* Second vector path: width divisible by 16 and height divisible by 4. */
667 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
668 ( p_filter->fmt_in.video.i_height % 4 ) ) )
670 /* Width is only a multiple of 16, we take 4 lines at a time */
671 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
673 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
675 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
678 VEC_MERGE( vec_mergeh );
679 VEC_MERGE( vec_mergel );
682 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
684 VEC_MERGE( vec_mergeh );
686 /* Line 3 and 4, pixels 0 to 16 */
688 VEC_MERGE( vec_mergel );
690 /* Line 3 and 4, pixels 16 to ( width ) */
691 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
694 VEC_MERGE( vec_mergeh );
695 VEC_MERGE( vec_mergel );
701 /* Neither vector path applies to these dimensions: use the C version */
702 #undef VEC_NEXT_LINES
/* Bytes to skip at the end of each line: pitch minus visible pitch, for the
 * luma plane (index 0), a chroma plane (index 1) and the destination. */
707 const int i_source_margin = p_source->p[0].i_pitch
708 - p_source->p[0].i_visible_pitch;
709 const int i_source_margin_c = p_source->p[1].i_pitch
710 - p_source->p[1].i_visible_pitch;
711 const int i_dest_margin = p_dest->p->i_pitch
712 - p_dest->p->i_visible_pitch;
/* Every build except SSE2 compiles the loop below. */
714 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
715 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
718 p_line2 += p_dest->p->i_pitch;
721 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main run: one inner iteration per 8 pixels of width. */
723 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
725 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
731 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover pixels when the width is not a multiple of 8, two at a time. */
734 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the padding at the end of each source and destination line. */
739 p_y1 += i_source_margin;
740 p_y2 += i_source_margin;
741 p_u += i_source_margin_c;
742 p_v += i_source_margin_c;
743 p_line1 += i_dest_margin;
744 p_line2 += i_dest_margin;
747 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
748 /* re-enable FPU registers */
752 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
756 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
758 ** SSE2 128 bits fetch/store instructions are faster
759 ** if memory access is 16 bytes aligned
/* The aligned path is taken only when the Y pitch, the destination pitch
 * and both start pointers all have their low four bits clear. */
761 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
762 ((intptr_t)p_line2|(intptr_t)p_y2))) )
764 /* use faster SSE2 aligned fetch and store */
765 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
768 p_line2 += p_dest->p->i_pitch;
771 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2_CALL per 16 pixels of width. */
773 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
775 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover pixels when the width is not a multiple of 16, two at a time. */
777 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
782 p_y1 += i_source_margin;
783 p_y2 += i_source_margin;
784 p_u += i_source_margin_c;
785 p_v += i_source_margin_c;
786 p_line1 += i_dest_margin;
787 p_line2 += i_dest_margin;
792 /* use slower SSE2 unaligned fetch and store */
793 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
796 p_line2 += p_dest->p->i_pitch;
799 p_y2 += p_source->p[Y_PLANE].i_pitch;
801 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
803 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
805 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
810 p_y1 += i_source_margin;
811 p_y2 += i_source_margin;
812 p_u += i_source_margin_c;
813 p_v += i_source_margin_c;
814 p_line1 += i_dest_margin;
815 p_line2 += i_dest_margin;
818 /* make sure all SSE2 stores are visible thereafter */
820 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
823 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
824 /*****************************************************************************
825 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
826 *****************************************************************************/
/* I420_IUYV: placeholder only.  The visible statements mark p_source and
 * p_dest as unused and log an error through msg_Err; no line that writes to
 * p_dest is visible in this excerpt. */
827 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
830 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
832 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
835 /*****************************************************************************
836 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
837 *****************************************************************************/
/* I420_cyuv: read the Y, U and V planes of p_source and write the single
 * packed plane of p_dest bottom-up (the banner above describes the output
 * as upside-down UYVY), handling two picture lines per outer-loop iteration.
 * The picture size comes from p_filter->fmt_in.video. */
838 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
/* Both output cursors start beyond the visible area: p_line2 at the byte
 * following the last visible line, p_line1 one pitch further. */
841 uint8_t *p_line1 = p_dest->p->p_pixels +
842 p_dest->p->i_visible_lines * p_dest->p->i_pitch
843 + p_dest->p->i_pitch;
844 uint8_t *p_line2 = p_dest->p->p_pixels +
845 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
/* Input cursors: two luma lines plus the U and V planes. */
846 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
847 uint8_t *p_u = p_source->U_PIXELS;
848 uint8_t *p_v = p_source->V_PIXELS;
/* Bytes to skip at the end of each line: pitch minus visible pitch, for the
 * luma plane (index 0), a chroma plane (index 1) and the destination. */
852 const int i_source_margin = p_source->p[0].i_pitch
853 - p_source->p[0].i_visible_pitch;
854 const int i_source_margin_c = p_source->p[1].i_pitch
855 - p_source->p[1].i_visible_pitch;
856 const int i_dest_margin = p_dest->p->i_pitch
857 - p_dest->p->i_visible_pitch;
/* Every build except SSE2 compiles the loop below. */
859 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
860 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* Step both output cursors back by three pitches before each line pair.
 * NOTE(review): whether the first/last pair lands exactly on the last/first
 * visible line depends on how far the (not visible) pixel macros advance the
 * cursors -- verify the start offsets against them. */
862 p_line1 -= 3 * p_dest->p->i_pitch;
863 p_line2 -= 3 * p_dest->p->i_pitch;
866 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main run: one inner iteration per 8 pixels of width. */
868 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
870 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
876 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover pixels when the width is not a multiple of 8, two at a time. */
879 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the padding at the end of each source and destination line. */
884 p_y1 += i_source_margin;
885 p_y2 += i_source_margin;
886 p_u += i_source_margin_c;
887 p_v += i_source_margin_c;
888 p_line1 += i_dest_margin;
889 p_line2 += i_dest_margin;
892 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
893 /* re-enable FPU registers */
897 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
899 ** SSE2 128 bits fetch/store instructions are faster
900 ** if memory access is 16 bytes aligned
/* The aligned path is taken only when the Y pitch, the destination pitch
 * and both start pointers all have their low four bits clear. */
902 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
903 ((intptr_t)p_line2|(intptr_t)p_y2))) )
905 /* use faster SSE2 aligned fetch and store */
906 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): unlike the non-SSE2 loop above, which steps the output
 * cursors backwards by three pitches, this loop moves p_line2 FORWARD by one
 * pitch per iteration, starting from the byte after the last visible line
 * (see the initialisers).  This looks copied from the top-down UYVY
 * converter and would write beyond the visible lines instead of producing a
 * bottom-up picture -- confirm and fix. */
909 p_line2 += p_dest->p->i_pitch;
912 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2_CALL per 16 pixels of width. */
914 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
916 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover pixels when the width is not a multiple of 16, two at a time. */
918 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
923 p_y1 += i_source_margin;
924 p_y2 += i_source_margin;
925 p_u += i_source_margin_c;
926 p_v += i_source_margin_c;
927 p_line1 += i_dest_margin;
928 p_line2 += i_dest_margin;
933 /* use slower SSE2 unaligned fetch and store */
934 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): same forward-stepping concern as in the aligned loop. */
937 p_line2 += p_dest->p->i_pitch;
940 p_y2 += p_source->p[Y_PLANE].i_pitch;
942 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
944 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
946 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
951 p_y1 += i_source_margin;
952 p_y2 += i_source_margin;
953 p_u += i_source_margin_c;
954 p_v += i_source_margin_c;
955 p_line1 += i_dest_margin;
956 p_line2 += i_dest_margin;
959 /* make sure all SSE2 stores are visible thereafter */
961 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
963 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
965 /*****************************************************************************
966 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
967 *****************************************************************************/
968 #if defined (MODULE_NAME_IS_i420_yuy2)
969 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
972 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
973 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
974 uint8_t *p_u = p_source->U_PIXELS;
975 uint8_t *p_v = p_source->V_PIXELS;
979 const int i_source_margin = p_source->p[0].i_pitch
980 - p_source->p[0].i_visible_pitch;
981 const int i_source_margin_c = p_source->p[1].i_pitch
982 - p_source->p[1].i_visible_pitch;
983 const int i_dest_margin = p_dest->p->i_pitch
984 - p_dest->p->i_visible_pitch;
986 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
989 p_line2 += p_dest->p->i_pitch;
992 p_y2 += p_source->p[Y_PLANE].i_pitch;
994 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
1000 p_y1 += i_source_margin;
1001 p_y2 += i_source_margin;
1002 p_u += i_source_margin_c;
1003 p_v += i_source_margin_c;
1004 p_line1 += i_dest_margin;
1005 p_line2 += i_dest_margin;