1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
/* Input chromas handled by every build variant; this string is pasted into
 * the module description. */
44 #define SRC_FOURCC "I420,IYUV,YV12"
/* Output chromas advertised per build variant (also pasted into the module
 * description). Only the plain C build lists Y211; the AltiVec build lists
 * neither IUYV nor cyuv. The MMX and SSE2 builds additionally define
 * VLC_TARGET. */
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
49 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
50 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
51 # define VLC_TARGET VLC_MMX
52 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
54 # define VLC_TARGET VLC_SSE
55 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
56 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
60 /*****************************************************************************
61 * Local and extern prototypes.
62 *****************************************************************************/
/* Module activation hook (passed to set_callbacks()). */
63 static int Activate ( vlc_object_t * );
/* Converters available in every build variant. The void functions take
 * (filter, source picture, destination picture); the picture_t *-returning
 * *_Filter functions are the ones Activate stores in pf_video_filter. */
65 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
66 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
67 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
68 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
69 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
70 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
/* IUYV and cyuv converters are declared for every variant except AltiVec. */
71 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
72 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
73 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
74 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
75 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
/* The Y211 converter exists only in the plain C build. */
77 #if defined (MODULE_NAME_IS_i420_yuy2)
78 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
79 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
82 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
83 /* Initialize MMX-specific constants */
/* i_00ffw has 0x00ff in each of its four 16-bit words. */
84 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
/* i_80w has 0x80 in each of its four low bytes; the upper 32 bits are zero.
 * NOTE(review): neither constant is referenced by name in this file section;
 * presumably they are used by the MMX macros from i420_yuy2.h - confirm. */
85 static const uint64_t i_80w = 0x0000000080808080ULL;
88 /*****************************************************************************
90 *****************************************************************************/
/* Per-variant module description, "video filter2" capability score and CPU
 * check. Scores: plain C 80, MMX 160, SSE2 250, AltiVec 250. Each branch also
 * defines vlc_CPU_capable(), which Activate tests before accepting a filter;
 * the plain C build defines it as always true. */
92 #if defined (MODULE_NAME_IS_i420_yuy2)
93 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
94 set_capability( "video filter2", 80 )
95 # define vlc_CPU_capable() (true)
96 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
97 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
98 set_capability( "video filter2", 160 )
99 # define vlc_CPU_capable() vlc_CPU_MMX()
100 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
101 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
102 set_capability( "video filter2", 250 )
103 # define vlc_CPU_capable() vlc_CPU_SSE2()
104 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* NOTE(review): this branch wraps the string in _() and ends with ';', where
 * the three branches above use N_() and no terminator - confirm intended. */
106 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
107 set_capability( "video filter2", 250 )
108 # define vlc_CPU_capable() vlc_CPU_ALTIVEC()
/* Activate is the open callback; no close callback is registered. */
110 set_callbacks( Activate, NULL )
113 /*****************************************************************************
114 * Activate: select the chroma conversion function
115 *****************************************************************************
116 * This function checks the CPU capability and the picture dimensions, then
116 * installs the conversion callback that matches the output chroma
117 *****************************************************************************/
/*
 * Activate: module open callback.
 *
 * p_this is cast to the filter_t being opened. The function tests, in order:
 * the CPU capability of this build variant, whether the input width or height
 * is odd, and whether input and output dimensions differ. It then switches on
 * the input chroma and, inside that, on the output chroma to install the
 * matching *_Filter function in p_filter->pf_video_filter.
 * NOTE(review): the statements taken when a test fails and the return values
 * are presumably error returns that reject the filter - confirm.
 */
118 static int Activate( vlc_object_t *p_this )
120 filter_t *p_filter = (filter_t *)p_this;
/* Variant-specific CPU check (always true in the plain C build). */
122 if( !vlc_CPU_capable() )
/* Odd width or odd height. */
124 if( p_filter->fmt_in.video.i_width & 1
125 || p_filter->fmt_in.video.i_height & 1 )
/* Input and output sizes must be identical. */
130 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
131 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
134 switch( p_filter->fmt_in.video.i_chroma )
/* Pick the converter from the requested output chroma. */
138 switch( p_filter->fmt_out.video.i_chroma )
141 p_filter->pf_video_filter = I420_YUY2_Filter;
145 p_filter->pf_video_filter = I420_YVYU_Filter;
149 p_filter->pf_video_filter = I420_UYVY_Filter;
/* IUYV and cyuv outputs are not offered by the AltiVec build. */
151 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
152 case VLC_FOURCC('I','U','Y','V'):
153 p_filter->pf_video_filter = I420_IUYV_Filter;
157 p_filter->pf_video_filter = I420_cyuv_Filter;
/* Y211 output is offered by the plain C build only. */
161 #if defined (MODULE_NAME_IS_i420_yuy2)
163 p_filter->pf_video_filter = I420_Y211_Filter;
/*
 * read_cycles: executes the x86 "rdtsc" instruction and receives its result
 * in the 64-bit local v.
 * NOTE(review): GCC documents the "A" constraint as the edx:eax pair only on
 * 32-bit x86; on x86-64 it selects a single register, so v would not hold the
 * full counter there - confirm this helper is only built for 32-bit targets.
 */
180 static inline unsigned long long read_cycles(void)
182 unsigned long long v;
183 __asm__ __volatile__("rdtsc" : "=A" (v): );
189 /* Following functions are local */
/* One VIDEO_FILTER_WRAPPER instantiation per converter, under the same
 * build-variant conditions as the prototypes.
 * NOTE(review): the macro is defined outside this file section; presumably it
 * generates the matching <name>_Filter entry point - confirm. */
191 VIDEO_FILTER_WRAPPER( I420_YUY2 )
192 VIDEO_FILTER_WRAPPER( I420_YVYU )
193 VIDEO_FILTER_WRAPPER( I420_UYVY )
194 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
195 VIDEO_FILTER_WRAPPER( I420_IUYV )
196 VIDEO_FILTER_WRAPPER( I420_cyuv )
198 #if defined (MODULE_NAME_IS_i420_yuy2)
199 VIDEO_FILTER_WRAPPER( I420_Y211 )
202 /*****************************************************************************
203 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
204 *****************************************************************************/
/*
 * I420_YUY2: reads the Y, U and V planes of p_source and writes the single
 * packed plane p_dest->p. Every outer loop runs once per pair of rows
 * (height / 2 iterations, or height / 4 in the AltiVec 16-pixel layout), with
 * p_y2 / p_line2 stepped down one pitch per pair.
 *
 * Code paths by build variant:
 *  - AltiVec: vector loops when width is a multiple of 32 (and height of 2)
 *    or a multiple of 16 (and height of 4); otherwise the C loops.
 *  - plain C and MMX: 8 pixels per inner iteration, then the remaining
 *    width % 8 columns two at a time.
 *  - SSE2: 16 pixels per inner iteration, with an aligned and an unaligned
 *    variant, then the remaining width % 16 columns two at a time.
 */
206 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
/* Only the second-row cursors (p_line2, p_y2) are initialised here. */
209 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
210 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
211 uint8_t *p_u = p_source->U_PIXELS;
212 uint8_t *p_v = p_source->V_PIXELS;
216 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step the second-row cursors down by one row of their plane. */
217 #define VEC_NEXT_LINES( ) \
219 p_line2 += p_dest->p->i_pitch; \
221 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes of U and 16 bytes of V, advancing both chroma cursors. */
223 #define VEC_LOAD_UV( ) \
224 u_vec = vec_ld( 0, p_u ); p_u += 16; \
225 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleave U with V using merge operation a, then interleave 16 luma bytes
 * of each row with that chroma vector (luma first) and store 32 bytes per
 * row. The same chroma vector is used for both rows. */
227 #define VEC_MERGE( a ) \
228 uv_vec = a( u_vec, v_vec ); \
229 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
230 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
231 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
232 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
233 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
234 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
236 vector unsigned char u_vec;
237 vector unsigned char v_vec;
238 vector unsigned char uv_vec;
239 vector unsigned char y_vec;
/* True when width % 32 and height % 2 are both zero. */
241 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
242 ( p_filter->fmt_in.video.i_height % 2 ) ) )
244 /* Width is a multiple of 32, we take 2 lines at a time */
245 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
248 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
/* High then low half of the loaded chroma: 2 x 16 = 32 pixels per row. */
251 VEC_MERGE( vec_mergeh );
252 VEC_MERGE( vec_mergel );
256 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
258 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
259 ( p_filter->fmt_in.video.i_height % 4 ) ) )
261 /* Width is only a multiple of 16, we take 4 lines at a time */
262 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
264 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
266 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
269 VEC_MERGE( vec_mergeh );
270 VEC_MERGE( vec_mergel );
273 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
275 VEC_MERGE( vec_mergeh );
277 /* Line 3 and 4, pixels 0 to 16 */
279 VEC_MERGE( vec_mergel );
281 /* Line 3 and 4, pixels 16 to ( width ) */
282 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
285 VEC_MERGE( vec_mergeh );
286 VEC_MERGE( vec_mergel );
293 /* Crap, use the C version */
294 #undef VEC_NEXT_LINES
/* Per-row padding (pitch minus visible pitch) of the luma plane, the chroma
 * plane p[1] and the destination plane; added to the cursors below. */
299 const int i_source_margin = p_source->p[0].i_pitch
300 - p_source->p[0].i_visible_pitch;
301 const int i_source_margin_c = p_source->p[1].i_pitch
302 - p_source->p[1].i_visible_pitch;
303 const int i_dest_margin = p_dest->p->i_pitch
304 - p_dest->p->i_visible_pitch;
306 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* One iteration per pair of rows. */
307 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
310 p_line2 += p_dest->p->i_pitch;
313 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 8 pixels per iteration: C loop or MMX loop by variant. */
315 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
316 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
324 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
326 MMX_CALL( MMX_YUV420_YUYV );
/* Remaining width % 8 columns, two pixels per iteration. */
329 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add each plane's row padding to its cursors. */
334 p_y1 += i_source_margin;
335 p_y2 += i_source_margin;
336 p_u += i_source_margin_c;
337 p_v += i_source_margin_c;
338 p_line1 += i_dest_margin;
339 p_line2 += i_dest_margin;
342 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
343 /* re-enable FPU registers */
347 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
351 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
353 ** SSE2 128 bits fetch/store instructions are faster
354 ** if memory access is 16 bytes aligned
357 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
358 ((intptr_t)p_line2|(intptr_t)p_y2))) )
/* The test above ORs the luma pitch, destination pitch, destination base and
 * luma base and requires the low 4 bits to be zero (all multiples of 16). */
360 /* use faster SSE2 aligned fetch and store */
361 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
364 p_line2 += p_dest->p->i_pitch;
367 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 16 pixels per iteration. */
369 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
371 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
/* Remaining width % 16 columns, two pixels per iteration. */
373 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
378 p_y1 += i_source_margin;
379 p_y2 += i_source_margin;
380 p_u += i_source_margin_c;
381 p_v += i_source_margin_c;
382 p_line1 += i_dest_margin;
383 p_line2 += i_dest_margin;
388 /* use slower SSE2 unaligned fetch and store */
389 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
392 p_line2 += p_dest->p->i_pitch;
395 p_y2 += p_source->p[Y_PLANE].i_pitch;
397 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
399 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
401 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
406 p_y1 += i_source_margin;
407 p_y2 += i_source_margin;
408 p_u += i_source_margin_c;
409 p_v += i_source_margin_c;
410 p_line1 += i_dest_margin;
411 p_line2 += i_dest_margin;
414 /* make sure all SSE2 stores are visible thereafter */
417 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
420 /*****************************************************************************
421 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
422 *****************************************************************************/
/*
 * I420_YVYU: same structure as the YUYV converter, but the chroma is
 * interleaved V before U. Reads the Y, U and V planes of p_source and writes
 * the single packed plane p_dest->p, one pair of rows per outer iteration.
 *
 * Code paths by build variant:
 *  - AltiVec: vector loops for widths that are multiples of 32 or of 16
 *    (with height a multiple of 2 or 4 respectively); otherwise the C loops.
 *  - plain C and MMX: 8 pixels per inner iteration, then width % 8 columns
 *    two at a time.
 *  - SSE2: 16 pixels per inner iteration (aligned or unaligned variant), then
 *    width % 16 columns two at a time.
 */
424 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
/* Only the second-row cursors (p_line2, p_y2) are initialised here. */
427 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
428 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
429 uint8_t *p_u = p_source->U_PIXELS;
430 uint8_t *p_v = p_source->V_PIXELS;
434 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step the second-row cursors down by one row of their plane. */
435 #define VEC_NEXT_LINES( ) \
437 p_line2 += p_dest->p->i_pitch; \
439 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes of U and 16 bytes of V, advancing both chroma cursors. */
441 #define VEC_LOAD_UV( ) \
442 u_vec = vec_ld( 0, p_u ); p_u += 16; \
443 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleave V with U (V first) using merge operation a, then interleave 16
 * luma bytes of each row with that chroma vector (luma first) and store
 * 32 bytes per row. */
445 #define VEC_MERGE( a ) \
446 vu_vec = a( v_vec, u_vec ); \
447 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
448 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
449 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
450 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
451 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
452 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
454 vector unsigned char u_vec;
455 vector unsigned char v_vec;
456 vector unsigned char vu_vec;
457 vector unsigned char y_vec;
/* True when width % 32 and height % 2 are both zero. */
459 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
460 ( p_filter->fmt_in.video.i_height % 2 ) ) )
462 /* Width is a multiple of 32, we take 2 lines at a time */
463 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
466 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
469 VEC_MERGE( vec_mergeh );
470 VEC_MERGE( vec_mergel );
474 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
475 ( p_filter->fmt_in.video.i_height % 4 ) ) )
477 /* Width is only a multiple of 16, we take 4 lines at a time */
478 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
480 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
482 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
485 VEC_MERGE( vec_mergeh );
486 VEC_MERGE( vec_mergel );
489 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
491 VEC_MERGE( vec_mergeh );
493 /* Line 3 and 4, pixels 0 to 16 */
495 VEC_MERGE( vec_mergel );
497 /* Line 3 and 4, pixels 16 to ( width ) */
498 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
501 VEC_MERGE( vec_mergeh );
502 VEC_MERGE( vec_mergel );
508 /* Crap, use the C version */
509 #undef VEC_NEXT_LINES
/* Per-row padding (pitch minus visible pitch) of the luma plane, the chroma
 * plane p[1] and the destination plane; added to the cursors below. */
514 const int i_source_margin = p_source->p[0].i_pitch
515 - p_source->p[0].i_visible_pitch;
516 const int i_source_margin_c = p_source->p[1].i_pitch
517 - p_source->p[1].i_visible_pitch;
518 const int i_dest_margin = p_dest->p->i_pitch
519 - p_dest->p->i_visible_pitch;
521 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* One iteration per pair of rows. */
522 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
525 p_line2 += p_dest->p->i_pitch;
528 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 8 pixels per iteration; the body is C or MMX by variant. */
530 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
532 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
538 MMX_CALL( MMX_YUV420_YVYU );
/* Remaining width % 8 columns, two pixels per iteration. */
541 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add each plane's row padding to its cursors. */
546 p_y1 += i_source_margin;
547 p_y2 += i_source_margin;
548 p_u += i_source_margin_c;
549 p_v += i_source_margin_c;
550 p_line1 += i_dest_margin;
551 p_line2 += i_dest_margin;
554 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
555 /* re-enable FPU registers */
559 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
563 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
565 ** SSE2 128 bits fetch/store instructions are faster
566 ** if memory access is 16 bytes aligned
568 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
569 ((intptr_t)p_line2|(intptr_t)p_y2))) )
/* The test above requires the luma pitch, destination pitch, destination
 * base and luma base to all be multiples of 16. */
571 /* use faster SSE2 aligned fetch and store */
572 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
575 p_line2 += p_dest->p->i_pitch;
578 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 16 pixels per iteration. */
580 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
582 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
/* Remaining width % 16 columns, two pixels per iteration. */
584 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
589 p_y1 += i_source_margin;
590 p_y2 += i_source_margin;
591 p_u += i_source_margin_c;
592 p_v += i_source_margin_c;
593 p_line1 += i_dest_margin;
594 p_line2 += i_dest_margin;
599 /* use slower SSE2 unaligned fetch and store */
600 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
603 p_line2 += p_dest->p->i_pitch;
606 p_y2 += p_source->p[Y_PLANE].i_pitch;
608 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
610 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
612 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
617 p_y1 += i_source_margin;
618 p_y2 += i_source_margin;
619 p_u += i_source_margin_c;
620 p_v += i_source_margin_c;
621 p_line1 += i_dest_margin;
622 p_line2 += i_dest_margin;
625 /* make sure all SSE2 stores are visible thereafter */
627 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
630 /*****************************************************************************
631 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
632 *****************************************************************************/
/*
 * I420_UYVY: same structure as the YUYV converter, but each chroma byte is
 * placed before its luma byte. Reads the Y, U and V planes of p_source and
 * writes the single packed plane p_dest->p, one pair of rows per outer
 * iteration.
 *
 * Code paths by build variant:
 *  - AltiVec: vector loops for widths that are multiples of 32 or of 16
 *    (with height a multiple of 2 or 4 respectively); otherwise the C loops.
 *  - plain C and MMX: 8 pixels per inner iteration, then width % 8 columns
 *    two at a time.
 *  - SSE2: 16 pixels per inner iteration (aligned or unaligned variant), then
 *    width % 16 columns two at a time.
 */
634 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
/* Only the second-row cursors (p_line2, p_y2) are initialised here. */
637 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
638 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
639 uint8_t *p_u = p_source->U_PIXELS;
640 uint8_t *p_v = p_source->V_PIXELS;
644 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* Step the second-row cursors down by one row of their plane. */
645 #define VEC_NEXT_LINES( ) \
647 p_line2 += p_dest->p->i_pitch; \
649 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Load 16 bytes of U and 16 bytes of V, advancing both chroma cursors. */
651 #define VEC_LOAD_UV( ) \
652 u_vec = vec_ld( 0, p_u ); p_u += 16; \
653 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* Interleave U with V using merge operation a, then interleave that chroma
 * vector with 16 luma bytes of each row (chroma first) and store 32 bytes
 * per row. */
655 #define VEC_MERGE( a ) \
656 uv_vec = a( u_vec, v_vec ); \
657 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
658 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
659 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
660 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
661 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
662 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
664 vector unsigned char u_vec;
665 vector unsigned char v_vec;
666 vector unsigned char uv_vec;
667 vector unsigned char y_vec;
/* True when width % 32 and height % 2 are both zero. */
669 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
670 ( p_filter->fmt_in.video.i_height % 2 ) ) )
672 /* Width is a multiple of 32, we take 2 lines at a time */
673 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
676 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
679 VEC_MERGE( vec_mergeh );
680 VEC_MERGE( vec_mergel );
684 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
685 ( p_filter->fmt_in.video.i_height % 4 ) ) )
687 /* Width is only a multiple of 16, we take 4 lines at a time */
688 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
690 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
692 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
695 VEC_MERGE( vec_mergeh );
696 VEC_MERGE( vec_mergel );
699 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
701 VEC_MERGE( vec_mergeh );
703 /* Line 3 and 4, pixels 0 to 16 */
705 VEC_MERGE( vec_mergel );
707 /* Line 3 and 4, pixels 16 to ( width ) */
708 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
711 VEC_MERGE( vec_mergeh );
712 VEC_MERGE( vec_mergel );
718 /* Crap, use the C version */
719 #undef VEC_NEXT_LINES
/* Per-row padding (pitch minus visible pitch) of the luma plane, the chroma
 * plane p[1] and the destination plane; added to the cursors below. */
724 const int i_source_margin = p_source->p[0].i_pitch
725 - p_source->p[0].i_visible_pitch;
726 const int i_source_margin_c = p_source->p[1].i_pitch
727 - p_source->p[1].i_visible_pitch;
728 const int i_dest_margin = p_dest->p->i_pitch
729 - p_dest->p->i_visible_pitch;
731 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* One iteration per pair of rows. */
732 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
735 p_line2 += p_dest->p->i_pitch;
738 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 8 pixels per iteration; the body is C or MMX by variant. */
740 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
742 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
748 MMX_CALL( MMX_YUV420_UYVY );
/* Remaining width % 8 columns, two pixels per iteration. */
751 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Add each plane's row padding to its cursors. */
756 p_y1 += i_source_margin;
757 p_y2 += i_source_margin;
758 p_u += i_source_margin_c;
759 p_v += i_source_margin_c;
760 p_line1 += i_dest_margin;
761 p_line2 += i_dest_margin;
764 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
765 /* re-enable FPU registers */
769 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
773 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
775 ** SSE2 128 bits fetch/store instructions are faster
776 ** if memory access is 16 bytes aligned
778 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
779 ((intptr_t)p_line2|(intptr_t)p_y2))) )
/* The test above requires the luma pitch, destination pitch, destination
 * base and luma base to all be multiples of 16. */
781 /* use faster SSE2 aligned fetch and store */
782 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
785 p_line2 += p_dest->p->i_pitch;
788 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 16 pixels per iteration. */
790 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
792 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Remaining width % 16 columns, two pixels per iteration. */
794 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
799 p_y1 += i_source_margin;
800 p_y2 += i_source_margin;
801 p_u += i_source_margin_c;
802 p_v += i_source_margin_c;
803 p_line1 += i_dest_margin;
804 p_line2 += i_dest_margin;
809 /* use slower SSE2 unaligned fetch and store */
810 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
813 p_line2 += p_dest->p->i_pitch;
816 p_y2 += p_source->p[Y_PLANE].i_pitch;
818 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
820 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
822 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
827 p_y1 += i_source_margin;
828 p_y2 += i_source_margin;
829 p_u += i_source_margin_c;
830 p_v += i_source_margin_c;
831 p_line1 += i_dest_margin;
832 p_line2 += i_dest_margin;
835 /* make sure all SSE2 stores are visible thereafter */
837 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
840 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
841 /*****************************************************************************
842 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
843 *****************************************************************************/
/*
 * I420_IUYV: placeholder converter. Both pictures are marked unused and the
 * only visible action is logging an "unimplemented" error on p_filter, so
 * the destination picture is left unwritten by the code shown here.
 */
844 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
847 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
849 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
852 /*****************************************************************************
853 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
854 *****************************************************************************/
/*
 * I420_cyuv: writes the packed destination plane bottom-up. The row loops use
 * the same MMX / SSE2 UYVY macros as the UYVY converter, and the destination
 * cursors start beyond the last visible row instead of at p_pixels.
 *
 * Non-SSE2 builds move both destination cursors back by three pitches per
 * pair of rows. The SSE2 branches instead step p_line2 forward by one pitch.
 */
856 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
/* p_line1 starts one pitch beyond p_line2, which itself points just past the
 * last visible row of the destination plane. */
859 uint8_t *p_line1 = p_dest->p->p_pixels +
860 p_dest->p->i_visible_lines * p_dest->p->i_pitch
861 + p_dest->p->i_pitch;
862 uint8_t *p_line2 = p_dest->p->p_pixels +
863 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
864 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
865 uint8_t *p_u = p_source->U_PIXELS;
866 uint8_t *p_v = p_source->V_PIXELS;
/* Per-row padding (pitch minus visible pitch) of the luma plane, the chroma
 * plane p[1] and the destination plane; added to the cursors below. */
870 const int i_source_margin = p_source->p[0].i_pitch
871 - p_source->p[0].i_visible_pitch;
872 const int i_source_margin_c = p_source->p[1].i_pitch
873 - p_source->p[1].i_visible_pitch;
874 const int i_dest_margin = p_dest->p->i_pitch
875 - p_dest->p->i_visible_pitch;
877 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* One iteration per pair of rows. */
878 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): assuming the row loops advance each destination cursor by
 * i_visible_pitch, the net move is two rows up per pair; the first pair then
 * lands on rows visible_lines-2 and visible_lines-3 and the last pair's
 * p_line2 would sit one row before p_pixels - confirm against the row-writing
 * code. */
880 p_line1 -= 3 * p_dest->p->i_pitch;
881 p_line2 -= 3 * p_dest->p->i_pitch;
884 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 8 pixels per iteration; the body is C or MMX by variant. */
886 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
888 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
894 MMX_CALL( MMX_YUV420_UYVY );
/* Remaining width % 8 columns, two pixels per iteration. */
897 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Add each plane's row padding to its cursors. */
902 p_y1 += i_source_margin;
903 p_y2 += i_source_margin;
904 p_u += i_source_margin_c;
905 p_v += i_source_margin_c;
906 p_line1 += i_dest_margin;
907 p_line2 += i_dest_margin;
910 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
911 /* re-enable FPU registers */
915 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
917 ** SSE2 128 bits fetch/store instructions are faster
918 ** if memory access is 16 bytes aligned
920 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
921 ((intptr_t)p_line2|(intptr_t)p_y2))) )
/* The test above requires the luma pitch, destination pitch, p_line2 and the
 * luma base to all be multiples of 16. */
923 /* use faster SSE2 aligned fetch and store */
924 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): p_line2 starts just past the last visible row and is stepped
 * forward here, unlike the backward step of the non-SSE2 branch; this looks
 * like it writes beyond the picture rather than upside-down - confirm. */
927 p_line2 += p_dest->p->i_pitch;
930 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main columns, 16 pixels per iteration. */
932 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
934 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Remaining width % 16 columns, two pixels per iteration. */
936 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
941 p_y1 += i_source_margin;
942 p_y2 += i_source_margin;
943 p_u += i_source_margin_c;
944 p_v += i_source_margin_c;
945 p_line1 += i_dest_margin;
946 p_line2 += i_dest_margin;
951 /* use slower SSE2 unaligned fetch and store */
952 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): same forward step as the aligned branch - confirm. */
955 p_line2 += p_dest->p->i_pitch;
958 p_y2 += p_source->p[Y_PLANE].i_pitch;
960 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
962 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
964 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
969 p_y1 += i_source_margin;
970 p_y2 += i_source_margin;
971 p_u += i_source_margin_c;
972 p_v += i_source_margin_c;
973 p_line1 += i_dest_margin;
974 p_line2 += i_dest_margin;
977 /* make sure all SSE2 stores are visible thereafter */
979 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
981 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
983 /*****************************************************************************
984 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
985 *****************************************************************************/
986 #if defined (MODULE_NAME_IS_i420_yuy2)
/*
 * I420_Y211: plain C build only. Reads the Y, U and V planes of p_source and
 * writes the single packed plane p_dest->p, one pair of rows per outer
 * iteration and 8 source pixels per inner iteration.
 * NOTE(review): unlike the other converters, no trailing loop for the
 * width % 8 columns is present - confirm whether such widths are expected.
 */
987 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
/* Only the second-row cursors (p_line2, p_y2) are initialised here. */
990 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
991 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
992 uint8_t *p_u = p_source->U_PIXELS;
993 uint8_t *p_v = p_source->V_PIXELS;
/* Per-row padding (pitch minus visible pitch) of the luma plane, the chroma
 * plane p[1] and the destination plane; added to the cursors below. */
997 const int i_source_margin = p_source->p[0].i_pitch
998 - p_source->p[0].i_visible_pitch;
999 const int i_source_margin_c = p_source->p[1].i_pitch
1000 - p_source->p[1].i_visible_pitch;
1001 const int i_dest_margin = p_dest->p->i_pitch
1002 - p_dest->p->i_visible_pitch;
/* One iteration per pair of rows. */
1004 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
1007 p_line2 += p_dest->p->i_pitch;
1010 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* 8 source pixels per iteration. */
1012 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* Add each plane's row padding to its cursors. */
1018 p_y1 += i_source_margin;
1019 p_y2 += i_source_margin;
1020 p_u += i_source_margin_c;
1021 p_v += i_source_margin_c;
1022 p_line1 += i_dest_margin;
1023 p_line2 += i_dest_margin;