1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
/* Input chromas, shared by every build variant; this string is spliced into
 * the module description. */
44 #define SRC_FOURCC "I420,IYUV,YV12"
/* Output chromas named in the description of each build variant: only the
 * plain C build lists Y211, and the AltiVec build leaves out IUYV and cyuv. */
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
48 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
49 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
50 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
51 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
52 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
56 /*****************************************************************************
57 * Local and extern prototypes.
58 *****************************************************************************/
/* Module entry point handed to set_callbacks(). */
59 static int Activate ( vlc_object_t * );
/* Conversion routines; Activate() stores one of them in
 * p_filter->pf_video_filter_io according to the output chroma. */
61 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
62 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
63 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
/* The next two converters are not declared for the AltiVec build. */
64 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
65 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
66 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
/* Y211 output is guarded by the plain C build macro. */
68 #if defined (MODULE_NAME_IS_i420_yuy2)
69 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
72 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
73 /* Initialize MMX-specific constants */
/* 0x00ff in each of the four 16-bit words of the 64-bit value. */
74 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
/* 0x80 in each of the four low bytes; the upper 32 bits are zero.
 * NOTE(review): the users of both constants are not visible in this file
 * excerpt - presumably the MMX kernels in i420_yuy2.h; confirm. */
75 static const uint64_t i_80w = 0x0000000080808080ULL;
78 /*****************************************************************************
80 *****************************************************************************/
/* One description / capability pair per build variant.  The "chroma"
 * capability value is 80 for the plain C build, 100 for MMX and AltiVec and
 * 120 for SSE2 (presumably a selection priority - confirm against the VLC
 * module loader).  Each SIMD variant also declares the CPU feature it needs
 * through add_requirement(). */
82 #if defined (MODULE_NAME_IS_i420_yuy2)
83 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
84 set_capability( "chroma", 80 );
85 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
86 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
87 set_capability( "chroma", 100 );
88 add_requirement( MMX );
89 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
90 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
91 set_capability( "chroma", 120 );
92 add_requirement( SSE2 );
93 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
95 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
96 set_capability( "chroma", 100 );
97 add_requirement( ALTIVEC );
/* Activate is the first callback; the second callback slot is left NULL. */
99 set_callbacks( Activate, NULL );
102 /*****************************************************************************
103 * Activate: allocate a chroma function
104 *****************************************************************************
105 * This function allocates and initializes a chroma function
106 *****************************************************************************/
/*
 * Treats p_this as a filter_t, inspects the input and output chromas and
 * stores the matching conversion routine in p_filter->pf_video_filter_io.
 * NOTE(review): the guard bodies, break statements and return statements are
 * not visible in this excerpt; confirm the failure paths and return values
 * against the full file.
 */
107 static int Activate( vlc_object_t *p_this )
109 filter_t *p_filter = (filter_t *)p_this;
/* True when the input width or the input height is odd */
111 if( p_filter->fmt_in.video.i_width & 1
112 || p_filter->fmt_in.video.i_height & 1 )
/* Only YV12, I420 and IYUV inputs are dispatched */
117 switch( p_filter->fmt_in.video.i_chroma )
119 case VLC_FOURCC('Y','V','1','2'):
120 case VLC_FOURCC('I','4','2','0'):
121 case VLC_FOURCC('I','Y','U','V'):
122 switch( p_filter->fmt_out.video.i_chroma )
/* YUY2 and YUNV share one converter */
124 case VLC_FOURCC('Y','U','Y','2'):
125 case VLC_FOURCC('Y','U','N','V'):
126 p_filter->pf_video_filter_io = I420_YUY2;
129 case VLC_FOURCC('Y','V','Y','U'):
130 p_filter->pf_video_filter_io = I420_YVYU;
/* UYVY, UYNV and Y422 share one converter */
133 case VLC_FOURCC('U','Y','V','Y'):
134 case VLC_FOURCC('U','Y','N','V'):
135 case VLC_FOURCC('Y','4','2','2'):
136 p_filter->pf_video_filter_io = I420_UYVY;
/* IUYV and cyuv outputs are compiled out of the AltiVec build */
138 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
139 case VLC_FOURCC('I','U','Y','V'):
140 p_filter->pf_video_filter_io = I420_IUYV;
143 case VLC_FOURCC('c','y','u','v'):
144 p_filter->pf_video_filter_io = I420_cyuv;
/* Y211 output is guarded by the plain C build macro */
148 #if defined (MODULE_NAME_IS_i420_yuy2)
149 case VLC_FOURCC('Y','2','1','1'):
150 p_filter->pf_video_filter_io = I420_Y211;
/*
 * Executes the x86 "rdtsc" instruction and receives its result in v.
 * NOTE(review): per the GCC machine-constraint documentation the "A" output
 * constraint names the edx:eax register pair only on 32-bit x86; on x86-64 a
 * 64-bit operand would be placed in a single register and the upper half of
 * the counter would be lost.  Confirm this helper is only built for 32-bit
 * targets (or is disabled) before relying on it.
 */
167 static inline unsigned long long read_cycles(void)
169 unsigned long long v;
170 __asm__ __volatile__("rdtsc" : "=A" (v): );
176 /* Following functions are local */
177 /*****************************************************************************
178 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
179 *****************************************************************************/
/*
 * Reads the Y, U and V planes of p_source and writes packed output into the
 * single plane of p_dest.  Rows are handled in pairs: every loop runs over
 * i_height / 2 (or i_height / 4 in one AltiVec branch) and works with two
 * luma pointers (p_y1, p_y2) and two output pointers (p_line1, p_line2).
 * The build variant selects the inner loop: AltiVec vector merges, the
 * MMX_CALL() kernel, the SSE2_CALL() kernels, or the C version.
 * NOTE(review): this listing does not show every line of the function (the
 * end of the signature, braces and some statements are absent); the notes
 * below describe only what is visible.
 */
180 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
183 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
184 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
185 uint8_t *p_u = p_source->U_PIXELS;
186 uint8_t *p_v = p_source->V_PIXELS;
190 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES steps the output and luma row pointers by their pitches. */
191 #define VEC_NEXT_LINES( ) \
193 p_line2 += p_dest->p->i_pitch; \
195 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV loads 16 bytes of U and 16 bytes of V and advances both. */
197 #define VEC_LOAD_UV( ) \
198 u_vec = vec_ld( 0, p_u ); p_u += 16; \
199 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ) interleaves U with V using merge function a, then interleaves
 * luma with that result (Y first) for each of the two rows, storing
 * 2 x 16 bytes per output row. */
201 #define VEC_MERGE( a ) \
202 uv_vec = a( u_vec, v_vec ); \
203 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
204 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
205 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
206 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
207 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
208 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
210 vector unsigned char u_vec;
211 vector unsigned char v_vec;
212 vector unsigned char uv_vec;
213 vector unsigned char y_vec;
215 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
216 ( p_filter->fmt_in.video.i_height % 2 ) ) )
218 /* Width is a multiple of 32, we take 2 lines at a time */
219 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
222 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
225 VEC_MERGE( vec_mergeh );
226 VEC_MERGE( vec_mergel );
230 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
231 ( p_filter->fmt_in.video.i_height % 4 ) ) )
233 /* Width is only a multiple of 16, we take 4 lines at a time */
234 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
236 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
238 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
241 VEC_MERGE( vec_mergeh );
242 VEC_MERGE( vec_mergel );
245 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
247 VEC_MERGE( vec_mergeh );
249 /* Line 3 and 4, pixels 0 to 16 */
251 VEC_MERGE( vec_mergel );
253 /* Line 3 and 4, pixels 16 to ( width ) */
254 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
257 VEC_MERGE( vec_mergeh );
258 VEC_MERGE( vec_mergel );
264 /* Fall back to the C version */
265 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a row and the next row, for
 * the luma plane, a chroma plane and the packed output respectively. */
270 const int i_source_margin = p_source->p[0].i_pitch
271 - p_source->p[0].i_visible_pitch;
272 const int i_source_margin_c = p_source->p[1].i_pitch
273 - p_source->p[1].i_visible_pitch;
274 const int i_dest_margin = p_dest->p->i_pitch
275 - p_dest->p->i_visible_pitch;
277 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
278 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
281 p_line2 += p_dest->p->i_pitch;
284 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 8 pixels */
286 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
287 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
295 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
297 MMX_CALL( MMX_YUV420_YUYV );
/* Leftover pixel pairs when the width is not a multiple of 8 */
300 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every pointer */
305 p_y1 += i_source_margin;
306 p_y2 += i_source_margin;
307 p_u += i_source_margin_c;
308 p_v += i_source_margin_c;
309 p_line1 += i_dest_margin;
310 p_line2 += i_dest_margin;
313 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
314 /* re-enable FPU registers */
318 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
322 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
324 ** SSE2 128 bits fetch/store instructions are faster
325 ** if memory access is 16 bytes aligned
/* Aligned kernel only when the luma pitch, the output pitch and both start
 * pointers are all multiples of 16 */
328 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
329 ((intptr_t)p_line2|(intptr_t)p_y2))) )
331 /* use faster SSE2 aligned fetch and store */
332 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
335 p_line2 += p_dest->p->i_pitch;
338 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 16 pixels */
340 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
342 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
/* Leftover pixel pairs when the width is not a multiple of 16 */
344 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
349 p_y1 += i_source_margin;
350 p_y2 += i_source_margin;
351 p_u += i_source_margin_c;
352 p_v += i_source_margin_c;
353 p_line1 += i_dest_margin;
354 p_line2 += i_dest_margin;
359 /* use slower SSE2 unaligned fetch and store */
360 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
363 p_line2 += p_dest->p->i_pitch;
366 p_y2 += p_source->p[Y_PLANE].i_pitch;
368 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
370 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
372 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
377 p_y1 += i_source_margin;
378 p_y2 += i_source_margin;
379 p_u += i_source_margin_c;
380 p_v += i_source_margin_c;
381 p_line1 += i_dest_margin;
382 p_line2 += i_dest_margin;
385 /* make sure all SSE2 stores are visible thereafter */
388 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
391 /*****************************************************************************
392 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
393 *****************************************************************************/
/*
 * Reads the Y, U and V planes of p_source and writes packed output into the
 * single plane of p_dest, with V ahead of U in the chroma interleave (see
 * VEC_MERGE below).  Rows are handled in pairs through two luma pointers
 * (p_y1, p_y2) and two output pointers (p_line1, p_line2).
 * The build variant selects the inner loop: AltiVec vector merges, the
 * MMX_CALL() kernel, the SSE2_CALL() kernels, or the C version.
 * NOTE(review): this listing does not show every line of the function (the
 * end of the signature, braces and some statements are absent); the notes
 * below describe only what is visible.
 */
394 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
397 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
398 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
399 uint8_t *p_u = p_source->U_PIXELS;
400 uint8_t *p_v = p_source->V_PIXELS;
404 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES steps the output and luma row pointers by their pitches. */
405 #define VEC_NEXT_LINES( ) \
407 p_line2 += p_dest->p->i_pitch; \
409 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV loads 16 bytes of U and 16 bytes of V and advances both. */
411 #define VEC_LOAD_UV( ) \
412 u_vec = vec_ld( 0, p_u ); p_u += 16; \
413 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ) interleaves V with U (V first) using merge function a, then
 * interleaves luma with that result (Y first) for each of the two rows,
 * storing 2 x 16 bytes per output row. */
415 #define VEC_MERGE( a ) \
416 vu_vec = a( v_vec, u_vec ); \
417 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
418 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
419 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
420 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
421 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
422 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
424 vector unsigned char u_vec;
425 vector unsigned char v_vec;
426 vector unsigned char vu_vec;
427 vector unsigned char y_vec;
429 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
430 ( p_filter->fmt_in.video.i_height % 2 ) ) )
432 /* Width is a multiple of 32, we take 2 lines at a time */
433 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
436 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
439 VEC_MERGE( vec_mergeh );
440 VEC_MERGE( vec_mergel );
444 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
445 ( p_filter->fmt_in.video.i_height % 4 ) ) )
447 /* Width is only a multiple of 16, we take 4 lines at a time */
448 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
450 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
452 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
455 VEC_MERGE( vec_mergeh );
456 VEC_MERGE( vec_mergel );
459 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
461 VEC_MERGE( vec_mergeh );
463 /* Line 3 and 4, pixels 0 to 16 */
465 VEC_MERGE( vec_mergel );
467 /* Line 3 and 4, pixels 16 to ( width ) */
468 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
471 VEC_MERGE( vec_mergeh );
472 VEC_MERGE( vec_mergel );
478 /* Fall back to the C version */
479 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a row and the next row, for
 * the luma plane, a chroma plane and the packed output respectively. */
484 const int i_source_margin = p_source->p[0].i_pitch
485 - p_source->p[0].i_visible_pitch;
486 const int i_source_margin_c = p_source->p[1].i_pitch
487 - p_source->p[1].i_visible_pitch;
488 const int i_dest_margin = p_dest->p->i_pitch
489 - p_dest->p->i_visible_pitch;
491 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
492 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
495 p_line2 += p_dest->p->i_pitch;
498 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 8 pixels */
500 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
502 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
508 MMX_CALL( MMX_YUV420_YVYU );
/* Leftover pixel pairs when the width is not a multiple of 8 */
511 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every pointer */
516 p_y1 += i_source_margin;
517 p_y2 += i_source_margin;
518 p_u += i_source_margin_c;
519 p_v += i_source_margin_c;
520 p_line1 += i_dest_margin;
521 p_line2 += i_dest_margin;
524 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
525 /* re-enable FPU registers */
529 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
533 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
535 ** SSE2 128 bits fetch/store instructions are faster
536 ** if memory access is 16 bytes aligned
/* Aligned kernel only when the luma pitch, the output pitch and both start
 * pointers are all multiples of 16 */
538 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
539 ((intptr_t)p_line2|(intptr_t)p_y2))) )
541 /* use faster SSE2 aligned fetch and store */
542 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
545 p_line2 += p_dest->p->i_pitch;
548 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 16 pixels */
550 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
552 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
/* Leftover pixel pairs when the width is not a multiple of 16 */
554 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
559 p_y1 += i_source_margin;
560 p_y2 += i_source_margin;
561 p_u += i_source_margin_c;
562 p_v += i_source_margin_c;
563 p_line1 += i_dest_margin;
564 p_line2 += i_dest_margin;
569 /* use slower SSE2 unaligned fetch and store */
570 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
573 p_line2 += p_dest->p->i_pitch;
576 p_y2 += p_source->p[Y_PLANE].i_pitch;
578 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
580 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
582 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
587 p_y1 += i_source_margin;
588 p_y2 += i_source_margin;
589 p_u += i_source_margin_c;
590 p_v += i_source_margin_c;
591 p_line1 += i_dest_margin;
592 p_line2 += i_dest_margin;
595 /* make sure all SSE2 stores are visible thereafter */
597 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
600 /*****************************************************************************
601 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
602 *****************************************************************************/
/*
 * Reads the Y, U and V planes of p_source and writes packed output into the
 * single plane of p_dest, with the chroma byte ahead of the luma byte (see
 * VEC_MERGE below).  Rows are handled in pairs through two luma pointers
 * (p_y1, p_y2) and two output pointers (p_line1, p_line2).
 * The build variant selects the inner loop: AltiVec vector merges, the
 * MMX_CALL() kernel, the SSE2_CALL() kernels, or the C version.
 * NOTE(review): this listing does not show every line of the function (the
 * end of the signature, braces and some statements are absent); the notes
 * below describe only what is visible.
 */
603 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
606 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
607 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
608 uint8_t *p_u = p_source->U_PIXELS;
609 uint8_t *p_v = p_source->V_PIXELS;
613 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES steps the output and luma row pointers by their pitches. */
614 #define VEC_NEXT_LINES( ) \
616 p_line2 += p_dest->p->i_pitch; \
618 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV loads 16 bytes of U and 16 bytes of V and advances both. */
620 #define VEC_LOAD_UV( ) \
621 u_vec = vec_ld( 0, p_u ); p_u += 16; \
622 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ) interleaves U with V using merge function a, then interleaves
 * that result with luma (chroma first) for each of the two rows, storing
 * 2 x 16 bytes per output row. */
624 #define VEC_MERGE( a ) \
625 uv_vec = a( u_vec, v_vec ); \
626 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
627 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
628 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
629 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
630 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
631 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
633 vector unsigned char u_vec;
634 vector unsigned char v_vec;
635 vector unsigned char uv_vec;
636 vector unsigned char y_vec;
638 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
639 ( p_filter->fmt_in.video.i_height % 2 ) ) )
641 /* Width is a multiple of 32, we take 2 lines at a time */
642 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
645 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
648 VEC_MERGE( vec_mergeh );
649 VEC_MERGE( vec_mergel );
653 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
654 ( p_filter->fmt_in.video.i_height % 4 ) ) )
656 /* Width is only a multiple of 16, we take 4 lines at a time */
657 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
659 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
661 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
664 VEC_MERGE( vec_mergeh );
665 VEC_MERGE( vec_mergel );
668 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
670 VEC_MERGE( vec_mergeh );
672 /* Line 3 and 4, pixels 0 to 16 */
674 VEC_MERGE( vec_mergel );
676 /* Line 3 and 4, pixels 16 to ( width ) */
677 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
680 VEC_MERGE( vec_mergeh );
681 VEC_MERGE( vec_mergel );
687 /* Fall back to the C version */
688 #undef VEC_NEXT_LINES
/* Bytes between the end of the visible part of a row and the next row, for
 * the luma plane, a chroma plane and the packed output respectively. */
693 const int i_source_margin = p_source->p[0].i_pitch
694 - p_source->p[0].i_visible_pitch;
695 const int i_source_margin_c = p_source->p[1].i_pitch
696 - p_source->p[1].i_visible_pitch;
697 const int i_dest_margin = p_dest->p->i_pitch
698 - p_dest->p->i_visible_pitch;
700 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
701 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
704 p_line2 += p_dest->p->i_pitch;
707 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 8 pixels */
709 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
711 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
717 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover pixel pairs when the width is not a multiple of 8 */
720 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the row padding on every pointer */
725 p_y1 += i_source_margin;
726 p_y2 += i_source_margin;
727 p_u += i_source_margin_c;
728 p_v += i_source_margin_c;
729 p_line1 += i_dest_margin;
730 p_line2 += i_dest_margin;
733 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
734 /* re-enable FPU registers */
738 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
742 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
744 ** SSE2 128 bits fetch/store instructions are faster
745 ** if memory access is 16 bytes aligned
/* Aligned kernel only when the luma pitch, the output pitch and both start
 * pointers are all multiples of 16 */
747 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
748 ((intptr_t)p_line2|(intptr_t)p_y2))) )
750 /* use faster SSE2 aligned fetch and store */
751 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
754 p_line2 += p_dest->p->i_pitch;
757 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 16 pixels */
759 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
761 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover pixel pairs when the width is not a multiple of 16 */
763 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
768 p_y1 += i_source_margin;
769 p_y2 += i_source_margin;
770 p_u += i_source_margin_c;
771 p_v += i_source_margin_c;
772 p_line1 += i_dest_margin;
773 p_line2 += i_dest_margin;
778 /* use slower SSE2 unaligned fetch and store */
779 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
782 p_line2 += p_dest->p->i_pitch;
785 p_y2 += p_source->p[Y_PLANE].i_pitch;
787 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
789 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
791 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
796 p_y1 += i_source_margin;
797 p_y2 += i_source_margin;
798 p_u += i_source_margin_c;
799 p_v += i_source_margin_c;
800 p_line1 += i_dest_margin;
801 p_line2 += i_dest_margin;
804 /* make sure all SSE2 stores are visible thereafter */
806 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
809 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
810 /*****************************************************************************
811 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
812 *****************************************************************************/
/* Placeholder: marks both pictures as unused and only logs, through
 * msg_Err(), that this conversion is unimplemented. */
813 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
816 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
818 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
821 /*****************************************************************************
822 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
823 *****************************************************************************/
/*
 * Reads the Y, U and V planes of p_source and writes packed output into the
 * single plane of p_dest starting from the bottom: p_line2 starts at
 * p_pixels + i_visible_lines * i_pitch and p_line1 one pitch beyond that,
 * and the non-SSE2 loop moves both back by three pitches per row pair.
 * The MMX and SSE2 kernels named here are the same UYVY kernels used by
 * I420_UYVY.
 * NOTE(review): this listing does not show every line of the function (the
 * end of the signature, braces and some statements are absent); the notes
 * below describe only what is visible.
 */
824 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
827 uint8_t *p_line1 = p_dest->p->p_pixels +
828 p_dest->p->i_visible_lines * p_dest->p->i_pitch
829 + p_dest->p->i_pitch;
830 uint8_t *p_line2 = p_dest->p->p_pixels +
831 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
832 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
833 uint8_t *p_u = p_source->U_PIXELS;
834 uint8_t *p_v = p_source->V_PIXELS;
/* Bytes between the end of the visible part of a row and the next row, for
 * the luma plane, a chroma plane and the packed output respectively. */
838 const int i_source_margin = p_source->p[0].i_pitch
839 - p_source->p[0].i_visible_pitch;
840 const int i_source_margin_c = p_source->p[1].i_pitch
841 - p_source->p[1].i_visible_pitch;
842 const int i_dest_margin = p_dest->p->i_pitch
843 - p_dest->p->i_visible_pitch;
845 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
846 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* Walk the output upwards: both output pointers step back three pitches */
848 p_line1 -= 3 * p_dest->p->i_pitch;
849 p_line2 -= 3 * p_dest->p->i_pitch;
852 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 8 pixels */
854 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
856 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
862 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover pixel pairs when the width is not a multiple of 8 */
865 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every pointer */
870 p_y1 += i_source_margin;
871 p_y2 += i_source_margin;
872 p_u += i_source_margin_c;
873 p_v += i_source_margin_c;
874 p_line1 += i_dest_margin;
875 p_line2 += i_dest_margin;
878 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
879 /* re-enable FPU registers */
883 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
885 ** SSE2 128 bits fetch/store instructions are faster
886 ** if memory access is 16 bytes aligned
/* NOTE(review): both SSE2 loops below advance p_line2 by +i_pitch although
 * p_line1 / p_line2 start at the end of the picture, whereas the non-SSE2
 * loop above steps backwards by 3 * i_pitch.  As written this looks like it
 * writes beyond the destination rows and does not flip the image - confirm
 * against the full source and fix the row stepping if so. */
888 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
889 ((intptr_t)p_line2|(intptr_t)p_y2))) )
891 /* use faster SSE2 aligned fetch and store */
892 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
895 p_line2 += p_dest->p->i_pitch;
898 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Main loop: one iteration per group of 16 pixels */
900 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
902 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover pixel pairs when the width is not a multiple of 16 */
904 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
909 p_y1 += i_source_margin;
910 p_y2 += i_source_margin;
911 p_u += i_source_margin_c;
912 p_v += i_source_margin_c;
913 p_line1 += i_dest_margin;
914 p_line2 += i_dest_margin;
919 /* use slower SSE2 unaligned fetch and store */
920 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
923 p_line2 += p_dest->p->i_pitch;
926 p_y2 += p_source->p[Y_PLANE].i_pitch;
928 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
930 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
932 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
937 p_y1 += i_source_margin;
938 p_y2 += i_source_margin;
939 p_u += i_source_margin_c;
940 p_v += i_source_margin_c;
941 p_line1 += i_dest_margin;
942 p_line2 += i_dest_margin;
945 /* make sure all SSE2 stores are visible thereafter */
947 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
949 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
951 /*****************************************************************************
952 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
953 *****************************************************************************/
954 #if defined (MODULE_NAME_IS_i420_yuy2)
955 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
958 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
959 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
960 uint8_t *p_u = p_source->U_PIXELS;
961 uint8_t *p_v = p_source->V_PIXELS;
965 const int i_source_margin = p_source->p[0].i_pitch
966 - p_source->p[0].i_visible_pitch;
967 const int i_source_margin_c = p_source->p[1].i_pitch
968 - p_source->p[1].i_visible_pitch;
969 const int i_dest_margin = p_dest->p->i_pitch
970 - p_dest->p->i_visible_pitch;
972 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
975 p_line2 += p_dest->p->i_pitch;
978 p_y2 += p_source->p[Y_PLANE].i_pitch;
980 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
986 p_y1 += i_source_margin;
987 p_y2 += i_source_margin;
988 p_u += i_source_margin_c;
989 p_v += i_source_margin_c;
990 p_line1 += i_dest_margin;
991 p_line2 += i_dest_margin;