1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
42 #include "i420_yuy2.h"
44 #define SRC_FOURCC "I420,IYUV,YV12"
46 #if defined (MODULE_NAME_IS_i420_yuy2)
47 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
48 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
49 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
50 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
51 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
52 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
53 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
56 /*****************************************************************************
57 * Local and extern prototypes.
58 *****************************************************************************/
/*
 * Forward declarations for this file.
 *
 * Activate() takes the generic vlc_object_t pointer.  Every conversion is
 * declared twice: a worker returning void that takes a filter and two
 * pictures, and an I420_*_Filter function that takes a filter and one picture
 * and returns a picture_t pointer (the type stored in pf_video_filter by
 * Activate()).
 *
 * The IUYV and cyuv pairs are declared only when the build is not the AltiVec
 * one; the Y211 pair only for the plain i420_yuy2 build.
 */
59 static int Activate ( vlc_object_t * );
61 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
62 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
63 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
64 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
66 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
67 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
68 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
69 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
70 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
71 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
73 #if defined (MODULE_NAME_IS_i420_yuy2)
74 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
75 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
/*
 * 64-bit constants compiled only into the MMX build.
 *   i_00ffw: 0x00ff in each of the four 16-bit words.
 *   i_80w:   0x80 in each of the four low bytes, zero in the upper 32 bits.
 * Their users are not visible in this file view; presumably the MMX macros
 * pulled in from i420_yuy2.h reference them -- TODO confirm.
 */
78 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
79 /* Initialize MMX-specific constants */
80 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
81 static const uint64_t i_80w = 0x0000000080808080ULL;
84 /*****************************************************************************
86 *****************************************************************************/
/*
 * Per-build module description.
 *
 * Each build variant sets its own description string (built from SRC_FOURCC
 * and DEST_FOURCC), registers the "video filter2" capability with its own
 * priority (80 for plain C, 160 for MMX, 250 for SSE2 and AltiVec) and defines
 * vlc_CPU_capable(), which Activate() tests first: always true for the plain
 * build, the matching vlc_CPU_*() check for the others.
 *
 * Activate is registered as the first callback and NULL as the second.
 */
88 #if defined (MODULE_NAME_IS_i420_yuy2)
89 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
90 set_capability( "video filter2", 80 )
91 # define vlc_CPU_capable() (true)
92 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
93 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
94 set_capability( "video filter2", 160 )
95 # define vlc_CPU_capable() vlc_CPU_MMX()
96 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
97 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
98 set_capability( "video filter2", 250 )
99 # define vlc_CPU_capable() vlc_CPU_SSE2()
100 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* NOTE(review): the line opening this call is not present in this listing. */
102 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
103 set_capability( "video filter2", 250 )
104 # define vlc_CPU_capable() vlc_CPU_ALTIVEC()
106 set_callbacks( Activate, NULL )
109 /*****************************************************************************
110 * Activate: allocate a chroma function
111 *****************************************************************************
112 * This function allocates and initializes a chroma function
113 *****************************************************************************/
/*
 * Activate: validate the filter configuration and pick the conversion
 * callback.
 *
 * p_this is the generic object pointer; it is cast to filter_t * below.
 *
 * Checks made before a callback is chosen:
 *   - vlc_CPU_capable() must be true for this build;
 *   - the input width and height are tested for their low bit (odd sizes);
 *   - the input and output width/height are compared for inequality.
 * The statements executed when a check trips, and the return statements, are
 * not part of this listing; presumably each failure returns an error code --
 * TODO confirm against the full file.
 *
 * The nested switches select on the input chroma and then the output chroma
 * and store the matching I420_*_Filter function in
 * p_filter->pf_video_filter.  The IUYV and cyuv targets exist only in
 * non-AltiVec builds, Y211 only in the plain i420_yuy2 build.
 */
114 static int Activate( vlc_object_t *p_this )
116 filter_t *p_filter = (filter_t *)p_this;
/* Refuse to run when the CPU feature required by this build is missing. */
118 if( !vlc_CPU_capable() )
/* Odd width or odd height. */
120 if( p_filter->fmt_in.video.i_width & 1
121 || p_filter->fmt_in.video.i_height & 1 )
/* Input and output dimensions differ. */
126 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
127 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
/* Outer switch: source chroma; inner switch: destination chroma. */
130 switch( p_filter->fmt_in.video.i_chroma )
134 switch( p_filter->fmt_out.video.i_chroma )
137 p_filter->pf_video_filter = I420_YUY2_Filter;
141 p_filter->pf_video_filter = I420_YVYU_Filter;
145 p_filter->pf_video_filter = I420_UYVY_Filter;
147 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
148 case VLC_FOURCC('I','U','Y','V'):
149 p_filter->pf_video_filter = I420_IUYV_Filter;
153 p_filter->pf_video_filter = I420_cyuv_Filter;
157 #if defined (MODULE_NAME_IS_i420_yuy2)
159 p_filter->pf_video_filter = I420_Y211_Filter;
/*
 * read_cycles: execute the x86 "rdtsc" instruction and receive its result in
 * the local v through the "=A" output constraint.  The return statement is
 * not part of this listing.
 *
 * NOTE(review): "=A" names the edx:eax register pair only on 32-bit x86; on
 * x86-64 GCC picks a single register for a 64-bit operand, which would drop
 * half of the counter.  Whether this helper is compiled at all cannot be told
 * from the visible lines -- confirm before relying on it.
 */
176 static inline unsigned long long read_cycles(void)
178 unsigned long long v;
179 __asm__ __volatile__("rdtsc" : "=A" (v): );
185 /* Following functions are local */
/*
 * One VIDEO_FILTER_WRAPPER instantiation per conversion worker, guarded by
 * the same build conditions as the prototypes.  The macro is defined outside
 * this file view; presumably it generates the I420_*_Filter functions that
 * Activate() installs, each calling the three-argument worker of the same
 * name -- TODO confirm against vlc_filter.h.
 */
187 VIDEO_FILTER_WRAPPER( I420_YUY2 )
188 VIDEO_FILTER_WRAPPER( I420_YVYU )
189 VIDEO_FILTER_WRAPPER( I420_UYVY )
190 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
191 VIDEO_FILTER_WRAPPER( I420_IUYV )
192 VIDEO_FILTER_WRAPPER( I420_cyuv )
194 #if defined (MODULE_NAME_IS_i420_yuy2)
195 VIDEO_FILTER_WRAPPER( I420_Y211 )
198 /*****************************************************************************
199 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
200 *****************************************************************************/
/*
 * I420_YUY2 worker.
 *
 * p_filter supplies the frame size (fmt_in.video.i_width / i_height),
 * p_source is the picture that is read and p_dest the picture that is
 * written.
 *
 * Cursors: p_line1 / p_line2 walk the first destination plane (p_dest->p),
 * p_y1 / p_y2 the source Y plane, p_u / p_v the source U and V planes.
 * Every variant processes two luma lines per outer iteration, except the
 * AltiVec "multiple of 16" case which takes four.
 *
 * Three compile-time variants are selected by MODULE_NAME_IS_*:
 *   - AltiVec vector code for widths that are multiples of 32 (or 16); other
 *     sizes use the C loop;
 *   - the C / MMX loop, compiled for every build that is not SSE2;
 *   - the SSE2 loops, in an aligned and an unaligned flavour.
 *
 * NOTE(review): this listing omits lines (braces, some assignments and macro
 * calls), so the exact statement order inside each loop cannot be read here.
 */
201 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
204 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
205 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
206 uint8_t *p_u = p_source->U_PIXELS;
207 uint8_t *p_v = p_source->V_PIXELS;
211 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: step the destination and luma cursors to the next pair of
 * lines (only the p_line2 / p_y2 increments are visible in this listing). */
212 #define VEC_NEXT_LINES( ) \
214 p_line2 += p_dest->p->i_pitch; \
216 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from p_u and one from p_v, then advance both
 * chroma cursors by 16 bytes. */
218 #define VEC_LOAD_UV( ) \
219 u_vec = vec_ld( 0, p_u ); p_u += 16; \
220 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine u_vec and v_vec with the merge operation `a`
 * (vec_mergeh or vec_mergel at the call sites).  Then, for each of the two
 * luma lines, load 16 bytes of Y and store two vectors merged from
 * ( y_vec, uv_vec ), advancing the luma cursor by 16 and the destination
 * cursor by 32. */
222 #define VEC_MERGE( a ) \
223 uv_vec = a( u_vec, v_vec ); \
224 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
225 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
226 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
227 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
228 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
229 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
231 vector unsigned char u_vec;
232 vector unsigned char v_vec;
233 vector unsigned char uv_vec;
234 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
236 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
237 ( p_filter->fmt_in.video.i_height % 2 ) ) )
239 /* Width is a multiple of 32, we take 2 lines at a time */
240 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* One iteration covers 32 pixels: both halves of the loaded chroma. */
243 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
246 VEC_MERGE( vec_mergeh );
247 VEC_MERGE( vec_mergel );
251 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
253 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
254 ( p_filter->fmt_in.video.i_height % 4 ) ) )
256 /* Width is only a multiple of 16, we take 4 lines at a time */
257 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
259 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
261 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
264 VEC_MERGE( vec_mergeh );
265 VEC_MERGE( vec_mergel );
268 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
270 VEC_MERGE( vec_mergeh );
272 /* Line 3 and 4, pixels 0 to 16 */
274 VEC_MERGE( vec_mergel );
276 /* Line 3 and 4, pixels 16 to ( width ) */
277 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
280 VEC_MERGE( vec_mergeh );
281 VEC_MERGE( vec_mergel );
288 /* Crap, use the C version */
289 #undef VEC_NEXT_LINES
/* Row padding: full pitch minus visible pitch for source plane 0, source
 * plane 1 and the destination plane.  Added to the cursors after each pass
 * so they land on the start of the next row. */
294 const int i_source_margin = p_source->p[0].i_pitch
295 - p_source->p[0].i_visible_pitch;
296 const int i_source_margin_c = p_source->p[1].i_pitch
297 - p_source->p[1].i_visible_pitch;
298 const int i_dest_margin = p_dest->p->i_pitch
299 - p_dest->p->i_visible_pitch;
301 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* C / MMX variant: height / 2 passes. */
302 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
305 p_line2 += p_dest->p->i_pitch;
308 p_y2 += p_source->p[Y_PLANE].i_pitch;
310 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
/* Non-MMX build: width / 8 iterations (loop body not visible here). */
311 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
/* MMX build: one MMX_YUV420_YUYV call per iteration, width / 8 iterations. */
319 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
321 MMX_CALL( MMX_YUV420_YUYV );
/* Leftover columns: ( width % 8 ) / 2 iterations (body not visible here). */
324 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every cursor; both chroma cursors use the margin
 * computed from plane 1. */
329 p_y1 += i_source_margin;
330 p_y2 += i_source_margin;
331 p_u += i_source_margin_c;
332 p_v += i_source_margin_c;
333 p_line1 += i_dest_margin;
334 p_line2 += i_dest_margin;
337 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
338 /* re-enable FPU registers */
342 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
346 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
348 ** SSE2 128 bits fetch/store instructions are faster
349 ** if memory access is 16 bytes aligned
/* The aligned loop is used only when the source Y pitch, the destination
 * pitch and the start addresses in p_line2 and p_y2 all have their low four
 * bits clear (multiples of 16). */
352 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
353 ((intptr_t)p_line2|(intptr_t)p_y2))) )
355 /* use faster SSE2 aligned fetch and store */
356 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
359 p_line2 += p_dest->p->i_pitch;
362 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2 call per iteration, width / 16 iterations. */
364 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
366 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
/* Leftover columns: ( width % 16 ) / 2 iterations (body not visible here). */
368 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
373 p_y1 += i_source_margin;
374 p_y2 += i_source_margin;
375 p_u += i_source_margin_c;
376 p_v += i_source_margin_c;
377 p_line1 += i_dest_margin;
378 p_line2 += i_dest_margin;
383 /* use slower SSE2 unaligned fetch and store */
384 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
387 p_line2 += p_dest->p->i_pitch;
390 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Same loop shape as the aligned case, using the UNALIGNED macro. */
392 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
394 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
396 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
401 p_y1 += i_source_margin;
402 p_y2 += i_source_margin;
403 p_u += i_source_margin_c;
404 p_v += i_source_margin_c;
405 p_line1 += i_dest_margin;
406 p_line2 += i_dest_margin;
409 /* make sure all SSE2 stores are visible thereafter */
412 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
415 /*****************************************************************************
416 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
417 *****************************************************************************/
/*
 * I420_YVYU worker.
 *
 * Same structure as the other packed 4:2:2 workers in this file: p_filter
 * supplies the frame size, p_source is read and p_dest is written.
 * p_line1 / p_line2 walk the first destination plane, p_y1 / p_y2 the source
 * Y plane, p_u / p_v the source chroma planes; two luma lines are handled per
 * outer iteration (four in the AltiVec "multiple of 16" case).
 *
 * What distinguishes this worker in the visible lines: the AltiVec merge
 * passes ( v_vec, u_vec ) -- V first -- and the MMX / SSE2 paths call the
 * *_YVYU macros.
 *
 * NOTE(review): this listing omits lines (braces, some assignments and macro
 * calls), so the exact statement order inside each loop cannot be read here.
 */
418 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
421 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
422 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
423 uint8_t *p_u = p_source->U_PIXELS;
424 uint8_t *p_v = p_source->V_PIXELS;
428 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: step the destination and luma cursors to the next pair of
 * lines (only the p_line2 / p_y2 increments are visible in this listing). */
429 #define VEC_NEXT_LINES( ) \
431 p_line2 += p_dest->p->i_pitch; \
433 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from p_u and one from p_v, then advance both
 * chroma cursors by 16 bytes. */
435 #define VEC_LOAD_UV( ) \
436 u_vec = vec_ld( 0, p_u ); p_u += 16; \
437 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine v_vec and u_vec (V first) with the merge operation
 * `a`.  Then, for each of the two luma lines, load 16 bytes of Y and store
 * two vectors merged from ( y_vec, vu_vec ), advancing the luma cursor by 16
 * and the destination cursor by 32. */
439 #define VEC_MERGE( a ) \
440 vu_vec = a( v_vec, u_vec ); \
441 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
442 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
443 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
444 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
445 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
446 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
448 vector unsigned char u_vec;
449 vector unsigned char v_vec;
450 vector unsigned char vu_vec;
451 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
453 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
454 ( p_filter->fmt_in.video.i_height % 2 ) ) )
456 /* Width is a multiple of 32, we take 2 lines at a time */
457 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
460 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
463 VEC_MERGE( vec_mergeh );
464 VEC_MERGE( vec_mergel );
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
468 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
469 ( p_filter->fmt_in.video.i_height % 4 ) ) )
471 /* Width is only a multiple of 16, we take 4 lines at a time */
472 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
474 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
476 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
479 VEC_MERGE( vec_mergeh );
480 VEC_MERGE( vec_mergel );
483 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
485 VEC_MERGE( vec_mergeh );
487 /* Line 3 and 4, pixels 0 to 16 */
489 VEC_MERGE( vec_mergel );
491 /* Line 3 and 4, pixels 16 to ( width ) */
492 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
495 VEC_MERGE( vec_mergeh );
496 VEC_MERGE( vec_mergel );
502 /* Crap, use the C version */
503 #undef VEC_NEXT_LINES
/* Row padding: full pitch minus visible pitch for source plane 0, source
 * plane 1 and the destination plane.  Added to the cursors after each pass
 * so they land on the start of the next row. */
508 const int i_source_margin = p_source->p[0].i_pitch
509 - p_source->p[0].i_visible_pitch;
510 const int i_source_margin_c = p_source->p[1].i_pitch
511 - p_source->p[1].i_visible_pitch;
512 const int i_dest_margin = p_dest->p->i_pitch
513 - p_dest->p->i_visible_pitch;
515 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* C / MMX variant: height / 2 passes, width / 8 iterations per pass. */
516 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
519 p_line2 += p_dest->p->i_pitch;
522 p_y2 += p_source->p[Y_PLANE].i_pitch;
524 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* The non-MMX loop body is not visible here; the MMX build issues one
 * MMX_YUV420_YVYU call per iteration. */
526 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
532 MMX_CALL( MMX_YUV420_YVYU );
/* Leftover columns: ( width % 8 ) / 2 iterations (body not visible here). */
535 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every cursor; both chroma cursors use the margin
 * computed from plane 1. */
540 p_y1 += i_source_margin;
541 p_y2 += i_source_margin;
542 p_u += i_source_margin_c;
543 p_v += i_source_margin_c;
544 p_line1 += i_dest_margin;
545 p_line2 += i_dest_margin;
548 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
549 /* re-enable FPU registers */
553 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
557 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
559 ** SSE2 128 bits fetch/store instructions are faster
560 ** if memory access is 16 bytes aligned
/* The aligned loop is used only when the source Y pitch, the destination
 * pitch and the start addresses in p_line2 and p_y2 all have their low four
 * bits clear (multiples of 16). */
562 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
563 ((intptr_t)p_line2|(intptr_t)p_y2))) )
565 /* use faster SSE2 aligned fetch and store */
566 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
569 p_line2 += p_dest->p->i_pitch;
572 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2 call per iteration, width / 16 iterations. */
574 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
576 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
/* Leftover columns: ( width % 16 ) / 2 iterations (body not visible here). */
578 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
583 p_y1 += i_source_margin;
584 p_y2 += i_source_margin;
585 p_u += i_source_margin_c;
586 p_v += i_source_margin_c;
587 p_line1 += i_dest_margin;
588 p_line2 += i_dest_margin;
593 /* use slower SSE2 unaligned fetch and store */
594 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
597 p_line2 += p_dest->p->i_pitch;
600 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Same loop shape as the aligned case, using the UNALIGNED macro. */
602 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
604 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
606 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
611 p_y1 += i_source_margin;
612 p_y2 += i_source_margin;
613 p_u += i_source_margin_c;
614 p_v += i_source_margin_c;
615 p_line1 += i_dest_margin;
616 p_line2 += i_dest_margin;
619 /* make sure all SSE2 stores are visible thereafter */
621 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
624 /*****************************************************************************
625 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
626 *****************************************************************************/
/*
 * I420_UYVY worker.
 *
 * Same structure as the other packed 4:2:2 workers in this file: p_filter
 * supplies the frame size, p_source is read and p_dest is written.
 * p_line1 / p_line2 walk the first destination plane, p_y1 / p_y2 the source
 * Y plane, p_u / p_v the source chroma planes; two luma lines are handled per
 * outer iteration (four in the AltiVec "multiple of 16" case).
 *
 * What distinguishes this worker in the visible lines: the AltiVec stores
 * merge ( uv_vec, y_vec ) -- chroma operand first -- and the MMX / SSE2 paths
 * call the *_UYVY macros.
 *
 * NOTE(review): this listing omits lines (braces, some assignments and macro
 * calls), so the exact statement order inside each loop cannot be read here.
 */
627 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
630 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
631 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
632 uint8_t *p_u = p_source->U_PIXELS;
633 uint8_t *p_v = p_source->V_PIXELS;
637 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* VEC_NEXT_LINES: step the destination and luma cursors to the next pair of
 * lines (only the p_line2 / p_y2 increments are visible in this listing). */
638 #define VEC_NEXT_LINES( ) \
640 p_line2 += p_dest->p->i_pitch; \
642 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* VEC_LOAD_UV: load one vector from p_u and one from p_v, then advance both
 * chroma cursors by 16 bytes. */
644 #define VEC_LOAD_UV( ) \
645 u_vec = vec_ld( 0, p_u ); p_u += 16; \
646 v_vec = vec_ld( 0, p_v ); p_v += 16;
/* VEC_MERGE( a ): combine u_vec and v_vec with the merge operation `a`.
 * Then, for each of the two luma lines, load 16 bytes of Y and store two
 * vectors merged from ( uv_vec, y_vec ), advancing the luma cursor by 16 and
 * the destination cursor by 32. */
648 #define VEC_MERGE( a ) \
649 uv_vec = a( u_vec, v_vec ); \
650 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
651 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
652 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
653 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
654 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
655 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
657 vector unsigned char u_vec;
658 vector unsigned char v_vec;
659 vector unsigned char uv_vec;
660 vector unsigned char y_vec;
/* Vector path 1: width % 32 == 0 and height % 2 == 0. */
662 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
663 ( p_filter->fmt_in.video.i_height % 2 ) ) )
665 /* Width is a multiple of 32, we take 2 lines at a time */
666 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
669 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
672 VEC_MERGE( vec_mergeh );
673 VEC_MERGE( vec_mergel );
/* Vector path 2: width % 16 == 0 and height % 4 == 0. */
677 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
678 ( p_filter->fmt_in.video.i_height % 4 ) ) )
680 /* Width is only a multiple of 16, we take 4 lines at a time */
681 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
683 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
685 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
688 VEC_MERGE( vec_mergeh );
689 VEC_MERGE( vec_mergel );
692 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
694 VEC_MERGE( vec_mergeh );
696 /* Line 3 and 4, pixels 0 to 16 */
698 VEC_MERGE( vec_mergel );
700 /* Line 3 and 4, pixels 16 to ( width ) */
701 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
704 VEC_MERGE( vec_mergeh );
705 VEC_MERGE( vec_mergel );
711 /* Crap, use the C version */
712 #undef VEC_NEXT_LINES
/* Row padding: full pitch minus visible pitch for source plane 0, source
 * plane 1 and the destination plane.  Added to the cursors after each pass
 * so they land on the start of the next row. */
717 const int i_source_margin = p_source->p[0].i_pitch
718 - p_source->p[0].i_visible_pitch;
719 const int i_source_margin_c = p_source->p[1].i_pitch
720 - p_source->p[1].i_visible_pitch;
721 const int i_dest_margin = p_dest->p->i_pitch
722 - p_dest->p->i_visible_pitch;
724 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* C / MMX variant: height / 2 passes, width / 8 iterations per pass. */
725 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
728 p_line2 += p_dest->p->i_pitch;
731 p_y2 += p_source->p[Y_PLANE].i_pitch;
733 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* The non-MMX loop body is not visible here; the MMX build issues one
 * MMX_YUV420_UYVY call per iteration. */
735 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
741 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover columns: ( width % 8 ) / 2 iterations (body not visible here). */
744 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
/* Skip the row padding on every cursor; both chroma cursors use the margin
 * computed from plane 1. */
749 p_y1 += i_source_margin;
750 p_y2 += i_source_margin;
751 p_u += i_source_margin_c;
752 p_v += i_source_margin_c;
753 p_line1 += i_dest_margin;
754 p_line2 += i_dest_margin;
757 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
758 /* re-enable FPU registers */
762 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
766 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
768 ** SSE2 128 bits fetch/store instructions are faster
769 ** if memory access is 16 bytes aligned
/* The aligned loop is used only when the source Y pitch, the destination
 * pitch and the start addresses in p_line2 and p_y2 all have their low four
 * bits clear (multiples of 16). */
771 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
772 ((intptr_t)p_line2|(intptr_t)p_y2))) )
774 /* use faster SSE2 aligned fetch and store */
775 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
778 p_line2 += p_dest->p->i_pitch;
781 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* One SSE2 call per iteration, width / 16 iterations. */
783 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
785 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover columns: ( width % 16 ) / 2 iterations (body not visible here). */
787 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
792 p_y1 += i_source_margin;
793 p_y2 += i_source_margin;
794 p_u += i_source_margin_c;
795 p_v += i_source_margin_c;
796 p_line1 += i_dest_margin;
797 p_line2 += i_dest_margin;
802 /* use slower SSE2 unaligned fetch and store */
803 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
806 p_line2 += p_dest->p->i_pitch;
809 p_y2 += p_source->p[Y_PLANE].i_pitch;
/* Same loop shape as the aligned case, using the UNALIGNED macro. */
811 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
813 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
815 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
820 p_y1 += i_source_margin;
821 p_y2 += i_source_margin;
822 p_u += i_source_margin_c;
823 p_v += i_source_margin_c;
824 p_line1 += i_dest_margin;
825 p_line2 += i_dest_margin;
828 /* make sure all SSE2 stores are visible thereafter */
830 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
833 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
834 /*****************************************************************************
835 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
836 *****************************************************************************/
/*
 * I420_IUYV worker: placeholder only.  In the visible lines it marks
 * p_source and p_dest as unused and logs an error through msg_Err() saying
 * the conversion is unimplemented; no pixel data is touched.
 */
837 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
840 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
842 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
845 /*****************************************************************************
846 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
847 *****************************************************************************/
/*
 * I420_cyuv worker: UYVY output written from the bottom of the destination
 * upwards (the banner above calls it "upside-down packed UYVY").
 *
 * p_filter supplies the frame size, p_source is read and p_dest is written.
 * It reuses the UYVY macros (MMX_YUV420_UYVY, SSE2_YUV420_UYVY_*); the flip
 * comes from how the destination cursors are initialised and stepped.
 *
 * Destination cursors start past the visible area: p_line2 at
 * i_visible_lines pitches from p_pixels and p_line1 one pitch further.  In
 * the non-SSE2 branch both are moved back by three pitches at the start of
 * every pass.
 *
 * NOTE(review): if each pass leaves the cursors one pitch further on (as the
 * margin adjustments at the end of the pass suggest), the first pass targets
 * rows i_visible_lines - 2 and i_visible_lines - 3, so the bottom row would
 * never be written and the last pass would reach one row before p_pixels.
 * Verify the intended start rows against the full file.
 *
 * NOTE(review): in the SSE2 branch the visible statements advance p_line2 by
 * +i_pitch per pass, as in I420_UYVY, instead of stepping backwards.
 * Starting from the initial values above that would move beyond the visible
 * area rather than fill it bottom-up.  Looks like a defect, but lines are
 * missing from this listing -- confirm before changing.
 */
848 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
851 uint8_t *p_line1 = p_dest->p->p_pixels +
852 p_dest->p->i_visible_lines * p_dest->p->i_pitch
853 + p_dest->p->i_pitch;
854 uint8_t *p_line2 = p_dest->p->p_pixels +
855 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
856 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
857 uint8_t *p_u = p_source->U_PIXELS;
858 uint8_t *p_v = p_source->V_PIXELS;
/* Row padding: full pitch minus visible pitch for source plane 0, source
 * plane 1 and the destination plane. */
862 const int i_source_margin = p_source->p[0].i_pitch
863 - p_source->p[0].i_visible_pitch;
864 const int i_source_margin_c = p_source->p[1].i_pitch
865 - p_source->p[1].i_visible_pitch;
866 const int i_dest_margin = p_dest->p->i_pitch
867 - p_dest->p->i_visible_pitch;
869 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
/* C / MMX variant: height / 2 passes, destination stepping upwards. */
870 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
872 p_line1 -= 3 * p_dest->p->i_pitch;
873 p_line2 -= 3 * p_dest->p->i_pitch;
876 p_y2 += p_source->p[Y_PLANE].i_pitch;
878 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* The non-MMX loop body is not visible here; the MMX build issues one
 * MMX_YUV420_UYVY call per iteration. */
880 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
886 MMX_CALL( MMX_YUV420_UYVY );
/* Leftover columns: ( width % 8 ) / 2 iterations (body not visible here). */
889 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
/* Skip the row padding on every cursor. */
894 p_y1 += i_source_margin;
895 p_y2 += i_source_margin;
896 p_u += i_source_margin_c;
897 p_v += i_source_margin_c;
898 p_line1 += i_dest_margin;
899 p_line2 += i_dest_margin;
902 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
903 /* re-enable FPU registers */
907 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
909 ** SSE2 128 bits fetch/store instructions are faster
910 ** if memory access is 16 bytes aligned
/* The aligned loop is used only when the source Y pitch, the destination
 * pitch and the start addresses in p_line2 and p_y2 all have their low four
 * bits clear (multiples of 16). */
912 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
913 ((intptr_t)p_line2|(intptr_t)p_y2))) )
915 /* use faster SSE2 aligned fetch and store */
916 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): forward step, unlike the -3 * pitch step of the non-SSE2
 * branch; see the function header. */
919 p_line2 += p_dest->p->i_pitch;
922 p_y2 += p_source->p[Y_PLANE].i_pitch;
924 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
926 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
/* Leftover columns: ( width % 16 ) / 2 iterations (body not visible here). */
928 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
933 p_y1 += i_source_margin;
934 p_y2 += i_source_margin;
935 p_u += i_source_margin_c;
936 p_v += i_source_margin_c;
937 p_line1 += i_dest_margin;
938 p_line2 += i_dest_margin;
943 /* use slower SSE2 unaligned fetch and store */
944 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
/* NOTE(review): same forward step as the aligned loop; see the function
 * header. */
947 p_line2 += p_dest->p->i_pitch;
950 p_y2 += p_source->p[Y_PLANE].i_pitch;
952 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
954 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
956 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
961 p_y1 += i_source_margin;
962 p_y2 += i_source_margin;
963 p_u += i_source_margin_c;
964 p_v += i_source_margin_c;
965 p_line1 += i_dest_margin;
966 p_line2 += i_dest_margin;
969 /* make sure all SSE2 stores are visible thereafter */
971 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
973 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
975 /*****************************************************************************
976 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
977 *****************************************************************************/
978 #if defined (MODULE_NAME_IS_i420_yuy2)
/*
 * I420_Y211 worker, compiled only into the plain i420_yuy2 build.
 *
 * p_filter supplies the frame size, p_source is read and p_dest is the
 * picture whose first plane p_line1 / p_line2 point into.  p_y1 / p_y2 walk
 * the source Y plane and p_u / p_v the source chroma planes.
 *
 * The outer loop runs height / 2 passes and the inner loop width / 8
 * iterations; the inner loop body is not visible in this listing, and no
 * leftover-column loop appears in the visible lines.
 *
 * NOTE(review): the end of this function lies beyond the visible listing.
 */
979 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
982 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
983 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
984 uint8_t *p_u = p_source->U_PIXELS;
985 uint8_t *p_v = p_source->V_PIXELS;
/* Row padding: full pitch minus visible pitch for source plane 0, source
 * plane 1 and the destination plane. */
989 const int i_source_margin = p_source->p[0].i_pitch
990 - p_source->p[0].i_visible_pitch;
991 const int i_source_margin_c = p_source->p[1].i_pitch
992 - p_source->p[1].i_visible_pitch;
993 const int i_dest_margin = p_dest->p->i_pitch
994 - p_dest->p->i_visible_pitch;
996 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
999 p_line2 += p_dest->p->i_pitch;
1002 p_y2 += p_source->p[Y_PLANE].i_pitch;
1004 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
/* Skip the row padding on every cursor; both chroma cursors use the margin
 * computed from plane 1. */
1010 p_y1 += i_source_margin;
1011 p_y2 += i_source_margin;
1012 p_u += i_source_margin_c;
1013 p_v += i_source_margin_c;
1014 p_line1 += i_dest_margin;
1015 p_line2 += i_dest_margin;