X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fvideo_chroma%2Fi420_yuy2.c;h=d9ffebee1fba48e41218e2b9afe9501766ed2892;hb=a5c83dda798f93cc7a76bbb50d89352117e6ec46;hp=8a76fcb46f61cd64021fbf2a737117007f01b3a4;hpb=9acaa4b2e175fb575070d684acdb178bc7a542d2;p=vlc diff --git a/modules/video_chroma/i420_yuy2.c b/modules/video_chroma/i420_yuy2.c index 8a76fcb46f..d9ffebee1f 100644 --- a/modules/video_chroma/i420_yuy2.c +++ b/modules/video_chroma/i420_yuy2.c @@ -25,10 +25,14 @@ /***************************************************************************** * Preamble *****************************************************************************/ -#include /* strerror() */ -#include /* malloc(), free() */ -#include +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include #include #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H) @@ -54,15 +58,21 @@ *****************************************************************************/ static int Activate ( vlc_object_t * ); -static void I420_YUY2 ( vout_thread_t *, picture_t *, picture_t * ); -static void I420_YVYU ( vout_thread_t *, picture_t *, picture_t * ); -static void I420_UYVY ( vout_thread_t *, picture_t *, picture_t * ); +static void I420_YUY2 ( filter_t *, picture_t *, picture_t * ); +static void I420_YVYU ( filter_t *, picture_t *, picture_t * ); +static void I420_UYVY ( filter_t *, picture_t *, picture_t * ); +static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * ); +static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * ); +static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * ); #if !defined (MODULE_NAME_IS_i420_yuy2_altivec) -static void I420_IUYV ( vout_thread_t *, picture_t *, picture_t * ); -static void I420_cyuv ( vout_thread_t *, picture_t *, picture_t * ); +static void I420_IUYV ( filter_t *, picture_t *, picture_t * ); +static void I420_cyuv ( filter_t *, picture_t *, picture_t * ); +static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * ); +static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * ); #endif #if defined (MODULE_NAME_IS_i420_yuy2) -static void I420_Y211 ( vout_thread_t *, picture_t *, picture_t * ); +static void I420_Y211 ( filter_t *, picture_t *, picture_t * ); +static picture_t *I420_Y211_Filter ( filter_t *, picture_t * ); #endif #ifdef MODULE_NAME_IS_i420_yuy2_mmx @@ -76,20 +86,20 @@ static const uint64_t i_80w = 0x0000000080808080ULL; *****************************************************************************/ vlc_module_begin(); #if defined (MODULE_NAME_IS_i420_yuy2) - set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) ); - set_capability( "chroma", 80 ); + set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) ); + set_capability( "video filter2", 80 ); #elif defined (MODULE_NAME_IS_i420_yuy2_mmx) - set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) ); - set_capability( "chroma", 100 ); + set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) ); + set_capability( "video filter2", 100 ); add_requirement( MMX ); #elif defined (MODULE_NAME_IS_i420_yuy2_sse2) - set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) ); - set_capability( "chroma", 120 ); + set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) ); + set_capability( "video filter2", 120 ); add_requirement( SSE2 ); #elif defined (MODULE_NAME_IS_i420_yuy2_altivec) set_description( _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) ); - set_capability( "chroma", 100 ); + set_capability( "video filter2", 100 ); add_requirement( ALTIVEC ); #endif set_callbacks( Activate, NULL ); @@ -102,47 +112,48 @@ vlc_module_end(); *****************************************************************************/ static int Activate( vlc_object_t *p_this ) { - vout_thread_t *p_vout = (vout_thread_t *)p_this; + filter_t *p_filter = (filter_t *)p_this; - if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 ) + if( p_filter->fmt_in.video.i_width & 1 + || p_filter->fmt_in.video.i_height & 1 ) { return -1; } - switch( p_vout->render.i_chroma ) + switch( p_filter->fmt_in.video.i_chroma ) { case VLC_FOURCC('Y','V','1','2'): case VLC_FOURCC('I','4','2','0'): case VLC_FOURCC('I','Y','U','V'): - switch( p_vout->output.i_chroma ) + switch( p_filter->fmt_out.video.i_chroma ) { case VLC_FOURCC('Y','U','Y','2'): case VLC_FOURCC('Y','U','N','V'): - p_vout->chroma.pf_convert = I420_YUY2; + p_filter->pf_video_filter = I420_YUY2_Filter; break; case VLC_FOURCC('Y','V','Y','U'): - p_vout->chroma.pf_convert = I420_YVYU; + p_filter->pf_video_filter = I420_YVYU_Filter; break; case VLC_FOURCC('U','Y','V','Y'): case VLC_FOURCC('U','Y','N','V'): case VLC_FOURCC('Y','4','2','2'): - p_vout->chroma.pf_convert = I420_UYVY; + p_filter->pf_video_filter = I420_UYVY_Filter; break; #if !defined (MODULE_NAME_IS_i420_yuy2_altivec) case VLC_FOURCC('I','U','Y','V'): - p_vout->chroma.pf_convert = I420_IUYV; + p_filter->pf_video_filter = I420_IUYV_Filter; break; case VLC_FOURCC('c','y','u','v'): - p_vout->chroma.pf_convert = I420_cyuv; + p_filter->pf_video_filter = I420_cyuv_Filter; break; #endif #if defined (MODULE_NAME_IS_i420_yuy2) case VLC_FOURCC('Y','2','1','1'): - p_vout->chroma.pf_convert = I420_Y211; + p_filter->pf_video_filter = I420_Y211_Filter; break; #endif @@ -169,11 +180,23 @@ static inline unsigned long long read_cycles(void) #endif /* Following functions are local */ + +VIDEO_FILTER_WRAPPER( I420_YUY2 ) +VIDEO_FILTER_WRAPPER( I420_YVYU ) +VIDEO_FILTER_WRAPPER( I420_UYVY ) +#if !defined (MODULE_NAME_IS_i420_yuy2_altivec) +VIDEO_FILTER_WRAPPER( I420_IUYV ) +VIDEO_FILTER_WRAPPER( I420_cyuv ) +#endif +#if defined (MODULE_NAME_IS_i420_yuy2) +VIDEO_FILTER_WRAPPER( I420_Y211 ) +#endif + /***************************************************************************** * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2 *****************************************************************************/ -static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_YUY2( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels; uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS; @@ -207,14 +230,14 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, vector unsigned char uv_vec; vector unsigned char y_vec; - if( !( ( p_vout->render.i_width % 32 ) | - ( p_vout->render.i_height % 2 ) ) ) + if( !( ( p_filter->fmt_in.video.i_width % 32 ) | + ( p_filter->fmt_in.video.i_height % 2 ) ) ) { /* Width is a multiple of 32, we take 2 lines at a time */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -222,15 +245,15 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, } } } - else if( !( ( p_vout->render.i_width % 16 ) | - ( p_vout->render.i_height % 4 ) ) ) + else if( !( ( p_filter->fmt_in.video.i_width % 16 ) | + ( p_filter->fmt_in.video.i_height % 4 ) ) ) { /* Width is only a multiple of 16, we take 4 lines at a time */ - for( i_y = p_vout->render.i_height / 4 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; ) { /* Line 1 and 2, pixels 0 to ( width - 16 ) */ VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -246,7 +269,7 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, VEC_MERGE( vec_mergel ); /* Line 3 and 4, pixels 16 to ( width ) */ - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -270,7 +293,7 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, - p_dest->p->i_visible_pitch; #if !defined(MODULE_NAME_IS_i420_yuy2_sse2) - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -279,7 +302,7 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, p_y2 += p_source->p[Y_PLANE].i_pitch; #if !defined (MODULE_NAME_IS_i420_yuy2_mmx) - for( i_x = p_vout->render.i_width / 8; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; ) { C_YUV420_YUYV( ); C_YUV420_YUYV( ); @@ -287,12 +310,12 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, C_YUV420_YUYV( ); } #else - for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; ) { MMX_CALL( MMX_YUV420_YUYV ); } #endif - for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; ) { C_YUV420_YUYV( ); } @@ -307,7 +330,7 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, #if defined (MODULE_NAME_IS_i420_yuy2_mmx) /* re-enable FPU registers */ - __asm__ __volatile__ ( "emms" ); + MMX_END; #endif #if defined (MODULE_NAME_IS_i420_yuy2_altivec) @@ -316,15 +339,15 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, #else // defined(MODULE_NAME_IS_i420_yuy2_sse2) /* - ** SSE2 128 bits fetch/store instructions are faster + ** SSE2 128 bits fetch/store instructions are faster ** if memory access is 16 bytes aligned */ if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch| - ((int)p_line2|(int)p_y2))) ) + ((intptr_t)p_line2|(intptr_t)p_y2))) ) { /* use faster SSE2 aligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -332,11 +355,11 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_YUYV( ); } @@ -348,13 +371,11 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, p_line1 += i_dest_margin; p_line2 += i_dest_margin; } - /* make sure all SSE2 stores are visible thereafter */ - __asm__ __volatile__ ( "sfence" ); } else { /* use slower SSE2 unaligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -362,11 +383,11 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_YUYV( ); } @@ -379,6 +400,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, p_line2 += i_dest_margin; } } + /* make sure all SSE2 stores are visible thereafter */ + SSE2_END; #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2) } @@ -386,8 +409,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, /***************************************************************************** * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2 *****************************************************************************/ -static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_YVYU( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels; uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS; @@ -421,14 +444,14 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, vector unsigned char vu_vec; vector unsigned char y_vec; - if( !( ( p_vout->render.i_width % 32 ) | - ( p_vout->render.i_height % 2 ) ) ) + if( !( ( p_filter->fmt_in.video.i_width % 32 ) | + ( p_filter->fmt_in.video.i_height % 2 ) ) ) { /* Width is a multiple of 32, we take 2 lines at a time */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -436,15 +459,15 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, } } } - else if( !( ( p_vout->render.i_width % 16 ) | - ( p_vout->render.i_height % 4 ) ) ) + else if( !( ( p_filter->fmt_in.video.i_width % 16 ) | + ( p_filter->fmt_in.video.i_height % 4 ) ) ) { /* Width is only a multiple of 16, we take 4 lines at a time */ - for( i_y = p_vout->render.i_height / 4 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; ) { /* Line 1 and 2, pixels 0 to ( width - 16 ) */ VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -460,7 +483,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, VEC_MERGE( vec_mergel ); /* Line 3 and 4, pixels 16 to ( width ) */ - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -484,7 +507,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, - p_dest->p->i_visible_pitch; #if !defined(MODULE_NAME_IS_i420_yuy2_sse2) - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -492,7 +515,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; ) { #if !defined (MODULE_NAME_IS_i420_yuy2_mmx) C_YUV420_YVYU( ); @@ -503,7 +526,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, MMX_CALL( MMX_YUV420_YVYU ); #endif } - for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; ) { C_YUV420_YVYU( ); } @@ -518,7 +541,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, #if defined (MODULE_NAME_IS_i420_yuy2_mmx) /* re-enable FPU registers */ - __asm__ __volatile__ ( "emms" ); + MMX_END; #endif #if defined (MODULE_NAME_IS_i420_yuy2_altivec) @@ -527,14 +550,14 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, #else // defined(MODULE_NAME_IS_i420_yuy2_sse2) /* - ** SSE2 128 bits fetch/store instructions are faster + ** SSE2 128 bits fetch/store instructions are faster ** if memory access is 16 bytes aligned */ if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch| - ((int)p_line2|(int)p_y2))) ) + ((intptr_t)p_line2|(intptr_t)p_y2))) ) { /* use faster SSE2 aligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -542,11 +565,11 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_YVYU( ); } @@ -558,13 +581,11 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, p_line1 += i_dest_margin; p_line2 += i_dest_margin; } - /* make sure all SSE2 stores are visible thereafter */ - __asm__ __volatile__ ( "sfence" ); } else { /* use slower SSE2 unaligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -572,11 +593,11 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_YVYU( ); } @@ -589,14 +610,16 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, p_line2 += i_dest_margin; } } + /* make sure all SSE2 stores are visible thereafter */ + SSE2_END; #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2) } /***************************************************************************** * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2 *****************************************************************************/ -static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_UYVY( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels; uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS; @@ -630,14 +653,14 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, vector unsigned char uv_vec; vector unsigned char y_vec; - if( !( ( p_vout->render.i_width % 32 ) | - ( p_vout->render.i_height % 2 ) ) ) + if( !( ( p_filter->fmt_in.video.i_width % 32 ) | + ( p_filter->fmt_in.video.i_height % 2 ) ) ) { /* Width is a multiple of 32, we take 2 lines at a time */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -645,15 +668,15 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, } } } - else if( !( ( p_vout->render.i_width % 16 ) | - ( p_vout->render.i_height % 4 ) ) ) + else if( !( ( p_filter->fmt_in.video.i_width % 16 ) | + ( p_filter->fmt_in.video.i_height % 4 ) ) ) { /* Width is only a multiple of 16, we take 4 lines at a time */ - for( i_y = p_vout->render.i_height / 4 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; ) { /* Line 1 and 2, pixels 0 to ( width - 16 ) */ VEC_NEXT_LINES( ); - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -669,7 +692,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, VEC_MERGE( vec_mergel ); /* Line 3 and 4, pixels 16 to ( width ) */ - for( i_x = p_vout->render.i_width / 32 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; ) { VEC_LOAD_UV( ); VEC_MERGE( vec_mergeh ); @@ -693,7 +716,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, - p_dest->p->i_visible_pitch; #if !defined(MODULE_NAME_IS_i420_yuy2_sse2) - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -701,7 +724,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; ) { #if !defined (MODULE_NAME_IS_i420_yuy2_mmx) C_YUV420_UYVY( ); @@ -712,7 +735,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, MMX_CALL( MMX_YUV420_UYVY ); #endif } - for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x--; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; ) { C_YUV420_UYVY( ); } @@ -727,7 +750,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, #if defined (MODULE_NAME_IS_i420_yuy2_mmx) /* re-enable FPU registers */ - __asm__ __volatile__ ( "emms" ); + MMX_END; #endif #if defined (MODULE_NAME_IS_i420_yuy2_altivec) @@ -736,14 +759,14 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, #else // defined(MODULE_NAME_IS_i420_yuy2_sse2) /* - ** SSE2 128 bits fetch/store instructions are faster + ** SSE2 128 bits fetch/store instructions are faster ** if memory access is 16 bytes aligned */ if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch| - ((int)p_line2|(int)p_y2))) ) + ((intptr_t)p_line2|(intptr_t)p_y2))) ) { /* use faster SSE2 aligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -751,11 +774,11 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_UYVY( ); } @@ -767,13 +790,11 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, p_line1 += i_dest_margin; p_line2 += i_dest_margin; } - /* make sure all SSE2 stores are visible thereafter */ - __asm__ __volatile__ ( "sfence" ); } else { /* use slower SSE2 unaligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -781,11 +802,11 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_UYVY( ); } @@ -798,6 +819,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, p_line2 += i_dest_margin; } } + /* make sure all SSE2 stores are visible thereafter */ + SSE2_END; #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2) } @@ -805,18 +828,19 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, /***************************************************************************** * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2 *****************************************************************************/ -static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_IUYV( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { + VLC_UNUSED(p_source); VLC_UNUSED(p_dest); /* FIXME: TODO ! */ - msg_Err( p_vout, "I420_IUYV unimplemented, please harass " ); + msg_Err( p_filter, "I420_IUYV unimplemented, please harass " ); } /***************************************************************************** * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2 *****************************************************************************/ -static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_cyuv( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { uint8_t *p_line1 = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch @@ -837,7 +861,7 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, - p_dest->p->i_visible_pitch; #if !defined(MODULE_NAME_IS_i420_yuy2_sse2) - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 -= 3 * p_dest->p->i_pitch; p_line2 -= 3 * p_dest->p->i_pitch; @@ -845,7 +869,7 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; ) { #if !defined (MODULE_NAME_IS_i420_yuy2_mmx) C_YUV420_UYVY( ); @@ -856,7 +880,7 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, MMX_CALL( MMX_YUV420_UYVY ); #endif } - for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; ) { C_YUV420_UYVY( ); } @@ -871,19 +895,19 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, #if defined (MODULE_NAME_IS_i420_yuy2_mmx) /* re-enable FPU registers */ - __asm__ __volatile__ ( "emms" ); + MMX_END; #endif #else // defined(MODULE_NAME_IS_i420_yuy2_sse2) /* - ** SSE2 128 bits fetch/store instructions are faster + ** SSE2 128 bits fetch/store instructions are faster ** if memory access is 16 bytes aligned */ if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch| - ((int)p_line2|(int)p_y2))) ) + ((intptr_t)p_line2|(intptr_t)p_y2))) ) { /* use faster SSE2 aligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -891,11 +915,11 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_UYVY( ); } @@ -907,13 +931,11 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, p_line1 += i_dest_margin; p_line2 += i_dest_margin; } - /* make sure all SSE2 stores are visible thereafter */ - __asm__ __volatile__ ( "sfence" ); } else { /* use slower SSE2 unaligned fetch and store */ - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -921,11 +943,11 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; ) { SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED ); } - for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) + for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; ) { C_YUV420_UYVY( ); } @@ -938,6 +960,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, p_line2 += i_dest_margin; } } + /* make sure all SSE2 stores are visible thereafter */ + SSE2_END; #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2) } #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec) @@ -946,8 +970,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source, * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1 *****************************************************************************/ #if defined (MODULE_NAME_IS_i420_yuy2) -static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source, - picture_t *p_dest ) +static void I420_Y211( filter_t *p_filter, picture_t *p_source, + picture_t *p_dest ) { uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels; uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS; @@ -963,7 +987,7 @@ static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source, const int i_dest_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch; - for( i_y = p_vout->render.i_height / 2 ; i_y-- ; ) + for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; ) { p_line1 = p_line2; p_line2 += p_dest->p->i_pitch; @@ -971,7 +995,7 @@ static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source, p_y1 = p_y2; p_y2 += p_source->p[Y_PLANE].i_pitch; - for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) + for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; ) { C_YUV420_Y211( ); C_YUV420_Y211( );