* i420_yuy2.c : YUV to YUV conversion module for vlc
*****************************************************************************
* Copyright (C) 2000, 2001 VideoLAN
- * $Id: i420_yuy2.c,v 1.6 2004/01/26 16:54:56 titer Exp $
+ * $Id$
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
#include <vlc/vlc.h>
#include <vlc/vout.h>
+#ifdef HAVE_ALTIVEC_H
+# include <altivec.h>
+#endif
+
#include "i420_yuy2.h"
#define SRC_FOURCC "I420,IYUV,YV12"
i_80w = 0x0000000080808080ULL;
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
set_description(
- _("Altivec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+ _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
set_capability( "chroma", 100 );
add_requirement( ALTIVEC );
#endif
int i_x, i_y;
- const int i_source_margin = p_source->p->i_pitch
- - p_source->p->i_visible_pitch;
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch;
-
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
+#define VEC_NEXT_LINES( ) /* move on to the next pair of rows: p_line1/p_y1 continue from where p_line2/p_y2 currently point */ \
+ p_line1 = p_line2; \
+ p_line2 += p_dest->p->i_pitch; /* second output row sits one destination pitch further */ \
+ p_y1 = p_y2; \
+ p_y2 += p_source->p[Y_PLANE].i_pitch; /* second luma row sits one Y-plane pitch further */
+/* VEC_LOAD_UV: load one vector from p_u into u_vec and one from p_v into v_vec, advancing each chroma pointer by 16 bytes. */
+#define VEC_LOAD_UV( ) /* NOTE(review): vec_ld presumably requires 16-byte aligned p_u/p_v - confirm against the picture allocator */ \
+ u_vec = vec_ld( 0, p_u ); p_u += 16; \
+ v_vec = vec_ld( 0, p_v ); p_v += 16;
+/* VEC_MERGE: write 16 pixels (32 bytes) to each of the two current output rows from 16 luma bytes per row and one shared chroma vector. */
+#define VEC_MERGE( a ) /* a is the merge intrinsic applied to u_vec/v_vec; the callers in this file pass vec_mergeh or vec_mergel */ \
+ uv_vec = a( u_vec, v_vec ); /* interleave U and V samples */ \
+ y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; /* 16 luma bytes of the first row */ \
+ vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; /* luma interleaved with chroma, first 16 output bytes */ \
+ vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; /* next 16 output bytes */ \
+ y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; /* second row: new luma, same uv_vec as the first row */ \
+ vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
+ vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
+
vector unsigned char u_vec;
vector unsigned char v_vec;
vector unsigned char uv_vec;
vector unsigned char y_vec;
- int high = 1;
+
+ if( !( ( p_vout->render.i_width % 32 ) |
+ ( p_vout->render.i_height % 2 ) ) )
+ {
+ /* Width is a multiple of 32 and height is even: process 2 lines at a time */
+ for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
+ {
+ VEC_NEXT_LINES( );
+ for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+ {
+ VEC_LOAD_UV( );
+ VEC_MERGE( vec_mergeh );
+ VEC_MERGE( vec_mergel );
+ }
+ }
+ }
+ else if( !( ( p_vout->render.i_width % 16 ) |
+ ( p_vout->render.i_height % 4 ) ) )
+ {
+ /* Width is a multiple of 16 (but not of 32) and height is a multiple of 4: process 4 lines at a time */
+ for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
+ {
+ /* Line 1 and 2, pixels 0 to ( width - 16 ) */
+ VEC_NEXT_LINES( );
+ for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+ {
+ VEC_LOAD_UV( );
+ VEC_MERGE( vec_mergeh );
+ VEC_MERGE( vec_mergel );
+ }
+
+ /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
+ VEC_LOAD_UV( );
+ VEC_MERGE( vec_mergeh );
+
+ /* Line 3 and 4, pixels 0 to 16 */
+ VEC_NEXT_LINES( );
+ VEC_MERGE( vec_mergel );
+
+ /* Line 3 and 4, pixels 16 to ( width ) */
+ for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+ {
+ VEC_LOAD_UV( );
+ VEC_MERGE( vec_mergeh );
+ VEC_MERGE( vec_mergel );
+ }
+ }
+ }
+ else
+ {
+ /* Dimensions fit neither AltiVec path above: fall back to the C version */
+#undef VEC_NEXT_LINES
+#undef VEC_LOAD_UV
+#undef VEC_MERGE
#endif
+ const int i_source_margin = p_source->p[0].i_pitch
+ - p_source->p[0].i_visible_pitch;
+ const int i_source_margin_c = p_source->p[1].i_pitch
+ - p_source->p[1].i_visible_pitch;
+ const int i_dest_margin = p_dest->p->i_pitch
+ - p_dest->p->i_visible_pitch;
+
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
{
p_line1 = p_line2;
p_y1 = p_y2;
p_y2 += p_source->p[Y_PLANE].i_pitch;
-#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- /* FIXME Thats only works for sizes multiple of 16 */
- for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+#if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
+ for( i_x = p_vout->render.i_width / 2 ; i_x-- ; )
{
- if( high )
- {
- u_vec = vec_ld( 0, p_u ); p_u += 16;
- v_vec = vec_ld( 0, p_v ); p_v += 16;
- uv_vec = vec_mergeh( u_vec, v_vec );
- }
- else
- {
- uv_vec = vec_mergel( u_vec, v_vec );
- }
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16;
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16;
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16;
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16;
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
- high = !high;
+ C_YUV420_YUYV( );
}
#else
for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
{
-#if defined (MODULE_NAME_IS_i420_yuy2)
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
-#else
MMX_CALL( MMX_YUV420_YUYV );
-#endif
+ }
+ for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+ {
+ C_YUV420_YUYV( );
}
#endif
p_y1 += i_source_margin;
p_y2 += i_source_margin;
+ p_u += i_source_margin_c;
+ p_v += i_source_margin_c;
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
+
+#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
+ }
+#endif
}
/*****************************************************************************
int i_x, i_y;
- const int i_source_margin = p_source->p->i_pitch
- - p_source->p->i_visible_pitch;
+ const int i_source_margin = p_source->p[0].i_pitch
+ - p_source->p[0].i_visible_pitch;
+ const int i_source_margin_c = p_source->p[1].i_pitch
+ - p_source->p[1].i_visible_pitch;
const int i_dest_margin = p_dest->p->i_pitch
- p_dest->p->i_visible_pitch;
p_y1 += i_source_margin;
p_y2 += i_source_margin;
+ p_u += i_source_margin_c;
+ p_v += i_source_margin_c;
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
int i_x, i_y;
- const int i_source_margin = p_source->p->i_pitch
- - p_source->p->i_visible_pitch;
+ const int i_source_margin = p_source->p[0].i_pitch
+ - p_source->p[0].i_visible_pitch;
+ const int i_source_margin_c = p_source->p[1].i_pitch
+ - p_source->p[1].i_visible_pitch;
const int i_dest_margin = p_dest->p->i_pitch
- p_dest->p->i_visible_pitch;
p_y1 += i_source_margin;
p_y2 += i_source_margin;
+ p_u += i_source_margin_c;
+ p_v += i_source_margin_c;
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
picture_t *p_dest )
{
uint8_t *p_line1 = p_dest->p->p_pixels +
- p_dest->p->i_lines * p_dest->p->i_pitch
+ p_dest->p->i_visible_lines * p_dest->p->i_pitch
+ p_dest->p->i_pitch;
uint8_t *p_line2 = p_dest->p->p_pixels +
- p_dest->p->i_lines * p_dest->p->i_pitch;
+ p_dest->p->i_visible_lines * p_dest->p->i_pitch;
uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
uint8_t *p_u = p_source->U_PIXELS;
uint8_t *p_v = p_source->V_PIXELS;
int i_x, i_y;
- const int i_source_margin = p_source->p->i_pitch
- - p_source->p->i_visible_pitch;
+ const int i_source_margin = p_source->p[0].i_pitch
+ - p_source->p[0].i_visible_pitch;
+ const int i_source_margin_c = p_source->p[1].i_pitch
+ - p_source->p[1].i_visible_pitch;
const int i_dest_margin = p_dest->p->i_pitch
- p_dest->p->i_visible_pitch;
p_y1 += i_source_margin;
p_y2 += i_source_margin;
+ p_u += i_source_margin_c;
+ p_v += i_source_margin_c;
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
int i_x, i_y;
- const int i_source_margin = p_source->p->i_pitch
- - p_source->p->i_visible_pitch;
+ const int i_source_margin = p_source->p[0].i_pitch
+ - p_source->p[0].i_visible_pitch;
+ const int i_source_margin_c = p_source->p[1].i_pitch
+ - p_source->p[1].i_visible_pitch;
const int i_dest_margin = p_dest->p->i_pitch
- p_dest->p->i_visible_pitch;
p_y1 += i_source_margin;
p_y2 += i_source_margin;
+ p_u += i_source_margin_c;
+ p_v += i_source_margin_c;
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}