git.sesse.net Git - vlc/blobdiff - modules/video_chroma/i420_yuy2.c
* modules/video_output/x11/xcommon.c: 24-bit screen depth uses 32 bits per pixel.
[vlc] / modules / video_chroma / i420_yuy2.c
index 9aa3acb5cdfee7183562061cba2fb57a3690be2b..d137b83dc0099a9e7f5dd0665ace67dae5cd6ee9 100644 (file)
@@ -2,7 +2,7 @@
  * i420_yuy2.c : YUV to YUV conversion module for vlc
  *****************************************************************************
  * Copyright (C) 2000, 2001 VideoLAN
- * $Id: i420_yuy2.c,v 1.1 2002/08/04 17:23:43 sam Exp $
+ * $Id$
  *
  * Authors: Samuel Hocevar <sam@zoy.org>
  *
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
-#include <errno.h>                                                 /* ENOMEM */
 #include <string.h>                                            /* strerror() */
 #include <stdlib.h>                                      /* malloc(), free() */
 
 #include <vlc/vlc.h>
 #include <vlc/vout.h>
 
+#ifdef HAVE_ALTIVEC_H
+#   include <altivec.h>
+#endif
+
 #include "i420_yuy2.h"
 
 #define SRC_FOURCC  "I420,IYUV,YV12"
 
 #if defined (MODULE_NAME_IS_i420_yuy2)
 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
-#else
+#elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
+#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
+#    define DEST_FOURCC "YUY2,YUNV"
 #endif
 
 /*****************************************************************************
 static int  Activate ( vlc_object_t * );
 
 static void I420_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
+#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
 static void I420_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
 static void I420_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
 static void I420_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
 static void I420_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
+#endif
 #if defined (MODULE_NAME_IS_i420_yuy2)
 static void I420_Y211           ( vout_thread_t *, picture_t *, picture_t * );
 #endif
 
 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
-static unsigned long long i_00ffw;
-static unsigned long long i_80w;
+static uint64_t i_00ffw;
+static uint64_t i_80w;
 #endif
 
 /*****************************************************************************
@@ -65,15 +72,20 @@ static unsigned long long i_80w;
  *****************************************************************************/
 vlc_module_begin();
 #if defined (MODULE_NAME_IS_i420_yuy2)
-    set_description( _("conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+    set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
     set_capability( "chroma", 80 );
 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
     set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
     set_capability( "chroma", 100 );
     add_requirement( MMX );
     /* Initialize MMX-specific constants */
-    i_00ffw = 0x00ff00ff00ff00ff;
-    i_80w   = 0x0000000080808080;
+    i_00ffw = 0x00ff00ff00ff00ffULL;
+    i_80w   = 0x0000000080808080ULL;
+#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
+    set_description(
+            _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+    set_capability( "chroma", 100 );
+    add_requirement( ALTIVEC );
 #endif
     set_callbacks( Activate, NULL );
 vlc_module_end();
@@ -104,6 +116,7 @@ static int Activate( vlc_object_t *p_this )
                     p_vout->chroma.pf_convert = I420_YUY2;
                     break;
 
+#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
                 case VLC_FOURCC('Y','V','Y','U'):
                     p_vout->chroma.pf_convert = I420_YVYU;
                     break;
@@ -121,6 +134,7 @@ static int Activate( vlc_object_t *p_this )
                 case VLC_FOURCC('c','y','u','v'):
                     p_vout->chroma.pf_convert = I420_cyuv;
                     break;
+#endif
 
 #if defined (MODULE_NAME_IS_i420_yuy2)
                 case VLC_FOURCC('Y','2','1','1'):
@@ -148,13 +162,93 @@ static int Activate( vlc_object_t *p_this )
 static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    u8 *p_line1, *p_line2 = p_dest->p->p_pixels;
-    u8 *p_y1, *p_y2 = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
+    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
+#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
+#define VEC_NEXT_LINES( ) \
+    p_line1  = p_line2; \
+    p_line2 += p_dest->p->i_pitch; \
+    p_y1     = p_y2; \
+    p_y2    += p_source->p[Y_PLANE].i_pitch;
+
+#define VEC_LOAD_UV( ) \
+    u_vec = vec_ld( 0, p_u ); p_u += 16; \
+    v_vec = vec_ld( 0, p_v ); p_v += 16;
+
+#define VEC_MERGE( a ) \
+    uv_vec = a( u_vec, v_vec ); \
+    y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
+    vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
+    vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
+    y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
+    vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
+    vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
+
+    vector unsigned char u_vec;
+    vector unsigned char v_vec;
+    vector unsigned char uv_vec;
+    vector unsigned char y_vec;
+
+    if( !( ( p_vout->render.i_width % 32 ) |
+           ( p_vout->render.i_height % 2 ) ) )
+    {
+        /* Width is a multiple of 32, we take 2 lines at a time */
+        for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
+        {
+            VEC_NEXT_LINES( );
+            for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+            {
+                VEC_LOAD_UV( );
+                VEC_MERGE( vec_mergeh );
+                VEC_MERGE( vec_mergel );
+            }
+        }
+    }
+    else if( !( ( p_vout->render.i_width % 16 ) |
+                ( p_vout->render.i_height % 4 ) ) )
+    {
+        /* Width is only a multiple of 16, we take 4 lines at a time */
+        for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
+        {
+            /* Line 1 and 2, pixels 0 to ( width - 16 ) */
+            VEC_NEXT_LINES( );
+            for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+            {
+                VEC_LOAD_UV( );
+                VEC_MERGE( vec_mergeh );
+                VEC_MERGE( vec_mergel );
+            }
+
+            /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
+            VEC_LOAD_UV( );
+            VEC_MERGE( vec_mergeh );
+
+            /* Line 3 and 4, pixels 0 to 16 (other half of the U/V vectors loaded above) */
+            VEC_NEXT_LINES( );
+            VEC_MERGE( vec_mergel );
+
+            /* Line 3 and 4, pixels 16 to ( width ) */
+            for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
+            {
+                VEC_LOAD_UV( );
+                VEC_MERGE( vec_mergeh );
+                VEC_MERGE( vec_mergel );
+            }
+        }
+    }
+    else
+    {
+        /* Dimensions fit neither vector path above: use the C version */
+#undef VEC_NEXT_LINES
+#undef VEC_LOAD_UV
+#undef VEC_MERGE
+#endif
+
     const int i_source_margin = p_source->p->i_pitch
                                  - p_source->p->i_visible_pitch;
     const int i_dest_margin = p_dest->p->i_pitch
@@ -168,35 +262,44 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
         p_y1 = p_y2;
         p_y2 += p_source->p[Y_PLANE].i_pitch;
 
-        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
+#if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
+        for( i_x = p_vout->render.i_width / 2 ; i_x-- ; )
         {
-#if defined (MODULE_NAME_IS_i420_yuy2)
-            C_YUV420_YUYV( );
-            C_YUV420_YUYV( );
-            C_YUV420_YUYV( );
             C_YUV420_YUYV( );
+        }
 #else
+        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
+        {
             MMX_CALL( MMX_YUV420_YUYV );
-#endif
         }
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV420_YUYV( );
+        }
+#endif
 
         p_y1 += i_source_margin;
         p_y2 += i_source_margin;
         p_line1 += i_dest_margin;
         p_line2 += i_dest_margin;
     }
+
+#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
+    }
+#endif
 }
 
 /*****************************************************************************
  * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
  *****************************************************************************/
+#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
 static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    u8 *p_line1, *p_line2 = p_dest->p->p_pixels;
-    u8 *p_y1, *p_y2 = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
+    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
@@ -238,10 +341,10 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
 static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    u8 *p_line1, *p_line2 = p_dest->p->p_pixels;
-    u8 *p_y1, *p_y2 = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
+    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
@@ -293,12 +396,14 @@ static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source,
 static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    u8 *p_line1 = p_dest->p->p_pixels + p_dest->p->i_lines * p_dest->p->i_pitch
-                                      + p_dest->p->i_pitch;
-    u8 *p_line2 = p_dest->p->p_pixels + p_dest->p->i_lines * p_dest->p->i_pitch;
-    u8 *p_y1, *p_y2 = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line1 = p_dest->p->p_pixels +
+                       p_dest->p->i_visible_lines * p_dest->p->i_pitch
+                       + p_dest->p->i_pitch;
+    uint8_t *p_line2 = p_dest->p->p_pixels +
+                       p_dest->p->i_visible_lines * p_dest->p->i_pitch;
+    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
@@ -333,6 +438,7 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
         p_line2 += i_dest_margin;
     }
 }
+#endif /* !defined (MODULE_NAME_IS_i420_yuy2_altivec) */
 
 /*****************************************************************************
  * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
@@ -341,10 +447,10 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
 static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    u8 *p_line1, *p_line2 = p_dest->p->p_pixels;
-    u8 *p_y1, *p_y2 = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
+    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;