]> git.sesse.net Git - vlc/blobdiff - modules/video_chroma/i422_yuy2.c
Removes trailing spaces. Removes tabs.
[vlc] / modules / video_chroma / i422_yuy2.c
index 35e032889432ca44a81fbda172d0885c95032aa0..ec1a90305da92b31fd5e811cb81131f2d05dc0fd 100644 (file)
@@ -5,12 +5,13 @@
  * $Id$
  *
  * Authors: Samuel Hocevar <sam@zoy.org>
+ *          Damien Fouilleul <damienf@videolan.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
-#include <string.h>                                            /* strerror() */
-#include <stdlib.h>                                      /* malloc(), free() */
 
 #include <vlc/vlc.h>
-#include <vlc/vout.h>
+#include <vlc_vout.h>
 
 #include "i422_yuy2.h"
 
@@ -66,6 +65,10 @@ vlc_module_begin();
     set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
     set_capability( "chroma", 100 );
     add_requirement( MMX );
+#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
+    set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+    set_capability( "chroma", 120 );
+    add_requirement( SSE2 );
 #endif
     set_callbacks( Activate, NULL );
 vlc_module_end();
@@ -130,8 +133,7 @@ static int Activate( vlc_object_t *p_this )
         default:
             return -1;
     }
-    
-    return 0; 
+    return 0;
 }
 
 /* Following functions are local */
@@ -142,17 +144,66 @@ static int Activate( vlc_object_t *p_this )
 static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    uint8_t *p_pixels = p_dest->p->p_pixels;
-    int      i_pitch  = p_dest->p->i_pitch;
+    uint8_t *p_line = p_dest->p->p_pixels;
     uint8_t *p_y = p_source->Y_PIXELS;
     uint8_t *p_u = p_source->U_PIXELS;
     uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
     for( i_y = p_vout->render.i_height ; i_y-- ; )
     {
-       uint8_t *p_line = p_pixels;
         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
         {
 #if defined (MODULE_NAME_IS_i422_yuy2)
@@ -160,15 +211,24 @@ static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
             C_YUV422_YUYV( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".p2align 3" MMX_YUV422_YUYV
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 16; p_y += 8; p_u += 4; p_v += 4;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_YUYV );
 #endif
         }
-       p_pixels += i_pitch;
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
     }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
 }
 
 /*****************************************************************************
@@ -177,17 +237,66 @@ static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
 static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    uint8_t *p_pixels = p_dest->p->p_pixels;
-    int      i_pitch  = p_dest->p->i_pitch;
+    uint8_t *p_line = p_dest->p->p_pixels;
     uint8_t *p_y = p_source->Y_PIXELS;
     uint8_t *p_u = p_source->U_PIXELS;
     uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
     for( i_y = p_vout->render.i_height ; i_y-- ; )
     {
-       uint8_t *p_line = p_pixels;
         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
         {
 #if defined (MODULE_NAME_IS_i422_yuy2)
@@ -195,15 +304,24 @@ static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
             C_YUV422_YVYU( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".p2align 3" MMX_YUV422_YVYU
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 16; p_y += 8; p_u += 4; p_v += 4;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_YVYU );
 #endif
         }
-       p_pixels += i_pitch;
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
     }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
 }
 
 /*****************************************************************************
@@ -212,17 +330,66 @@ static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
 static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
                                               picture_t *p_dest )
 {
-    uint8_t *p_pixels = p_dest->p->p_pixels;
-    int      i_pitch  = p_dest->p->i_pitch;
+    uint8_t *p_line = p_dest->p->p_pixels;
     uint8_t *p_y = p_source->Y_PIXELS;
     uint8_t *p_u = p_source->U_PIXELS;
     uint8_t *p_v = p_source->V_PIXELS;
 
     int i_x, i_y;
 
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
     for( i_y = p_vout->render.i_height ; i_y-- ; )
     {
-       uint8_t *p_line = p_pixels;
         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
         {
 #if defined (MODULE_NAME_IS_i422_yuy2)
@@ -230,15 +397,24 @@ static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".p2align 3" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 16; p_y += 8; p_u += 4; p_v += 4;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_UYVY );
 #endif
         }
-       p_pixels += i_pitch;
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
     }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
 }
 
 /*****************************************************************************
@@ -264,6 +440,61 @@ static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
 
     int i_x, i_y;
 
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            p_line -= 2 * p_dest->p->i_pitch;
+
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            p_line -= 2 * p_dest->p->i_pitch;
+
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
     for( i_y = p_vout->render.i_height ; i_y-- ; )
     {
         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
@@ -275,14 +506,22 @@ static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
             C_YUV422_UYVY( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".p2align 3" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 16; p_y += 8; p_u += 4; p_v += 4;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_UYVY );
 #endif
         }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
     }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
+    SSE2_END;
+#endif
+
+#endif
 }
 
 /*****************************************************************************