Removes trailing spaces. Removes tabs.

[vlc] / modules / video_chroma / i422_yuy2.c
diff --git a/modules/video_chroma/i422_yuy2.c b/modules/video_chroma/i422_yuy2.c

index 2b50b840a359bc4e0464661fc7ca6162902ed3c1..ec1a90305da92b31fd5e811cb81131f2d05dc0fd 100644 (file)
--- a/modules/video_chroma/i422_yuy2.c
+++ b/modules/video_chroma/i422_yuy2.c
@@ -1,16 +1,17 @@
  /*****************************************************************************
   * i422_yuy2.c : YUV to YUV conversion module for vlc
   *****************************************************************************
- * Copyright (C) 2000, 2001 VideoLAN
- * $Id: i422_yuy2.c,v 1.1 2002/08/04 17:23:43 sam Exp $
+ * Copyright (C) 2000, 2001 the VideoLAN team
+ * $Id$
   *
   * Authors: Samuel Hocevar <sam@zoy.org>
+ *          Damien Fouilleul <damienf@videolan.org>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
- * 
+ *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@@ -18,18 +19,15 @@
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
   *****************************************************************************/
  
  /*****************************************************************************
   * Preamble
   *****************************************************************************/
-#include <errno.h>                                                 /* ENOMEM */
-#include <string.h>                                            /* strerror() */
-#include <stdlib.h>                                      /* malloc(), free() */
  
  #include <vlc/vlc.h>
-#include <vlc/vout.h>
+#include <vlc_vout.h>
  
  #include "i422_yuy2.h"
  
@@ -52,6 +50,8 @@ static void I422_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
  static void I422_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
  #if defined (MODULE_NAME_IS_i422_yuy2)
  static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
+static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
+static void I422_YV12           ( vout_thread_t *, picture_t *, picture_t * );
  #endif
  
  /*****************************************************************************
@@ -59,12 +59,16 @@ static void I422_Y211           ( vout_thread_t *, picture_t *, picture_t * );
   *****************************************************************************/
  vlc_module_begin();
  #if defined (MODULE_NAME_IS_i422_yuy2)
-    set_description( _("conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+    set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
      set_capability( "chroma", 80 );
  #elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
      set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
      set_capability( "chroma", 100 );
      add_requirement( MMX );
+#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
+    set_description( _("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) );
+    set_capability( "chroma", 120 ); /* above the C (80) and MMX (100) values */
+    add_requirement( SSE2 ); /* NOTE(review): presumably a CPU gate - confirm */
  #endif
      set_callbacks( Activate, NULL );
  vlc_module_end();
@@ -115,6 +119,10 @@ static int Activate( vlc_object_t *p_this )
                  case VLC_FOURCC('Y','2','1','1'):
                      p_vout->chroma.pf_convert = I422_Y211;
                      break;
+
+                case VLC_FOURCC('Y','V','1','2'):
+                    p_vout->chroma.pf_convert = I422_YV12;
+                    break;
  #endif
  
                  default:
@@ -125,8 +133,7 @@ static int Activate( vlc_object_t *p_this )
          default:
              return -1;
      }
-    
-    return 0; 
+    return 0;
  }
  
  /* Following functions are local */
@@ -137,13 +144,64 @@ static int Activate( vlc_object_t *p_this )
  static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
                                                picture_t *p_dest )
  {
-    u8 *p_line = p_dest->p->p_pixels;
-    u8 *p_y = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line = p_dest->p->p_pixels;
+    uint8_t *p_y = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
  
      int i_x, i_y;
  
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
      for( i_y = p_vout->render.i_height ; i_y-- ; )
      {
          for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
@@ -153,19 +211,24 @@ static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
              C_YUV422_YUYV( p_line, p_y, p_u, p_v );
              C_YUV422_YUYV( p_line, p_y, p_u, p_v );
              C_YUV422_YUYV( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".align 8" MMX_YUV422_YUYV
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
-
-            __asm__( ".align 8" MMX_YUV422_YUYV
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_YUYV );
  #endif
          }
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_YUYV( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
      }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
  }
  
  /*****************************************************************************
@@ -174,13 +237,64 @@ static void I422_YUY2( vout_thread_t *p_vout, picture_t *p_source,
  static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
                                                picture_t *p_dest )
  {
-    u8 *p_line = p_dest->p->p_pixels;
-    u8 *p_y = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line = p_dest->p->p_pixels;
+    uint8_t *p_y = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
  
      int i_x, i_y;
  
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
      for( i_y = p_vout->render.i_height ; i_y-- ; )
      {
          for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
@@ -190,19 +304,24 @@ static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
              C_YUV422_YVYU( p_line, p_y, p_u, p_v );
              C_YUV422_YVYU( p_line, p_y, p_u, p_v );
              C_YUV422_YVYU( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".align 8" MMX_YUV422_YVYU
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
-
-            __asm__( ".align 8" MMX_YUV422_YVYU
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_YVYU );
  #endif
          }
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_YVYU( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
      }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
  }
  
  /*****************************************************************************
@@ -211,13 +330,64 @@ static void I422_YVYU( vout_thread_t *p_vout, picture_t *p_source,
  static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
                                                picture_t *p_dest )
  {
-    u8 *p_line = p_dest->p->p_pixels;
-    u8 *p_y = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line = p_dest->p->p_pixels;
+    uint8_t *p_y = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
  
      int i_x, i_y;
  
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
      for( i_y = p_vout->render.i_height ; i_y-- ; )
      {
          for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
@@ -227,19 +397,24 @@ static void I422_UYVY( vout_thread_t *p_vout, picture_t *p_source,
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".align 8" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
-
-            __asm__( ".align 8" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_UYVY );
  #endif
          }
+        for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+        {
+            C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+        }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
      }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#endif
+
+#endif
  }
  
  /*****************************************************************************
@@ -258,13 +433,68 @@ static void I422_IUYV( vout_thread_t *p_vout, picture_t *p_source,
  static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
                                                picture_t *p_dest )
  {
-    u8 *p_line = p_dest->p->p_pixels + p_dest->p->i_lines * p_dest->p->i_pitch;
-    u8 *p_y = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
+    uint8_t *p_y = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
  
      int i_x, i_y;
  
+    const int i_source_margin = p_source->p[0].i_pitch
+                                 - p_source->p[0].i_visible_pitch;
+    const int i_source_margin_c = p_source->p[1].i_pitch
+                                 - p_source->p[1].i_visible_pitch;
+    const int i_dest_margin = p_dest->p->i_pitch
+                               - p_dest->p->i_visible_pitch;
+
+#if defined (MODULE_NAME_IS_i422_yuy2_sse2)
+
+    if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
+        ((intptr_t)p_line|(intptr_t)p_y))) )
+    {
+        /* use faster SSE2 aligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            p_line -= 2 * p_dest->p->i_pitch;
+
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    else {
+        /* use slower SSE2 unaligned fetch and store */
+        for( i_y = p_vout->render.i_height ; i_y-- ; )
+        {
+            p_line -= 2 * p_dest->p->i_pitch;
+
+            for( i_x = p_vout->render.i_width / 16 ; i_x-- ; )
+            {
+                SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
+            }
+            for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; )
+            {
+                C_YUV422_UYVY( p_line, p_y, p_u, p_v );
+            }
+            p_y += i_source_margin;
+            p_u += i_source_margin_c;
+            p_v += i_source_margin_c;
+            p_line += i_dest_margin;
+        }
+    }
+    SSE2_END;
+
+#else
+
      for( i_y = p_vout->render.i_height ; i_y-- ; )
      {
          for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
@@ -276,19 +506,22 @@ static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
              C_YUV422_UYVY( p_line, p_y, p_u, p_v );
-#else
-            __asm__( ".align 8" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
-
-            __asm__( ".align 8" MMX_YUV422_UYVY
-                     : : "r" (p_line), "r" (p_y), "r" (p_u), "r" (p_v) ); 
-
-            p_line += 8; p_y += 4; p_u += 2; p_v += 2;
+#elif defined (MODULE_NAME_IS_i422_yuy2_mmx)
+            MMX_CALL( MMX_YUV422_UYVY );
  #endif
          }
+        p_y += i_source_margin;
+        p_u += i_source_margin_c;
+        p_v += i_source_margin_c;
+        p_line += i_dest_margin;
      }
+#if defined (MODULE_NAME_IS_i422_yuy2_mmx)
+    MMX_END;
+#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
+    SSE2_END;
+#endif
+
+#endif
  }
  
  /*****************************************************************************
@@ -298,10 +531,10 @@ static void I422_cyuv( vout_thread_t *p_vout, picture_t *p_source,
  static void I422_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                                picture_t *p_dest )
  {
-    u8 *p_line = p_dest->p->p_pixels + p_dest->p->i_lines * p_dest->p->i_pitch;
-    u8 *p_y = p_source->Y_PIXELS;
-    u8 *p_u = p_source->U_PIXELS;
-    u8 *p_v = p_source->V_PIXELS;
+    uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
+    uint8_t *p_y = p_source->Y_PIXELS;
+    uint8_t *p_u = p_source->U_PIXELS;
+    uint8_t *p_v = p_source->V_PIXELS;
  
      int i_x, i_y;
  
@@ -316,3 +549,35 @@ static void I422_Y211( vout_thread_t *p_vout, picture_t *p_source,
  }
  #endif
  
+/*****************************************************************************
+ * I422_YV12: planar YUV 4:2:2 to planar YV12 (keeps every other chroma row)
+ *****************************************************************************/
+#if defined (MODULE_NAME_IS_i422_yuy2)
+static void I422_YV12( vout_thread_t *p_vout, picture_t *p_source,
+                                              picture_t *p_dest )
+{
+    /* Plain int throughout: uint16_t would truncate any pitch above 65535. */
+    const int i_dpy = p_dest->p[Y_PLANE].i_pitch;
+    const int i_spy = p_source->p[Y_PLANE].i_pitch;
+    const int i_dpuv = p_dest->p[U_PLANE].i_pitch;
+    const int i_spuv = p_source->p[U_PLANE].i_pitch;
+    const int i_width = p_vout->render.i_width;
+    const int i_height = p_vout->render.i_height;
+    uint8_t *p_dy, *p_y, *p_du, *p_u, *p_dv, *p_v;
+    int i_y = i_height / 2;                 /* one pass per pair of luma rows */
+    if( i_y <= 0 ) return;            /* fewer than two rows: nothing to copy */
+    p_dy = p_dest->Y_PIXELS + (i_height-1)*i_dpy;     /* start on the last row */
+    p_y = p_source->Y_PIXELS + (i_height-1)*i_spy;
+    p_du = p_dest->U_PIXELS + (i_y-1)*i_dpuv;
+    p_u = p_source->U_PIXELS + (i_height-1)*i_spuv;
+    p_dv = p_dest->V_PIXELS + (i_y-1)*i_dpuv;
+    p_v = p_source->V_PIXELS + (i_height-1)*i_spuv;
+    for ( ; i_y--; )
+    {
+        memcpy(p_dy, p_y, i_width); p_dy -= i_dpy; p_y -= i_spy;
+        memcpy(p_dy, p_y, i_width); p_dy -= i_dpy; p_y -= i_spy;
+        memcpy(p_du, p_u, i_width/2); p_du -= i_dpuv; p_u -= 2*i_spuv;
+        memcpy(p_dv, p_v, i_width/2); p_dv -= i_dpuv; p_v -= 2*i_spuv;
+    }
+}
+#endif