git.sesse.net Git - vlc/blobdiff - modules/video_filter/deinterlace.c
Use <vlc_cpu.h>
[vlc] / modules / video_filter / deinterlace.c
index 38626674a587e0cf185e91fdb21260afd3ae1f8f..de49514574390a09fd2b30f1e53f8f548c381776 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deinterlace.c : deinterlacer plugin for vlc
  *****************************************************************************
- * Copyright (C) 2000, 2001, 2002, 2003 the VideoLAN team
+ * Copyright (C) 2000-2009 the VideoLAN team
  * $Id$
  *
  * Author: Sam Hocevar <sam@zoy.org>
@@ -40,6 +40,7 @@
 #include <vlc_vout.h>
 #include <vlc_sout.h>
 #include <vlc_filter.h>
+#include <vlc_cpu.h>
 
 #ifdef CAN_COMPILE_MMXEXT
 #   include "mmx.h"
@@ -93,8 +94,11 @@ static void EndMMX       ( void );
 #if defined(CAN_COMPILE_3DNOW)
 static void End3DNow     ( void );
 #endif
+#if defined __ARM_NEON__
+static void MergeNEON (void *, const void *, const void *, size_t);
+#endif
 
-static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
+static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
 
 static int OpenFilter( vlc_object_t *p_this );
@@ -191,10 +195,11 @@ static int Control( vout_thread_t *p_vout, int i_query, va_list args )
 static int Create( vlc_object_t *p_this )
 {
     vout_thread_t *p_vout = (vout_thread_t *)p_this;
-    vlc_value_t val;
+    vout_sys_t *p_sys;
+    char *psz_mode;
 
     /* Allocate structure */
-    p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
+    p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
     if( p_vout->p_sys == NULL )
         return VLC_ENOMEM;
 
@@ -205,67 +210,72 @@ static int Create( vlc_object_t *p_this )
     p_vout->pf_display = NULL;
     p_vout->pf_control = Control;
 
-    p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
-    p_vout->p_sys->b_double_rate = false;
-    p_vout->p_sys->b_half_height = true;
-    p_vout->p_sys->last_date = 0;
-    p_vout->p_sys->p_vout = 0;
-    vlc_mutex_init( &p_vout->p_sys->filter_lock );
+    p_sys->i_mode = DEINTERLACE_DISCARD;
+    p_sys->b_double_rate = false;
+    p_sys->b_half_height = true;
+    p_sys->last_date = 0;
+    p_sys->p_vout = 0;
+    vlc_mutex_init( &p_sys->filter_lock );
 
 #if defined(CAN_COMPILE_C_ALTIVEC)
     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
     {
-        p_vout->p_sys->pf_merge = MergeAltivec;
-        p_vout->p_sys->pf_end_merge = NULL;
+        p_sys->pf_merge = MergeAltivec;
+        p_sys->pf_end_merge = NULL;
     }
     else
 #endif
 #if defined(CAN_COMPILE_SSE)
     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
     {
-        p_vout->p_sys->pf_merge = MergeSSE2;
-        p_vout->p_sys->pf_end_merge = EndMMX;
+        p_sys->pf_merge = MergeSSE2;
+        p_sys->pf_end_merge = EndMMX;
     }
     else
 #endif
 #if defined(CAN_COMPILE_MMXEXT)
     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
     {
-        p_vout->p_sys->pf_merge = MergeMMXEXT;
-        p_vout->p_sys->pf_end_merge = EndMMX;
+        p_sys->pf_merge = MergeMMXEXT;
+        p_sys->pf_end_merge = EndMMX;
     }
     else
 #endif
 #if defined(CAN_COMPILE_3DNOW)
     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
     {
-        p_vout->p_sys->pf_merge = Merge3DNow;
-        p_vout->p_sys->pf_end_merge = End3DNow;
+        p_sys->pf_merge = Merge3DNow;
+        p_sys->pf_end_merge = End3DNow;
     }
     else
 #endif
+#if defined __ARM_NEON__
+    if( vlc_CPU() & CPU_CAPABILITY_NEON )
     {
-        p_vout->p_sys->pf_merge = MergeGeneric;
-        p_vout->p_sys->pf_end_merge = NULL;
+        p_sys->pf_merge = MergeNEON;
+        p_sys->pf_end_merge = NULL;
+    }
+    else
+#endif
+    {
+        p_sys->pf_merge = MergeGeneric;
+        p_sys->pf_end_merge = NULL;
     }
 
     /* Look what method was requested */
-    var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
-    var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
+    psz_mode = var_CreateGetString( p_vout, "deinterlace-mode" );
 
-    if( val.psz_string == NULL )
+    if( !psz_mode )
     {
         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
 
-        val.psz_string = strdup( "discard" );
+        psz_mode = strdup( "discard" );
     }
 
-    msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
-
-    SetFilterMethod( p_vout, val.psz_string );
+    SetFilterMethod( p_vout, psz_mode );
 
-    free( val.psz_string );
+    free( psz_mode );
 
     return VLC_SUCCESS;
 }
@@ -273,40 +283,41 @@ static int Create( vlc_object_t *p_this )
 /*****************************************************************************
  * SetFilterMethod: setup the deinterlace method to use.
  *****************************************************************************/
-static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
+static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
 {
+    vout_sys_t *p_sys = p_vout->p_sys;
     if( !strcmp( psz_method, "mean" ) )
     {
-        p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
-        p_vout->p_sys->b_double_rate = false;
-        p_vout->p_sys->b_half_height = true;
+        p_sys->i_mode = DEINTERLACE_MEAN;
+        p_sys->b_double_rate = false;
+        p_sys->b_half_height = true;
     }
     else if( !strcmp( psz_method, "blend" )
              || !strcmp( psz_method, "average" )
              || !strcmp( psz_method, "combine-fields" ) )
     {
-        p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
-        p_vout->p_sys->b_double_rate = false;
-        p_vout->p_sys->b_half_height = false;
+        p_sys->i_mode = DEINTERLACE_BLEND;
+        p_sys->b_double_rate = false;
+        p_sys->b_half_height = false;
     }
     else if( !strcmp( psz_method, "bob" )
              || !strcmp( psz_method, "progressive-scan" ) )
     {
-        p_vout->p_sys->i_mode = DEINTERLACE_BOB;
-        p_vout->p_sys->b_double_rate = true;
-        p_vout->p_sys->b_half_height = false;
+        p_sys->i_mode = DEINTERLACE_BOB;
+        p_sys->b_double_rate = true;
+        p_sys->b_half_height = false;
     }
     else if( !strcmp( psz_method, "linear" ) )
     {
-        p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
-        p_vout->p_sys->b_double_rate = true;
-        p_vout->p_sys->b_half_height = false;
+        p_sys->i_mode = DEINTERLACE_LINEAR;
+        p_sys->b_double_rate = true;
+        p_sys->b_half_height = false;
     }
     else if( !strcmp( psz_method, "x" ) )
     {
-        p_vout->p_sys->i_mode = DEINTERLACE_X;
-        p_vout->p_sys->b_double_rate = false;
-        p_vout->p_sys->b_half_height = false;
+        p_sys->i_mode = DEINTERLACE_X;
+        p_sys->b_double_rate = false;
+        p_sys->b_half_height = false;
     }
     else
     {
@@ -315,9 +326,9 @@ static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
             msg_Err( p_vout, "no valid deinterlace mode provided, "
                      "using \"discard\"" );
 
-        p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
-        p_vout->p_sys->b_double_rate = false;
-        p_vout->p_sys->b_half_height = !b_i422;
+        p_sys->i_mode = DEINTERLACE_DISCARD;
+        p_sys->b_double_rate = false;
+        p_sys->b_half_height = !b_i422;
     }
 
     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
@@ -1060,7 +1071,7 @@ static void MergeAltivec( void *_p_dest, const void *_p_s1,
     uint8_t *p_end  = p_dest + i_bytes - 15;
 
     /* Use C until the first 16-bytes aligned destination pixel */
-    while( (int)p_dest & 0xF )
+    while( (uintptr_t)p_dest & 0xF )
     {
         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
     }
@@ -1120,6 +1131,84 @@ static void MergeAltivec( void *_p_dest, const void *_p_s1,
 }
 #endif
 
+#ifdef __ARM_NEON__
+/*****************************************************************************
+ * MergeNEON: average two byte buffers into a third one with ARM NEON.
+ *****************************************************************************
+ * Writes n bytes to out, each one being the truncating mean (vhadd.u8) of
+ * the matching bytes of in1 and in2. The bytes that precede the first
+ * 16-byte aligned destination address, and those that do not fill a whole
+ * 64-byte NEON iteration at the end, are handed over to MergeGeneric.
+ *****************************************************************************/
+static void MergeNEON (void *restrict out, const void *in1,
+                       const void *in2, size_t n)
+{
+    uint8_t *outp = out;
+    const uint8_t *in1p = in1;
+    const uint8_t *in2p = in2;
+    /* Number of bytes needed to reach a 16-byte aligned destination (the
+     * NEON stores below carry a :128 alignment qualifier), capped to the
+     * buffer size so that n cannot wrap around. */
+    size_t head = (16 - (((uintptr_t)outp) & 15)) & 15;
+
+    if (head > n)
+        head = n;
+    if (head)
+    {
+        MergeGeneric (outp, in1p, in2p, head);
+        outp += head;
+        in1p += head;
+        in2p += head;
+        n -= head;
+    }
+
+    /* Each asm statement below consumes 64 bytes, so only whole 64-byte
+     * chunks may go through the loops; the rest is left to the C code. */
+    uint8_t *end = outp + (n & ~(size_t)63);
+
+    /* The clobber lists name every q register the assembly writes. */
+    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
+        /* At least one source is not 16-byte aligned: unqualified loads */
+        while (outp < end)
+            asm volatile (
+                "vld1.u8  {q0-q1}, [%[in1]]!\n"
+                "vld1.u8  {q2-q3}, [%[in2]]!\n"
+                "vhadd.u8 q4, q0, q2\n"
+                "vld1.u8  {q6-q7}, [%[in1]]!\n"
+                "vhadd.u8 q5, q1, q3\n"
+                "vld1.u8  {q8-q9}, [%[in2]]!\n"
+                "vhadd.u8 q10, q6, q8\n"
+                "vhadd.u8 q11, q7, q9\n"
+                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
+                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
+                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
+                :
+                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+                  "q8", "q9", "q10", "q11", "memory");
+    else
+        /* Both sources are 16-byte aligned: loads with a :128 qualifier */
+        while (outp < end)
+            asm volatile (
+                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
+                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
+                "vhadd.u8 q4, q0, q2\n"
+                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
+                "vhadd.u8 q5, q1, q3\n"
+                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
+                "vhadd.u8 q10, q6, q8\n"
+                "vhadd.u8 q11, q7, q9\n"
+                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
+                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
+                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
+                :
+                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+                  "q8", "q9", "q10", "q11", "memory");
+    n &= 63;
+    if (n)
+        MergeGeneric (outp, in1p, in2p, n);
+}
+#endif
+
 /*****************************************************************************
  * RenderX: This algo works on a 8x8 block basic, it copies the top field
  * and apply a process to recreate the bottom field :
@@ -1681,42 +1750,43 @@ static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
 {
     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
     vout_thread_t * p_vout = (vout_thread_t *)p_this;
+    vout_sys_t *p_sys = p_vout->p_sys;
 
     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
 
-    vlc_mutex_lock( &p_vout->p_sys->filter_lock );
-    const bool b_old_half_height = p_vout->p_sys->b_half_height;
+    vlc_mutex_lock( &p_sys->filter_lock );
+    const bool b_old_half_height = p_sys->b_half_height;
 
     SetFilterMethod( p_vout, newval.psz_string );
 
-    if( !b_old_half_height == !p_vout->p_sys->b_half_height )
+    if( !b_old_half_height == !p_sys->b_half_height )
     {
-        vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
+        vlc_mutex_unlock( &p_sys->filter_lock );
         return VLC_SUCCESS;
     }
 
     /* We need to kill the old vout */
-    if( p_vout->p_sys->p_vout )
+    if( p_sys->p_vout )
     {
-        vout_filter_DelChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
-        vout_CloseAndRelease( p_vout->p_sys->p_vout );
+        vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
+        vout_CloseAndRelease( p_sys->p_vout );
     }
 
     /* Try to open a new video output */
-    p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
+    p_sys->p_vout = SpawnRealVout( p_vout );
 
-    if( p_vout->p_sys->p_vout == NULL )
+    if( p_sys->p_vout == NULL )
     {
         /* Everything failed */
         msg_Err( p_vout, "cannot open vout, aborting" );
 
-        vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
+        vlc_mutex_unlock( &p_sys->filter_lock );
         return VLC_EGENERIC;
     }
 
-    vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
+    vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
 
-    vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
+    vlc_mutex_unlock( &p_sys->filter_lock );
     return VLC_SUCCESS;
 }