git.sesse.net Git - vlc/blobdiff - modules/video_filter/sepia.c
Qt: change tools accessor
[vlc] / modules / video_filter / sepia.c
index 7d4fb98c5a543b02649e4b631d0d5fb9d0bb5330..c0f3ad1a77a1e6b6bf14008238c4bd9a0a8b87b2 100644 (file)
@@ -1,24 +1,24 @@
 /*****************************************************************************
  * sepia.c : Sepia video plugin for vlc
  *****************************************************************************
- * Copyright (C) 2010 the VideoLAN team
+ * Copyright (C) 2010 VLC authors and VideoLAN
  * $Id$
  *
  * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
 /*****************************************************************************
@@ -33,6 +33,7 @@
 #include <vlc_plugin.h>
 #include <vlc_filter.h>
 #include <vlc_cpu.h>
+#include <vlc_atomic.h>
 
 #include <assert.h>
 #include "filter_picture.h"
@@ -47,7 +48,6 @@ static void RVSepia( picture_t *, picture_t *, int );
 static void PlanarI420Sepia( picture_t *, picture_t *, int);
 static void PackedYUVSepia( picture_t *, picture_t *, int);
 static picture_t *Filter( filter_t *, picture_t * );
-inline void Sepia8ySSE2( uint8_t *, const uint8_t *, int );
 static const char *const ppsz_filter_options[] = {
     "intensity", NULL
 };
@@ -102,8 +102,7 @@ static const struct
 struct filter_sys_t
 {
     SepiaFunction pf_sepia;
-    int i_intensity;
-    vlc_spinlock_t lock;
+    atomic_int i_intensity;
 };
 
 /*****************************************************************************
@@ -140,11 +139,8 @@ static int Create( vlc_object_t *p_this )
 
     config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
                        p_filter->p_cfg );
-    p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
-                       CFG_PREFIX "intensity" );
-
-    vlc_spin_init( &p_sys->lock );
-
+    atomic_init( &p_sys->i_intensity,
+             var_CreateGetIntegerCommand( p_filter, CFG_PREFIX "intensity" ) );
     var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
 
     p_filter->pf_video_filter = Filter;
@@ -163,7 +159,6 @@ static void Destroy( vlc_object_t *p_this )
 
     var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
 
-    vlc_spin_destroy( &p_filter->p_sys->lock );
     free( p_filter->p_sys );
 }
 
@@ -177,14 +172,11 @@ static void Destroy( vlc_object_t *p_this )
 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 {
     picture_t *p_outpic;
-    int intensity;
 
     if( !p_pic ) return NULL;
 
     filter_sys_t *p_sys = p_filter->p_sys;
-    vlc_spin_lock( &p_sys->lock );
-    intensity = p_sys->i_intensity;
-    vlc_spin_unlock( &p_sys->lock );
+    int intensity = atomic_load( &p_sys->i_intensity );
 
     p_outpic = filter_NewPicture( p_filter );
     if( !p_outpic )
@@ -199,6 +191,113 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
     return CopyInfoAndRelease( p_outpic, p_pic );
 }
 
+#if defined(CAN_COMPILE_SSE2)
+/*****************************************************************************
+ * Sepia8ySSE2
+ *****************************************************************************
+ * Applies the sepia luma formula to the 8 bytes at src and stores the 8 result
+ * bytes at dst, using SSE2. i_intensity_spread is expected to carry the
+ * intensity in both 16-bit halves so one pshufd broadcasts it to every word.
+ *****************************************************************************/
+VLC_SSE
+static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
+                         int i_intensity_spread)
+{
+    __asm__ volatile (
+        // y = y - y / 4 + i_intensity / 4 (AT&T syntax: destination is last)
+        "pxor          %%xmm7, %%xmm7\n"    // zero locally, do not rely on caller
+        "movq            (%1), %%xmm1\n"    // load 8 source bytes
+        "punpcklbw     %%xmm7, %%xmm1\n"    // bytes as words with 0s in between
+        "movdqa        %%xmm1, %%xmm2\n"    // second copy of y
+        "movd              %2, %%xmm3\n"    // intensity in the two low words
+        "pshufd    $0, %%xmm3, %%xmm3\n"    // broadcast to all eight words
+        "psrlw             $2, %%xmm2\n"    // xmm2 = y / 4
+        "psubw         %%xmm2, %%xmm1\n"    // xmm1 = y - y / 4
+        "psrlw             $2, %%xmm3\n"    // xmm3 = intensity / 4
+        "paddw         %%xmm3, %%xmm1\n"    // xmm1 = y - y / 4 + intensity / 4
+        "packuswb      %%xmm1, %%xmm1\n"    // pack back to bytes (saturating)
+        "movq          %%xmm1, (%0)  \n"    // store to dest
+        :
+        :"r" (dst), "r"(src), "r"(i_intensity_spread)
+        :"memory", "xmm1", "xmm2", "xmm3", "xmm7");
+}
+/* SSE2 I420 sepia: Y goes through Sepia8ySSE2, U and V become constants. */
+VLC_SSE
+static void PlanarI420SepiaSSE( picture_t *p_pic, picture_t *p_outpic,
+                                int i_intensity )
+{
+    /* p_pic is read, p_outpic is written; U and V get these constant values */
+    const uint8_t filling_const_8u = 128 - i_intensity / 6;
+    const uint8_t filling_const_8v = 128 + i_intensity / 14;
+    /* intensity in both 16-bit halves, for fast broadcasting in xmm */
+    int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
+
+    __asm__ volatile(
+        "pxor      %%xmm7, %%xmm7\n"    // xmm7 = 0, the zero operand for unpacking
+        ::: "xmm7");
+    /* iterate over the visible lines two at a time; NOTE(review): the offsets
+     * below come from p_outpic's pitches but index p_pic too - confirm equal */
+    for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
+    {
+        const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
+        const int i_dy_line2_start = (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
+        const int i_du_line_start =  (y / 2) * p_outpic->p[U_PLANE].i_pitch;
+        const int i_dv_line_start =  (y / 2) * p_outpic->p[V_PLANE].i_pitch;
+        int x = 0;
+        /* 16 luma bytes per line per pass (eight per Sepia8ySSE2 call) */
+        for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16 )
+        {
+            /* Compute luma (Y) values with the asm function */
+            Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
+                        &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
+                        i_intensity_spread );
+            Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
+                        &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
+                        i_intensity_spread );
+            Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
+                        &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
+                        i_intensity_spread );
+            Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
+                        &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
+                        i_intensity_spread );
+            /* matching 8 U and 8 V bytes get the precomputed constants */
+            memset(&p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
+                   filling_const_8u, 8 );
+            memset(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
+                   filling_const_8v, 8 );
+        }
+        /* Remainder: the loop above consumes 16 columns per pass, so finish
+           the rest of the line pair here, two columns at a time, in plain C */
+        for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2 )
+        {
+            // y = y - y/4 {to prevent overflow} + intensity / 4
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
+                (i_intensity >> 2);
+            // u = 128 {half => B&W} - intensity / 6
+            p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
+                filling_const_8u;
+            // v = 128 {half => B&W} + intensity / 14
+            p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
+                filling_const_8v;
+        }
+    }
+}
+#endif
+
 /*****************************************************************************
  * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
  *****************************************************************************
@@ -210,131 +309,53 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
                                int i_intensity )
 {
+#if defined(CAN_COMPILE_SSE2)
+    if (vlc_CPU_SSE2())
+        return PlanarI420SepiaSSE( p_pic, p_outpic, i_intensity );
+#endif
+
     // prepared values to copy for U and V channels
     const uint8_t filling_const_8u = 128 - i_intensity / 6;
     const uint8_t filling_const_8v = 128 + i_intensity / 14;
 
-#if defined(CAN_COMPILE_SSE2)
-    if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+    /* iterate over the visible lines of the frame, two at a time */
+    for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
     {
-        /* prepared value for faster broadcasting in xmm register */
-        int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
-
-        __asm__ volatile(
-            "pxor      %%xmm7, %%xmm7\n"
-        ::);
-
+        const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
+        const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
+        const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
+        const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
+        // to prevent sigsegv if one pic is smaller (theoretically)
+        int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
+                  < p_outpic->p[Y_PLANE].i_visible_pitch
+                  ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
+                  (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
         /* iterate for every two visible line in the frame */
-        for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
+        for( int x = 0; x < i_picture_size_limit; x += 2)
         {
-            const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
-            const int i_dy_line2_start =
-            (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
-            const int i_du_line_start =
-            (y / 2) * p_outpic->p[U_PLANE].i_pitch;
-            const int i_dv_line_start =
-            (y / 2) * p_outpic->p[V_PLANE].i_pitch;
-            int x = 0;
-            /* iterate for every visible line in the frame (eight values at once) */
-            for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16 )
-            {
-                /* Compute yellow channel values with asm function */
-                Sepia8ySSE2(
-                    &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
-                    &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
-                    i_intensity_spread );
-                Sepia8ySSE2(
-                    &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
-                    &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
-                    i_intensity_spread );
-                Sepia8ySSE2(
-                    &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
-                    &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
-                    i_intensity_spread );
-                Sepia8ySSE2(
-                    &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
-                    &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
-                    i_intensity_spread );
-                /* Copy precomputed values to destination memory location */
-                vlc_memset(
-                    &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
-                    filling_const_8u, 8 );
-                vlc_memset(
-                    &p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
-                    filling_const_8v, 8 );
-            }
-            /* Completing the job, the cycle above takes really big chunks, so
-              this makes sure the job will be done completely */
-            for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2 )
-            {
-                // y = y - y/4 {to prevent overflow} + intensity / 4
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
-                    (i_intensity >> 2);
-                // u = 128 {half => B&W} - intensity / 6
-                p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
-                    filling_const_8u;
-                // v = 128 {half => B&W} + intensity / 14
-                p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
-                    filling_const_8v;
-            }
-        }
-    }
-    else
-#endif
-    {
-        /* iterate for every two visible line in the frame */
-        for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
-        {
-            const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
-            const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
-            const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
-            const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
-            // to prevent sigsegv if one pic is smaller (theoretically)
-            int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
-                      < p_outpic->p[Y_PLANE].i_visible_pitch
-                      ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
-                      (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
-            /* iterate for every two visible line in the frame */
-            for( int x = 0; x < i_picture_size_limit; x += 2)
-            {
-                // y = y - y/4 {to prevent overflow} + intensity / 4
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
-                    (i_intensity >> 2);
-                p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
-                    p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
-                    (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
-                    (i_intensity >> 2);
-                // u = 128 {half => B&W} - intensity / 6
-                p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
-                    filling_const_8u;
-                // v = 128 {half => B&W} + intensity / 14
-                p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
-                    filling_const_8v;
-            }
+            // y = y - y/4 {to prevent overflow} + intensity / 4
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
+                (i_intensity >> 2);
+            p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
+                p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
+                (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
+                (i_intensity >> 2);
+            // u = 128 {half => B&W} - intensity / 6
+            p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
+                filling_const_8u;
+            // v = 128 {half => B&W} + intensity / 14
+            p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
+                filling_const_8v;
         }
     }
 }
@@ -458,37 +479,6 @@ static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
 #undef FIX
 }
 
-/*****************************************************************************
- * Sepia8ySSE2
- *****************************************************************************
- * This function applies sepia effect to eight bytes of yellow using SSE4.1
- * instructions. It copies those 8 bytes to 128b register and fills the gaps
- * with zeroes and following operations are made with word-operating instructs.
- *****************************************************************************/
-inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
-                         int i_intensity_spread)
-{
-#if defined(CAN_COMPILE_SSE2)
-    __asm__ volatile (
-        // y = y - y / 4 + i_intensity / 4
-        "movq            (%1), %%xmm1\n"
-        "punpcklbw     %%xmm7, %%xmm1\n"
-        "movq            (%1), %%xmm2\n" // store bytes as words with 0s in between
-        "punpcklbw     %%xmm7, %%xmm2\n"
-        "movd              %2, %%xmm3\n"
-        "pshufd    $0, %%xmm3, %%xmm3\n"
-        "psrlw             $2, %%xmm2\n"    // rotate right 2
-        "psubusb       %%xmm1, %%xmm2\n"    // subtract
-        "psrlw             $2, %%xmm3\n"
-        "paddsb        %%xmm1, %%xmm3\n"    // add
-        "packuswb      %%xmm2, %%xmm1\n"    // pack back to bytes
-        "movq          %%xmm1, (%0)  \n"    // load to dest
-        :
-        :"r" (dst), "r"(src), "r"(i_intensity_spread)
-        :"memory");
-#endif
-}
-
 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
                             vlc_value_t oldval, vlc_value_t newval,
                             void *p_data )
@@ -497,9 +487,6 @@ static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
     filter_t *p_filter = (filter_t*)p_this;
     filter_sys_t *p_sys = p_filter->p_sys;
 
-    vlc_spin_lock( &p_sys->lock );
-    p_sys->i_intensity = newval.i_int;
-    vlc_spin_unlock( &p_sys->lock );
-
+    atomic_store( &p_sys->i_intensity, newval.i_int );
     return VLC_SUCCESS;
 }