/*****************************************************************************
* sepia.c : Sepia video plugin for vlc
*****************************************************************************
- * Copyright (C) 2010 the VideoLAN team
+ * Copyright (C) 2010 VLC authors and VideoLAN
* $Id$
*
* Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
#include <vlc_plugin.h>
#include <vlc_filter.h>
#include <vlc_cpu.h>
+#include <vlc_atomic.h>
#include <assert.h>
#include "filter_picture.h"
struct filter_sys_t
{
SepiaFunction pf_sepia;
- int i_intensity;
- vlc_spinlock_t lock;
+ atomic_int i_intensity;
};
/*****************************************************************************
config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
p_filter->p_cfg );
- p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
- CFG_PREFIX "intensity" );
-
- vlc_spin_init( &p_sys->lock );
-
+ atomic_init( &p_sys->i_intensity,
+ var_CreateGetIntegerCommand( p_filter, CFG_PREFIX "intensity" ) );
var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
p_filter->pf_video_filter = Filter;
var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
- vlc_spin_destroy( &p_filter->p_sys->lock );
free( p_filter->p_sys );
}
static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
{
picture_t *p_outpic;
- int intensity;
if( !p_pic ) return NULL;
filter_sys_t *p_sys = p_filter->p_sys;
- vlc_spin_lock( &p_sys->lock );
- intensity = p_sys->i_intensity;
- vlc_spin_unlock( &p_sys->lock );
+ int intensity = atomic_load( &p_sys->i_intensity );
p_outpic = filter_NewPicture( p_filter );
if( !p_outpic )
* instructions. It copies those 8 bytes to 128b register and fills the gaps
* with zeroes and following operations are made with word-operating instructs.
*****************************************************************************/
+VLC_SSE
static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
int i_intensity_spread)
{
"movq %%xmm1, (%0) \n" // load to dest
:
:"r" (dst), "r"(src), "r"(i_intensity_spread)
- :"memory");
+ :"memory", "xmm1", "xmm2", "xmm3");
+}
+
+VLC_SSE
+static void PlanarI420SepiaSSE( picture_t *p_pic, picture_t *p_outpic,
+ int i_intensity )
+{
+ /* prepared values to copy for U and V channels */
+ const uint8_t filling_const_8u = 128 - i_intensity / 6;
+ const uint8_t filling_const_8v = 128 + i_intensity / 14;
+ /* prepared value for faster broadcasting in xmm register */
+ int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
+
+ __asm__ volatile(
+ "pxor %%xmm7, %%xmm7\n"
+ ::: "xmm7");
+
+ /* iterate over the visible lines of the frame, two at a time */
+ for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
+ {
+ const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
+ const int i_dy_line2_start = (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
+ const int i_du_line_start = (y / 2) * p_outpic->p[U_PLANE].i_pitch;
+ const int i_dv_line_start = (y / 2) * p_outpic->p[V_PLANE].i_pitch;
+ int x = 0;
+ /* process the line pair sixteen Y samples at a time (four 8-byte SSE2 calls) */
+ for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16 )
+ {
+ /* Compute Y (luma) plane values with the inline-asm helper */
+ Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
+ &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
+ i_intensity_spread );
+ Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
+ &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
+ i_intensity_spread );
+ Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
+ &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
+ i_intensity_spread );
+ Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
+ &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
+ i_intensity_spread );
+ /* Copy precomputed values to destination memory location */
+ memset(&p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
+ filling_const_8u, 8 );
+ memset(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
+ filling_const_8v, 8 );
+ }
+ /* Finish the line pair: the loop above only handles whole 16-sample
+ chunks, so process whatever is left two samples at a time */
+ for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2 )
+ {
+ // y = y - y/4 {to prevent overflow} + intensity / 4
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
+ (i_intensity >> 2);
+ // u = 128 {half => B&W} - intensity / 6
+ p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
+ filling_const_8u;
+ // v = 128 {half => B&W} + intensity / 14
+ p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
+ filling_const_8v;
+ }
+ }
}
#endif
static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
int i_intensity )
{
+#if defined(CAN_COMPILE_SSE2)
+ if (vlc_CPU_SSE2())
+ return PlanarI420SepiaSSE( p_pic, p_outpic, i_intensity );
+#endif
+
// prepared values to copy for U and V channels
const uint8_t filling_const_8u = 128 - i_intensity / 6;
const uint8_t filling_const_8v = 128 + i_intensity / 14;
-#if defined(CAN_COMPILE_SSE2)
- if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+ /* iterate over the visible lines of the frame, two at a time */
+ for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
{
- /* prepared value for faster broadcasting in xmm register */
- int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
-
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7\n"
- ::);
-
+ const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
+ const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
+ const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
+ const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
+ // to prevent sigsegv if one pic is smaller (theoretically)
+ int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
+ < p_outpic->p[Y_PLANE].i_visible_pitch
+ ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
+ (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
/* iterate for every two visible line in the frame */
- for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
+ for( int x = 0; x < i_picture_size_limit; x += 2)
{
- const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
- const int i_dy_line2_start =
- (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
- const int i_du_line_start =
- (y / 2) * p_outpic->p[U_PLANE].i_pitch;
- const int i_dv_line_start =
- (y / 2) * p_outpic->p[V_PLANE].i_pitch;
- int x = 0;
- /* iterate for every visible line in the frame (eight values at once) */
- for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16 )
- {
- /* Compute yellow channel values with asm function */
- Sepia8ySSE2(
- &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
- &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
- i_intensity_spread );
- Sepia8ySSE2(
- &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
- &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
- i_intensity_spread );
- Sepia8ySSE2(
- &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
- &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
- i_intensity_spread );
- Sepia8ySSE2(
- &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
- &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
- i_intensity_spread );
- /* Copy precomputed values to destination memory location */
- vlc_memset(
- &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
- filling_const_8u, 8 );
- vlc_memset(
- &p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
- filling_const_8v, 8 );
- }
- /* Completing the job, the cycle above takes really big chunks, so
- this makes sure the job will be done completely */
- for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2 )
- {
- // y = y - y/4 {to prevent overflow} + intensity / 4
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
- (i_intensity >> 2);
- // u = 128 {half => B&W} - intensity / 6
- p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
- filling_const_8u;
- // v = 128 {half => B&W} + intensity / 14
- p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
- filling_const_8v;
- }
- }
- }
- else
-#endif
- {
- /* iterate for every two visible line in the frame */
- for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
- {
- const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
- const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
- const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
- const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
- // to prevent sigsegv if one pic is smaller (theoretically)
- int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
- < p_outpic->p[Y_PLANE].i_visible_pitch
- ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
- (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
- /* iterate for every two visible line in the frame */
- for( int x = 0; x < i_picture_size_limit; x += 2)
- {
- // y = y - y/4 {to prevent overflow} + intensity / 4
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
- (i_intensity >> 2);
- p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
- p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
- (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
- (i_intensity >> 2);
- // u = 128 {half => B&W} - intensity / 6
- p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
- filling_const_8u;
- // v = 128 {half => B&W} + intensity / 14
- p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
- filling_const_8v;
- }
+ // y = y - y/4 {to prevent overflow} + intensity / 4
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
+ (i_intensity >> 2);
+ p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
+ p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
+ (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
+ (i_intensity >> 2);
+ // u = 128 {half => B&W} - intensity / 6
+ p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
+ filling_const_8u;
+ // v = 128 {half => B&W} + intensity / 14
+ p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
+ filling_const_8v;
}
}
}
filter_t *p_filter = (filter_t*)p_this;
filter_sys_t *p_sys = p_filter->p_sys;
- vlc_spin_lock( &p_sys->lock );
- p_sys->i_intensity = newval.i_int;
- vlc_spin_unlock( &p_sys->lock );
-
+ atomic_store( &p_sys->i_intensity, newval.i_int );
return VLC_SUCCESS;
}