/*****************************************************************************
* sepia.c : Sepia video plugin for vlc
*****************************************************************************
- * Copyright (C) 2010 the VideoLAN team
+ * Copyright (C) 2010 VLC authors and VideoLAN
* $Id$
*
* Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_filter.h>
+#include <vlc_cpu.h>
+#include <vlc_atomic.h>
#include <assert.h>
#include "filter_picture.h"
static void PlanarI420Sepia( picture_t *, picture_t *, int);
static void PackedYUVSepia( picture_t *, picture_t *, int);
static picture_t *Filter( filter_t *, picture_t * );
-
static const char *const ppsz_filter_options[] = {
"intensity", NULL
};
set_category( CAT_VIDEO )
set_subcategory( SUBCAT_VIDEO_VFILTER )
set_capability( "video filter2", 0 )
- add_integer_with_range( CFG_PREFIX "intensity", 100, 0, 255, NULL,
+ add_integer_with_range( CFG_PREFIX "intensity", 120, 0, 255,
SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
false )
set_callbacks( Create, Destroy )
struct filter_sys_t
{
SepiaFunction pf_sepia;
- int i_intensity;
- vlc_spinlock_t lock;
+ /* Sepia intensity. It is read with atomic_load() in Filter() and written
+ * with atomic_store() when a new value is delivered, so the spinlock that
+ * used to guard it is no longer kept. */
+ atomic_int i_intensity;
};
/*****************************************************************************
config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
p_filter->p_cfg );
- p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
- CFG_PREFIX "intensity" );
-
- vlc_spin_init( &p_sys->lock );
-
+ atomic_init( &p_sys->i_intensity,
+ var_CreateGetIntegerCommand( p_filter, CFG_PREFIX "intensity" ) );
var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
p_filter->pf_video_filter = Filter;
var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
- vlc_spin_destroy( &p_filter->p_sys->lock );
free( p_filter->p_sys );
}
static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
{
picture_t *p_outpic;
- int intensity;
if( !p_pic ) return NULL;
filter_sys_t *p_sys = p_filter->p_sys;
- vlc_spin_lock( &p_sys->lock );
- intensity = p_sys->i_intensity;
- vlc_spin_unlock( &p_sys->lock );
+ int intensity = atomic_load( &p_sys->i_intensity );
p_outpic = filter_NewPicture( p_filter );
if( !p_outpic )
return CopyInfoAndRelease( p_outpic, p_pic );
}
+#if defined(CAN_COMPILE_SSE2)
+/*****************************************************************************
+ * Sepia8ySSE2
+ *****************************************************************************
+ * Applies the sepia luma transform to eight consecutive Y (luma) bytes using
+ * SSE2 instructions. The eight bytes are zero-extended to 16-bit words in a
+ * 128-bit register, y - y / 4 + intensity / 4 is computed per word, and the
+ * result is packed back to eight bytes.
+ *
+ * dst:                receives eight result bytes
+ * src:                eight bytes are read from here
+ * i_intensity_spread: the intensity replicated in both 16-bit halves of the
+ *                     int (0x10001 * intensity), so that one dword broadcast
+ *                     fills every word lane
+ *
+ * The function zeroes its own scratch register and does not depend on any
+ * register state left behind by the caller.
+ *****************************************************************************/
+VLC_SSE
+static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
+                         int i_intensity_spread)
+{
+    __asm__ volatile (
+        // y = y - y / 4 + i_intensity / 4
+        "movq            (%1), %%xmm1\n" // load eight luma bytes
+        "pxor          %%xmm2, %%xmm2\n" // local zero for the unpack
+        "punpcklbw     %%xmm2, %%xmm1\n" // bytes -> words, high byte = 0
+        "movdqa        %%xmm1, %%xmm2\n" // second copy of y
+        "psrlw             $2, %%xmm2\n" // y / 4 (logical shift right by 2)
+        "psubw         %%xmm2, %%xmm1\n" // xmm1 = y - y / 4, never negative
+        "movd              %2, %%xmm3\n"
+        "pshufd    $0, %%xmm3, %%xmm3\n" // broadcast intensity to all words
+        "psrlw             $2, %%xmm3\n" // intensity / 4
+        // Word add: with y and intensity both in 0..255 the sum is at most
+        // 192 + 63 = 255, so nothing is lost when packing below.
+        "paddw         %%xmm3, %%xmm1\n" // xmm1 = y - y / 4 + intensity / 4
+        "packuswb      %%xmm1, %%xmm1\n" // pack words back to bytes
+        "movq          %%xmm1, (%0)  \n" // store eight bytes to dst
+        :
+        :"r" (dst), "r"(src), "r"(i_intensity_spread)
+        :"memory", "xmm1", "xmm2", "xmm3");
+}
+
+/*****************************************************************************
+ * PlanarI420SepiaSSE: SSE2 variant of the planar I420 sepia filter
+ *****************************************************************************
+ * Processes the frame two luma lines at a time. For every pixel the output
+ * luma is y - y / 4 + intensity / 4; the U and V planes are filled with the
+ * constants 128 - intensity / 6 and 128 + intensity / 14.
+ *
+ * p_pic:       source picture (only its Y plane is read)
+ * p_outpic:    destination picture (Y, U and V planes are written)
+ * i_intensity: sepia intensity; only its low 8 bits feed the SSE2 helper
+ *****************************************************************************/
+VLC_SSE
+static void PlanarI420SepiaSSE( picture_t *p_pic, picture_t *p_outpic,
+                                int i_intensity )
+{
+    /* prepared values to copy for U and V channels */
+    const uint8_t filling_const_8u = 128 - i_intensity / 6;
+    const uint8_t filling_const_8v = 128 + i_intensity / 14;
+    /* intensity replicated into both 16-bit halves, ready to be broadcast
+       into an xmm register by the helper */
+    int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
+    /* luma offset used by the scalar tail loop */
+    const int i_luma_offset = i_intensity >> 2;
+
+    const uint8_t *p_src_y = p_pic->p[Y_PLANE].p_pixels;
+    uint8_t *p_dst_y = p_outpic->p[Y_PLANE].p_pixels;
+    uint8_t *p_dst_u = p_outpic->p[U_PLANE].p_pixels;
+    uint8_t *p_dst_v = p_outpic->p[V_PLANE].p_pixels;
+    const int i_width = p_pic->p[Y_PLANE].i_visible_pitch;
+
+    /* Zero xmm7 for helpers that expect a zeroed xmm7.
+       NOTE(review): the compiler is not obliged to preserve xmm7 across
+       separate asm statements or across calls such as memset(), so helpers
+       should not depend on this. */
+    __asm__ volatile(
+        "pxor      %%xmm7, %%xmm7\n"
+        ::: "xmm7");
+
+    /* iterate over the visible lines of the frame, two at a time */
+    for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
+    {
+        /* Source and destination rows are located with their own pitch:
+           the two pictures are not guaranteed to share one. */
+        const uint8_t *p_sy1 = p_src_y + y * p_pic->p[Y_PLANE].i_pitch;
+        const uint8_t *p_sy2 = p_sy1 + p_pic->p[Y_PLANE].i_pitch;
+        uint8_t *p_dy1 = p_dst_y + y * p_outpic->p[Y_PLANE].i_pitch;
+        uint8_t *p_dy2 = p_dy1 + p_outpic->p[Y_PLANE].i_pitch;
+        uint8_t *p_du = p_dst_u + (y / 2) * p_outpic->p[U_PLANE].i_pitch;
+        uint8_t *p_dv = p_dst_v + (y / 2) * p_outpic->p[V_PLANE].i_pitch;
+        int x = 0;
+
+        /* main loop: 16 luma pixels on each of the two lines per iteration,
+           as four 8-byte helper calls, plus 8 bytes of U and 8 bytes of V */
+        for ( ; x < i_width - 15; x += 16 )
+        {
+            /* Compute luma (Y) values with the asm helper */
+            Sepia8ySSE2( p_dy1 + x,     p_sy1 + x,     i_intensity_spread );
+            Sepia8ySSE2( p_dy2 + x,     p_sy2 + x,     i_intensity_spread );
+            Sepia8ySSE2( p_dy1 + x + 8, p_sy1 + x + 8, i_intensity_spread );
+            Sepia8ySSE2( p_dy2 + x + 8, p_sy2 + x + 8, i_intensity_spread );
+            /* Copy precomputed chroma values to the destination planes */
+            memset( p_du + (x / 2), filling_const_8u, 8 );
+            memset( p_dv + (x / 2), filling_const_8v, 8 );
+        }
+
+        /* scalar tail: handles the columns left over by the 16-wide loop,
+           one 2x2 luma quad (one chroma sample) per iteration */
+        for ( ; x < i_width - 1; x += 2 )
+        {
+            // y = y - y/4 {to prevent overflow} + intensity / 4
+            p_dy1[x]     = p_sy1[x]     - (p_sy1[x]     >> 2) + i_luma_offset;
+            p_dy1[x + 1] = p_sy1[x + 1] - (p_sy1[x + 1] >> 2) + i_luma_offset;
+            p_dy2[x]     = p_sy2[x]     - (p_sy2[x]     >> 2) + i_luma_offset;
+            p_dy2[x + 1] = p_sy2[x + 1] - (p_sy2[x + 1] >> 2) + i_luma_offset;
+            // u = 128 {half => B&W} - intensity / 6
+            p_du[x / 2] = filling_const_8u;
+            // v = 128 {half => B&W} + intensity / 14
+            p_dv[x / 2] = filling_const_8v;
+        }
+    }
+}
+#endif
+
/*****************************************************************************
* PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
*****************************************************************************
static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
int i_intensity )
{
+#if defined(CAN_COMPILE_SSE2)
+ if (vlc_CPU_SSE2())
+ return PlanarI420SepiaSSE( p_pic, p_outpic, i_intensity );
+#endif
+
// prepared values to copy for U and V channels
const uint8_t filling_const_8u = 128 - i_intensity / 6;
const uint8_t filling_const_8v = 128 + i_intensity / 14;
+
/* iterate for every two visible line in the frame */
for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
{
const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
// to prevent sigsegv if one pic is smaller (theoretically)
- int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
- < p_outpic->p[Y_PLANE].i_visible_pitch
+ int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
+ < p_outpic->p[Y_PLANE].i_visible_pitch
? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
(p_outpic->p[Y_PLANE].i_visible_pitch - 1);
/* iterate for every two visible line in the frame */
* p_pic->p[0].i_pitch;
p_out = p_outpic->p[0].p_pixels;
- while( p_in < p_in_end )
{
- p_line_end = p_in + p_pic->p[0].i_visible_pitch;
- while( p_in < p_line_end )
+ while( p_in < p_in_end )
{
- /* calculate new, sepia values */
- p_out[i_yindex] =
- p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
- p_out[i_yindex + 2] =
- p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
- + (i_intensity >> 2);
- p_out[i_uindex] = filling_const_8u;
- p_out[i_vindex] = filling_const_8v;
- p_in += 4;
- p_out += 4;
+ p_line_end = p_in + p_pic->p[0].i_visible_pitch;
+ while( p_in < p_line_end )
+ {
+ /* calculate new, sepia values */
+ p_out[i_yindex] =
+ p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
+ p_out[i_yindex + 2] =
+ p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
+ + (i_intensity >> 2);
+ p_out[i_uindex] = filling_const_8u;
+ p_out[i_vindex] = filling_const_8v;
+ p_in += 4;
+ p_out += 4;
+ }
+ p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
+ p_out += p_outpic->p[0].i_pitch
+ - p_outpic->p[0].i_visible_pitch;
}
- p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
- p_out += p_outpic->p[0].i_pitch
- - p_outpic->p[0].i_visible_pitch;
}
}
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
- int i_r, i_g, i_b;
bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
int i_rindex = 0, i_gindex = 1, i_bindex = 2;
filter_t *p_filter = (filter_t*)p_this;
filter_sys_t *p_sys = p_filter->p_sys;
- vlc_spin_lock( &p_sys->lock );
- p_sys->i_intensity = newval.i_int;
- vlc_spin_unlock( &p_sys->lock );
-
+ atomic_store( &p_sys->i_intensity, newval.i_int );
return VLC_SUCCESS;
}