git.sesse.net Git - vlc/blob - modules/video_filter/sepia.c

   1 /*****************************************************************************
   2  * sepia.c : Sepia video plugin for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2010 the VideoLAN team
   5  * $Id$
   6  *
   7  * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  22  *****************************************************************************/
  23
  24 /*****************************************************************************
  25  * Preamble
  26  *****************************************************************************/
  27
  28 #ifdef HAVE_CONFIG_H
  29 # include "config.h"
  30 #endif
  31
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_filter.h>
#include <vlc_cpu.h>

#include <assert.h>
#include <string.h>
#include "filter_picture.h"
  39
  40 /*****************************************************************************
  41  * Local prototypes
  42  *****************************************************************************/
  43 static int  Create      ( vlc_object_t * );
  44 static void Destroy     ( vlc_object_t * );
  45
  46 static void RVSepia( picture_t *, picture_t *, int );
  47 static void PlanarI420Sepia( picture_t *, picture_t *, int);
  48 static void PackedYUVSepia( picture_t *, picture_t *, int);
  49 static picture_t *Filter( filter_t *, picture_t * );
  50 inline void Sepia8ySSE41( uint8_t *, const uint8_t *, volatile uint8_t * );
  51 inline void Memcpy8BMMX( uint8_t *, const uint8_t * );
  52 static const char *const ppsz_filter_options[] = {
  53     "intensity", NULL
  54 };
  55
  56 /*****************************************************************************
  57  * Module descriptor
  58  *****************************************************************************/
  59 #define SEPIA_INTENSITY_TEXT N_("Sepia intensity")
  60 #define SEPIA_INTENSITY_LONGTEXT N_("Intensity of sepia effect" )
  61
  62 #define CFG_PREFIX "sepia-"
  63
  64 vlc_module_begin ()
  65     set_description( N_("Sepia video filter") )
  66     set_shortname( N_("Sepia" ) )
  67     set_help( N_("Gives video a warmer tone by applying sepia effect") )
  68     set_category( CAT_VIDEO )
  69     set_subcategory( SUBCAT_VIDEO_VFILTER )
  70     set_capability( "video filter2", 0 )
  71     add_integer_with_range( CFG_PREFIX "intensity", 100, 0, 255,
  72                            SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
  73                            false )
  74     set_callbacks( Create, Destroy )
  75 vlc_module_end ()
  76
  77 /*****************************************************************************
  78  * callback prototypes
  79  *****************************************************************************/
  80 static int FilterCallback( vlc_object_t *, char const *,
  81                            vlc_value_t, vlc_value_t, void * );
  82
  83 typedef void (*SepiaFunction)( picture_t *, picture_t *, int );
  84
  85 static const struct
  86 {
  87     vlc_fourcc_t i_chroma;
  88     SepiaFunction pf_sepia;
  89 } p_sepia_cfg[] = {
  90     { VLC_CODEC_I420, PlanarI420Sepia },
  91     { VLC_CODEC_RGB24, RVSepia },
  92     { VLC_CODEC_RGB32, RVSepia },
  93     { VLC_CODEC_UYVY, PackedYUVSepia },
  94     { VLC_CODEC_VYUY, PackedYUVSepia },
  95     { VLC_CODEC_YUYV, PackedYUVSepia },
  96     { VLC_CODEC_YVYU, PackedYUVSepia },
  97     { 0, NULL }
  98 };
  99
 100 /*****************************************************************************
 101  * filter_sys_t: adjust filter method descriptor
 102  *****************************************************************************/
 103 struct filter_sys_t
 104 {
 105     SepiaFunction pf_sepia;
 106     int i_intensity;
 107     vlc_spinlock_t lock;
 108 };
 109
 110 /*****************************************************************************
 111  * Create: allocates Sepia video thread output method
 112  *****************************************************************************
 113  * This function allocates and initializes a Sepia vout method.
 114  *****************************************************************************/
 115 static int Create( vlc_object_t *p_this )
 116 {
 117     filter_t *p_filter = (filter_t *)p_this;
 118     filter_sys_t *p_sys;
 119
 120     /* Allocate structure */
 121     p_sys = p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
 122     if( p_filter->p_sys == NULL )
 123         return VLC_ENOMEM;
 124
 125     p_sys->pf_sepia = NULL;
 126
 127     for( int i = 0; p_sepia_cfg[i].i_chroma != 0; i++ )
 128     {
 129         if( p_sepia_cfg[i].i_chroma != p_filter->fmt_in.video.i_chroma )
 130             continue;
 131         p_sys->pf_sepia = p_sepia_cfg[i].pf_sepia;
 132     }
 133
 134     if( p_sys->pf_sepia == NULL )
 135     {
 136         msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
 137                 (char*)&(p_filter->fmt_in.video.i_chroma) );
 138         free( p_sys );
 139         return VLC_EGENERIC;
 140     }
 141
 142     config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
 143                        p_filter->p_cfg );
 144     p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
 145                        CFG_PREFIX "intensity" );
 146
 147     vlc_spin_init( &p_sys->lock );
 148
 149     var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
 150
 151     p_filter->pf_video_filter = Filter;
 152
 153     return VLC_SUCCESS;
 154 }
 155
 156 /*****************************************************************************
 157  * Destroy: destroy sepia video thread output method
 158  *****************************************************************************
 159  * Terminate an output method
 160  *****************************************************************************/
 161 static void Destroy( vlc_object_t *p_this )
 162 {
 163     filter_t *p_filter = (filter_t *)p_this;
 164
 165     var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
 166
 167     vlc_spin_destroy( &p_filter->p_sys->lock );
 168     free( p_filter->p_sys );
 169 }
 170
 171 /*****************************************************************************
 172  * Render: displays previously rendered output
 173  *****************************************************************************
 174  * This function send the currently rendered image to sepia image, waits
 175  * until it is displayed and switch the two rendering buffers, preparing next
 176  * frame.
 177  *****************************************************************************/
 178 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 179 {
 180     picture_t *p_outpic;
 181     int intensity;
 182
 183     if( !p_pic ) return NULL;
 184
 185     filter_sys_t *p_sys = p_filter->p_sys;
 186     vlc_spin_lock( &p_sys->lock );
 187     intensity = p_sys->i_intensity;
 188     vlc_spin_unlock( &p_sys->lock );
 189
 190     p_outpic = filter_NewPicture( p_filter );
 191     if( !p_outpic )
 192     {
 193         msg_Warn( p_filter, "can't get output picture" );
 194         picture_Release( p_pic );
 195         return NULL;
 196     }
 197
 198     p_sys->pf_sepia( p_pic, p_outpic, intensity );
 199
 200     return CopyInfoAndRelease( p_outpic, p_pic );
 201 }
 202
 203 /*****************************************************************************
 204  * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
 205  *****************************************************************************
 206  * This function applies sepia effect to one frame of the video by iterating
 207  * through video lines. We iterate for every two lines and for every two pixels
 208  * in line to calculate new sepia values for four y components as well for u
 209  * and v components.
 210  *****************************************************************************/
 211 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
 212                                int i_intensity )
 213 {
 214     // prepared values to copy for U and V channels
 215     const uint8_t filling_const_8u = 128 - i_intensity / 6;
 216     const uint8_t filling_const_8v = 128 + i_intensity / 14;
 217
 218     #if defined(CAN_COMPILE_SSE4_1) && 1
 219     if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
 220     {
 221         /*prepare array of values to copy with mmx, compute only once
 222           to improve speed */
 223         volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
 224             i_intensity, i_intensity, i_intensity, i_intensity,
 225             i_intensity, i_intensity };
 226         const uint8_t filling_array_8u[8] =
 227             { filling_const_8u, filling_const_8u, filling_const_8u,
 228             filling_const_8u, filling_const_8u, filling_const_8u,
 229             filling_const_8u, filling_const_8u };
 230         const uint8_t filling_array_8v[8] =
 231             { filling_const_8v, filling_const_8v, filling_const_8v,
 232             filling_const_8v, filling_const_8v, filling_const_8v,
 233             filling_const_8v, filling_const_8v };
 234
 235         /* iterate for every two visible line in the frame */
 236         for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
 237         {
 238             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
 239             const int i_dy_line2_start =
 240             (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
 241             const int i_du_line_start =
 242             (y / 2) * p_outpic->p[U_PLANE].i_pitch;
 243             const int i_dv_line_start =
 244             (y / 2) * p_outpic->p[V_PLANE].i_pitch;
 245             int x = 0;
 246             /* iterate for every visible line in the frame (eight values at once) */
 247             for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16)
 248             {
 249                 /* Compute yellow channel values with asm function */
 250                 Sepia8ySSE41(
 251                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
 252                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
 253                           intensity_array );
 254                 Sepia8ySSE41(
 255                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
 256                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
 257                           intensity_array );
 258                 Sepia8ySSE41(
 259                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
 260                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
 261                           intensity_array );
 262                 Sepia8ySSE41(
 263                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
 264                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
 265                           intensity_array );
 266                 /* Copy precomputed values to destination image memory location */
 267                 Memcpy8BMMX(
 268                           &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
 269                           filling_array_8u );
 270                 Memcpy8BMMX(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
 271                           filling_array_8v );
 272             }
 273             /* Completing the job, the cycle above takes really big chunks, so
 274               this makes sure the job will be done completely */
 275             for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2)
 276             {
 277                 // y = y - y/4 {to prevent overflow} + intensity / 4
 278                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
 279                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
 280                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
 281                     (i_intensity >> 2);
 282                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
 283                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
 284                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
 285                     (i_intensity >> 2);
 286                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
 287                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
 288                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
 289                     (i_intensity >> 2);
 290                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
 291                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
 292                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
 293                     (i_intensity >> 2);
 294                 // u = 128 {half => B&W} - intensity / 6
 295                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
 296                     filling_const_8u;
 297                 // v = 128 {half => B&W} + intensity / 14
 298                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
 299                     filling_const_8v;
 300             }
 301         }
 302     } else
 303 #endif
 304     {
 305         /* iterate for every two visible line in the frame */
 306         for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
 307         {
 308             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
 309             const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
 310             const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
 311             const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
 312             // to prevent sigsegv if one pic is smaller (theoretically)
 313             int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
 314                       < p_outpic->p[Y_PLANE].i_visible_pitch
 315                       ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
 316                       (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
 317             /* iterate for every two visible line in the frame */
 318             for( int x = 0; x < i_picture_size_limit; x += 2)
 319             {
 320                 // y = y - y/4 {to prevent overflow} + intensity / 4
 321                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
 322                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
 323                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
 324                     (i_intensity >> 2);
 325                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
 326                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
 327                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
 328                     (i_intensity >> 2);
 329                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
 330                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
 331                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
 332                     (i_intensity >> 2);
 333                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
 334                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
 335                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
 336                     (i_intensity >> 2);
 337                 // u = 128 {half => B&W} - intensity / 6
 338                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
 339                     filling_const_8u;
 340                 // v = 128 {half => B&W} + intensity / 14
 341                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
 342                     filling_const_8v;
 343             }
 344         }
 345     }
 346 }
 347
 348 /*****************************************************************************
 349  * PackedYUVSepia: Applies sepia to one frame of the packed YUV video
 350  *****************************************************************************
 351  * This function applies sepia effext to one frame of the video by iterating
 352  * through video lines. In every pass, we calculate new values for pixels
 353  * (UYVY, VYUY, YUYV and YVYU formats are supported)
 354  *****************************************************************************/
 355 static void PackedYUVSepia( picture_t *p_pic, picture_t *p_outpic,
 356                            int i_intensity )
 357 {
 358     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
 359     int i_yindex = 1, i_uindex = 2, i_vindex = 0;
 360
 361     GetPackedYuvOffsets( p_outpic->format.i_chroma,
 362                         &i_yindex, &i_uindex, &i_vindex );
 363
 364     // prepared values to copy for U and V channels
 365     const uint8_t filling_const_8u = 128 - i_intensity / 6;
 366     const uint8_t filling_const_8v = 128 + i_intensity / 14;
 367
 368     p_in = p_pic->p[0].p_pixels;
 369     p_in_end = p_in + p_pic->p[0].i_visible_lines
 370         * p_pic->p[0].i_pitch;
 371     p_out = p_outpic->p[0].p_pixels;
 372 #if defined(CAN_COMPILE_SSE4_1)
 373     if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
 374     {
 375         /*prepare array of values to copy with mmx, compute only once
 376           to improve speed */
 377         volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
 378             i_intensity, i_intensity, i_intensity, i_intensity,
 379             i_intensity,
 380             i_intensity
 381         };
 382         const uint8_t filling_array_8u[8] =
 383             { filling_const_8u, filling_const_8u,
 384             filling_const_8u, filling_const_8u, filling_const_8u,
 385             filling_const_8u,
 386             filling_const_8u, filling_const_8u
 387         };
 388         const uint8_t filling_array_8v[8] =
 389             { filling_const_8v, filling_const_8v,
 390             filling_const_8v, filling_const_8v, filling_const_8v,
 391             filling_const_8v,
 392             filling_const_8v, filling_const_8v
 393         };
 394
 395         /* iterate for every two visible line in the frame */
 396         while (p_in < p_in_end)
 397         {
 398             p_line_end = p_in + p_pic->p[0].i_visible_pitch;
 399             while (p_in < p_line_end)
 400             {
 401                 Sepia8ySSE41(&p_out[i_yindex], &p_in[i_yindex],
 402                           intensity_array);
 403                 Sepia8ySSE41(&p_out[i_yindex + 8], &p_in[i_yindex + 8],
 404                           intensity_array);
 405                 Sepia8ySSE41(&p_out[i_yindex + 16], &p_in[i_yindex + 16],
 406                           intensity_array);
 407                 Sepia8ySSE41(&p_out[i_yindex + 24], &p_in[i_yindex + 24],
 408                           intensity_array);
 409                 Memcpy8BMMX(&p_out[i_uindex], filling_array_8u);
 410                 Memcpy8BMMX(&p_out[i_vindex], filling_array_8v);
 411
 412                 p_in += 32;
 413                 p_out += 32;
 414             }
 415             while (p_in < p_line_end)
 416             {
 417                 p_out[i_yindex] =
 418                     p_in[i_yindex] - (p_in[i_yindex] >> 2) +
 419                     (i_intensity >> 2);
 420                 p_out[i_yindex + 2] =
 421                     p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2) +
 422                     (i_intensity >> 2);
 423                 p_out[i_uindex] = filling_const_8u;
 424                 p_out[i_vindex] = filling_const_8v;
 425                 p_in += 4;
 426                 p_out += 4;
 427             }
 428             p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
 429             p_out += p_outpic->p[0].i_pitch
 430             - p_outpic->p[0].i_visible_pitch;
 431         }
 432     } else
 433 #endif
 434     {
 435         while( p_in < p_in_end )
 436         {
 437             p_line_end = p_in + p_pic->p[0].i_visible_pitch;
 438             while( p_in < p_line_end )
 439             {
 440                 /* calculate new, sepia values */
 441                 p_out[i_yindex] =
 442                     p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
 443                 p_out[i_yindex + 2] =
 444                     p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
 445                     + (i_intensity >> 2);
 446                 p_out[i_uindex] = filling_const_8u;
 447                 p_out[i_vindex] = filling_const_8v;
 448                 p_in += 4;
 449                 p_out += 4;
 450             }
 451             p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
 452             p_out += p_outpic->p[0].i_pitch
 453                 - p_outpic->p[0].i_visible_pitch;
 454         }
 455     }
 456 }
 457
 458 /*****************************************************************************
 459  * RVSepia: Applies sepia to one frame of the RV24/RV32 video
 460  *****************************************************************************
 461  * This function applies sepia effect to one frame of the video by iterating
 462  * through video lines and calculating new values for every byte in chunks of
 463  * 3 (RV24) or 4 (RV32) bytes.
 464  *****************************************************************************/
 465 static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
 466 {
 467 #define SCALEBITS 10
 468 #define ONE_HALF  (1 << (SCALEBITS - 1))
 469 #define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
 470     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
 471     bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
 472     int i_rindex = 0, i_gindex = 1, i_bindex = 2;
 473
 474     GetPackedRgbIndexes( &p_outpic->format, &i_rindex, &i_gindex, &i_bindex );
 475
 476     p_in = p_pic->p[0].p_pixels;
 477     p_in_end = p_in + p_pic->p[0].i_visible_lines
 478         * p_pic->p[0].i_pitch;
 479     p_out = p_outpic->p[0].p_pixels;
 480
 481     /* Precompute values constant for this certain i_intensity, using the same
 482      * formula as YUV functions above */
 483     uint8_t r_intensity = (( FIX( 1.40200 * 255.0 / 224.0 ) * (i_intensity * 14)
 484                         + ONE_HALF )) >> SCALEBITS;
 485     uint8_t g_intensity = (( - FIX(0.34414*255.0/224.0) * ( - i_intensity / 6 )
 486                         - FIX( 0.71414 * 255.0 / 224.0) * ( i_intensity * 14 )
 487                         + ONE_HALF )) >> SCALEBITS;
 488     uint8_t b_intensity = (( FIX( 1.77200 * 255.0 / 224.0) * ( - i_intensity / 6 )
 489                         + ONE_HALF )) >> SCALEBITS;
 490
 491     while (p_in < p_in_end)
 492     {
 493         p_line_end = p_in + p_pic->p[0].i_visible_pitch;
 494         while (p_in < p_line_end)
 495         {
 496             /* do sepia: this calculation is based on the formula to calculate
 497              * YUV->RGB and RGB->YUV (in filter_picture.h) mode and that
 498              * y = y - y/4 + intensity/4 . As Y is the only channel that changes
 499              * through the whole image. After that, precomputed values are added
 500              * for each RGB channel and saved in the output image.
 501              * FIXME: needs cleanup */
 502             uint8_t i_y = ((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] +  25
 503                       * p_in[i_bindex] + 128 ) >> 8 ) * FIX(255.0/219.0))
 504                       - (((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
 505                       * p_in[i_bindex] + 128 ) >> 8 )
 506                       * FIX( 255.0 / 219.0 )) >> 2 ) + ( i_intensity >> 2 );
 507             p_out[i_rindex] = vlc_uint8(i_y + r_intensity);
 508             p_out[i_gindex] = vlc_uint8(i_y + g_intensity);
 509             p_out[i_bindex] = vlc_uint8(i_y + b_intensity);
 510             p_in += 3;
 511             p_out += 3;
 512             /* for rv32 we take 4 chunks at the time */
 513             if (b_isRV32) {
 514             /* alpha channel stays the same */
 515             *p_out++ = *p_in++;
 516             }
 517         }
 518
 519         p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
 520         p_out += p_outpic->p[0].i_pitch
 521             - p_outpic->p[0].i_visible_pitch;
 522     }
 523 #undef SCALEBITS
 524 #undef ONE_HALF
 525 #undef FIX
 526 }
 527
 528 /*****************************************************************************
 529  * Sepia8ySSE41
 530  *****************************************************************************
 531  * This function applies sepia effect to eight bytes of yellow using SSE4.1
 532  * instructions. It copies those 8 bytes to 128b register and fills the gaps
 533  * with zeroes and following operations are made with word-operating instructs.
 534  *****************************************************************************/
 535 inline void Sepia8ySSE41(uint8_t * dst, const uint8_t * src,
 536                volatile uint8_t * i_intensity)
 537 {
 538 #if defined(CAN_COMPILE_SSE4_1) && 1
 539     __asm__ volatile (
 540               "pmovzxbw      (%1),   %%xmm1\n"    // y = y - y / 4 + i_intensity / 4
 541               "pmovzxbw      (%1),   %%xmm2\n"    // store bytes as words with 0s in between
 542               "pmovzxbw      (%2),   %%xmm3\n"
 543               "psrlw          $2,    %%xmm2\n"    // rotate right 2
 544               "psubusb       %%xmm1, %%xmm2\n"    // subtract
 545               "psrlw          $2,    %%xmm3\n"
 546               "paddsb        %%xmm1, %%xmm3\n"    // add
 547               "packuswb      %%xmm2, %%xmm1\n"    // pack back to bytes
 548               "movq          %%xmm1, (%0)  \n"    // load to dest
 549               :
 550               :"r" (dst), "r"(src), "r"(i_intensity)
 551               :"memory");
 552 #endif
 553 }
 554
 555 /*****************************************************************************
 556  * Memcpy8BMMX: Copies 8 bytes of memory in two instructions
 557  *****************************************************************************
 558  * Not quite clean, but it should be fast.
 559  *****************************************************************************/
 560 inline void Memcpy8BMMX(uint8_t * dst, const uint8_t * src)
 561 {
 562 #if defined(CAN_COMPILE_MMX) && 1
 563     __asm__ volatile (
 564               "movq       (%1), %%xmm0\n"
 565               "movq       %%xmm0, (%0)\n"
 566               :
 567               :"r" (dst), "r"(src)
 568               :"memory");
 569 #endif
 570 }
 571
 572 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
 573                             vlc_value_t oldval, vlc_value_t newval,
 574                             void *p_data )
 575 {
 576     VLC_UNUSED(psz_var); VLC_UNUSED(oldval); VLC_UNUSED(p_data);
 577     filter_t *p_filter = (filter_t*)p_this;
 578     filter_sys_t *p_sys = p_filter->p_sys;
 579
 580     vlc_spin_lock( &p_sys->lock );
 581     p_sys->i_intensity = newval.i_int;
 582     vlc_spin_unlock( &p_sys->lock );
 583
 584     return VLC_SUCCESS;
 585 }