]> git.sesse.net Git - vlc/blob - modules/video_filter/sepia.c
MKV: remove unused parameter
[vlc] / modules / video_filter / sepia.c
1 /*****************************************************************************
2  * sepia.c : Sepia video plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2010 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <vlc_common.h>
33 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
35 #include <vlc_cpu.h>
36
37 #include <assert.h>
38 #include "filter_picture.h"
39
40 /*****************************************************************************
41  * Local prototypes
42  *****************************************************************************/
43 static int  Create      ( vlc_object_t * );
44 static void Destroy     ( vlc_object_t * );
45
46 static void RVSepia( picture_t *, picture_t *, int );
47 static void PlanarI420Sepia( picture_t *, picture_t *, int);
48 static void PackedYUVSepia( picture_t *, picture_t *, int);
49 static picture_t *Filter( filter_t *, picture_t * );
50 inline void Sepia8ySSE41( uint8_t *, const uint8_t *, volatile uint8_t * );
51 inline void Memcpy8BMMX( uint8_t *, const uint8_t * );
52 static const char *const ppsz_filter_options[] = {
53     "intensity", NULL
54 };
55
56 /*****************************************************************************
57  * Module descriptor
58  *****************************************************************************/
/* Short label and longer help text shown for the intensity option. */
#define SEPIA_INTENSITY_TEXT N_("Sepia intensity")
#define SEPIA_INTENSITY_LONGTEXT N_("Intensity of sepia effect" )

/* Prefix prepended to every configuration variable of this module, so the
 * intensity option is named "sepia-intensity". */
#define CFG_PREFIX "sepia-"

/* Plugin descriptor: registers the filter under the "video filter2"
 * capability and wires Create/Destroy as its open/close callbacks. */
vlc_module_begin ()
    set_description( N_("Sepia video filter") )
    set_shortname( N_("Sepia" ) )
    set_help( N_("Gives video a warmer tone by applying sepia effect") )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )
    set_capability( "video filter2", 0 )
    /* NOTE(review): the three numbers are presumably default (100) and the
     * allowed range (0..255) -- confirm against add_integer_with_range(). */
    add_integer_with_range( CFG_PREFIX "intensity", 100, 0, 255,
                           SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
                           false )
    set_callbacks( Create, Destroy )
vlc_module_end ()
76
77 /*****************************************************************************
78  * callback prototypes
79  *****************************************************************************/
80 static int FilterCallback( vlc_object_t *, char const *,
81                            vlc_value_t, vlc_value_t, void * );
82
83 typedef void (*SepiaFunction)( picture_t *, picture_t *, int );
84
85 static const struct
86 {
87     vlc_fourcc_t i_chroma;
88     SepiaFunction pf_sepia;
89 } p_sepia_cfg[] = {
90     { VLC_CODEC_I420, PlanarI420Sepia },
91     { VLC_CODEC_RGB24, RVSepia },
92     { VLC_CODEC_RGB32, RVSepia },
93     { VLC_CODEC_UYVY, PackedYUVSepia },
94     { VLC_CODEC_VYUY, PackedYUVSepia },
95     { VLC_CODEC_YUYV, PackedYUVSepia },
96     { VLC_CODEC_YVYU, PackedYUVSepia },
97     { 0, NULL }
98 };
99
/*****************************************************************************
 * filter_sys_t: private state of one sepia filter instance
 *****************************************************************************/
struct filter_sys_t
{
    /* Routine selected in Create() for the input chroma. */
    SepiaFunction pf_sepia;
    /* Current effect strength; written by FilterCallback(), read by
     * Filter(), both while holding `lock`. */
    int i_intensity;
    /* Protects i_intensity. */
    vlc_spinlock_t lock;
};
109
110 /*****************************************************************************
111  * Create: allocates Sepia video thread output method
112  *****************************************************************************
113  * This function allocates and initializes a Sepia vout method.
114  *****************************************************************************/
115 static int Create( vlc_object_t *p_this )
116 {
117     filter_t *p_filter = (filter_t *)p_this;
118     filter_sys_t *p_sys;
119
120     /* Allocate structure */
121     p_sys = p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
122     if( p_filter->p_sys == NULL )
123         return VLC_ENOMEM;
124
125     p_sys->pf_sepia = NULL;
126
127     for( int i = 0; p_sepia_cfg[i].i_chroma != 0; i++ )
128     {
129         if( p_sepia_cfg[i].i_chroma != p_filter->fmt_in.video.i_chroma )
130             continue;
131         p_sys->pf_sepia = p_sepia_cfg[i].pf_sepia;
132     }
133
134     if( p_sys->pf_sepia == NULL )
135     {
136         msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
137                 (char*)&(p_filter->fmt_in.video.i_chroma) );
138         free( p_sys );
139         return VLC_EGENERIC;
140     }
141
142     config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
143                        p_filter->p_cfg );
144     p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
145                        CFG_PREFIX "intensity" );
146
147     vlc_spin_init( &p_sys->lock );
148
149     var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
150
151     p_filter->pf_video_filter = Filter;
152
153     return VLC_SUCCESS;
154 }
155
156 /*****************************************************************************
157  * Destroy: destroy sepia video thread output method
158  *****************************************************************************
159  * Terminate an output method
160  *****************************************************************************/
161 static void Destroy( vlc_object_t *p_this )
162 {
163     filter_t *p_filter = (filter_t *)p_this;
164
165     var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
166
167     vlc_spin_destroy( &p_filter->p_sys->lock );
168     free( p_filter->p_sys );
169 }
170
171 /*****************************************************************************
172  * Render: displays previously rendered output
173  *****************************************************************************
174  * This function send the currently rendered image to sepia image, waits
175  * until it is displayed and switch the two rendering buffers, preparing next
176  * frame.
177  *****************************************************************************/
178 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
179 {
180     picture_t *p_outpic;
181     int intensity;
182
183     if( !p_pic ) return NULL;
184
185     filter_sys_t *p_sys = p_filter->p_sys;
186     vlc_spin_lock( &p_sys->lock );
187     intensity = p_sys->i_intensity;
188     vlc_spin_unlock( &p_sys->lock );
189
190     p_outpic = filter_NewPicture( p_filter );
191     if( !p_outpic )
192     {
193         msg_Warn( p_filter, "can't get output picture" );
194         picture_Release( p_pic );
195         return NULL;
196     }
197
198     p_sys->pf_sepia( p_pic, p_outpic, intensity );
199
200     return CopyInfoAndRelease( p_outpic, p_pic );
201 }
202
203 /*****************************************************************************
204  * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
205  *****************************************************************************
206  * This function applies sepia effect to one frame of the video by iterating
207  * through video lines. We iterate for every two lines and for every two pixels
208  * in line to calculate new sepia values for four y components as well for u
209  * and v components.
210  *****************************************************************************/
211 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
212                                int i_intensity )
213 {
214     // prepared values to copy for U and V channels
215     const uint8_t filling_const_8u = 128 - i_intensity / 6;
216     const uint8_t filling_const_8v = 128 + i_intensity / 14;
217
218     #if defined(CAN_COMPILE_SSE4_1) && 1
219     if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
220     {
221         /*prepare array of values to copy with mmx, compute only once
222           to improve speed */
223         volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
224             i_intensity, i_intensity, i_intensity, i_intensity,
225             i_intensity, i_intensity };
226         const uint8_t filling_array_8u[8] =
227             { filling_const_8u, filling_const_8u, filling_const_8u,
228             filling_const_8u, filling_const_8u, filling_const_8u,
229             filling_const_8u, filling_const_8u };
230         const uint8_t filling_array_8v[8] =
231             { filling_const_8v, filling_const_8v, filling_const_8v,
232             filling_const_8v, filling_const_8v, filling_const_8v,
233             filling_const_8v, filling_const_8v };
234
235         /* iterate for every two visible line in the frame */
236         for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
237         {
238             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
239             const int i_dy_line2_start =
240             (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
241             const int i_du_line_start =
242             (y / 2) * p_outpic->p[U_PLANE].i_pitch;
243             const int i_dv_line_start =
244             (y / 2) * p_outpic->p[V_PLANE].i_pitch;
245             int x = 0;
246             /* iterate for every visible line in the frame (eight values at once) */
247             for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16)
248             {
249                 /* Compute yellow channel values with asm function */
250                 Sepia8ySSE41(
251                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
252                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
253                           intensity_array );
254                 Sepia8ySSE41(
255                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
256                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
257                           intensity_array );
258                 Sepia8ySSE41(
259                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
260                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
261                           intensity_array );
262                 Sepia8ySSE41(
263                           &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
264                           &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
265                           intensity_array );
266                 /* Copy precomputed values to destination image memory location */
267                 Memcpy8BMMX(
268                           &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
269                           filling_array_8u );
270                 Memcpy8BMMX(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
271                           filling_array_8v );
272             }
273             /* Completing the job, the cycle above takes really big chunks, so
274               this makes sure the job will be done completely */
275             for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2)
276             {
277                 // y = y - y/4 {to prevent overflow} + intensity / 4
278                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
279                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
280                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
281                     (i_intensity >> 2);
282                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
283                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
284                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
285                     (i_intensity >> 2);
286                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
287                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
288                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
289                     (i_intensity >> 2);
290                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
291                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
292                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
293                     (i_intensity >> 2);
294                 // u = 128 {half => B&W} - intensity / 6
295                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
296                     filling_const_8u;
297                 // v = 128 {half => B&W} + intensity / 14
298                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
299                     filling_const_8v;
300             }
301         }
302     } else
303 #endif
304     {
305         /* iterate for every two visible line in the frame */
306         for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
307         {
308             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
309             const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
310             const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
311             const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
312             // to prevent sigsegv if one pic is smaller (theoretically)
313             int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
314                       < p_outpic->p[Y_PLANE].i_visible_pitch
315                       ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
316                       (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
317             /* iterate for every two visible line in the frame */
318             for( int x = 0; x < i_picture_size_limit; x += 2)
319             {
320                 // y = y - y/4 {to prevent overflow} + intensity / 4
321                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
322                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
323                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
324                     (i_intensity >> 2);
325                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
326                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
327                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
328                     (i_intensity >> 2);
329                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
330                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
331                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
332                     (i_intensity >> 2);
333                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
334                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
335                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
336                     (i_intensity >> 2);
337                 // u = 128 {half => B&W} - intensity / 6
338                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
339                     filling_const_8u;
340                 // v = 128 {half => B&W} + intensity / 14
341                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
342                     filling_const_8v;
343             }
344         }
345     }
346 }
347
348 /*****************************************************************************
349  * PackedYUVSepia: Applies sepia to one frame of the packed YUV video
350  *****************************************************************************
351  * This function applies sepia effext to one frame of the video by iterating
352  * through video lines. In every pass, we calculate new values for pixels
353  * (UYVY, VYUY, YUYV and YVYU formats are supported)
354  *****************************************************************************/
355 static void PackedYUVSepia( picture_t *p_pic, picture_t *p_outpic,
356                            int i_intensity )
357 {
358     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
359     int i_yindex = 1, i_uindex = 2, i_vindex = 0;
360
361     GetPackedYuvOffsets( p_outpic->format.i_chroma,
362                         &i_yindex, &i_uindex, &i_vindex );
363
364     // prepared values to copy for U and V channels
365     const uint8_t filling_const_8u = 128 - i_intensity / 6;
366     const uint8_t filling_const_8v = 128 + i_intensity / 14;
367
368     p_in = p_pic->p[0].p_pixels;
369     p_in_end = p_in + p_pic->p[0].i_visible_lines
370         * p_pic->p[0].i_pitch;
371     p_out = p_outpic->p[0].p_pixels;
372 #if defined(CAN_COMPILE_SSE4_1)
373     if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
374     {
375         /*prepare array of values to copy with mmx, compute only once
376           to improve speed */
377         volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
378             i_intensity, i_intensity, i_intensity, i_intensity,
379             i_intensity,
380             i_intensity
381         };
382         const uint8_t filling_array_8u[8] =
383             { filling_const_8u, filling_const_8u,
384             filling_const_8u, filling_const_8u, filling_const_8u,
385             filling_const_8u,
386             filling_const_8u, filling_const_8u
387         };
388         const uint8_t filling_array_8v[8] =
389             { filling_const_8v, filling_const_8v,
390             filling_const_8v, filling_const_8v, filling_const_8v,
391             filling_const_8v,
392             filling_const_8v, filling_const_8v
393         };
394
395         /* iterate for every two visible line in the frame */
396         while (p_in < p_in_end)
397         {
398             p_line_end = p_in + p_pic->p[0].i_visible_pitch;
399             while (p_in < p_line_end)
400             {
401                 Sepia8ySSE41(&p_out[i_yindex], &p_in[i_yindex],
402                           intensity_array);
403                 Sepia8ySSE41(&p_out[i_yindex + 8], &p_in[i_yindex + 8],
404                           intensity_array);
405                 Sepia8ySSE41(&p_out[i_yindex + 16], &p_in[i_yindex + 16],
406                           intensity_array);
407                 Sepia8ySSE41(&p_out[i_yindex + 24], &p_in[i_yindex + 24],
408                           intensity_array);
409                 Memcpy8BMMX(&p_out[i_uindex], filling_array_8u);
410                 Memcpy8BMMX(&p_out[i_vindex], filling_array_8v);
411
412                 p_in += 32;
413                 p_out += 32;
414             }
415             while (p_in < p_line_end)
416             {
417                 p_out[i_yindex] =
418                     p_in[i_yindex] - (p_in[i_yindex] >> 2) +
419                     (i_intensity >> 2);
420                 p_out[i_yindex + 2] =
421                     p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2) +
422                     (i_intensity >> 2);
423                 p_out[i_uindex] = filling_const_8u;
424                 p_out[i_vindex] = filling_const_8v;
425                 p_in += 4;
426                 p_out += 4;
427             }
428             p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
429             p_out += p_outpic->p[0].i_pitch
430             - p_outpic->p[0].i_visible_pitch;
431         }
432     } else
433 #endif
434     {
435         while( p_in < p_in_end )
436         {
437             p_line_end = p_in + p_pic->p[0].i_visible_pitch;
438             while( p_in < p_line_end )
439             {
440                 /* calculate new, sepia values */
441                 p_out[i_yindex] =
442                     p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
443                 p_out[i_yindex + 2] =
444                     p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
445                     + (i_intensity >> 2);
446                 p_out[i_uindex] = filling_const_8u;
447                 p_out[i_vindex] = filling_const_8v;
448                 p_in += 4;
449                 p_out += 4;
450             }
451             p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
452             p_out += p_outpic->p[0].i_pitch
453                 - p_outpic->p[0].i_visible_pitch;
454         }
455     }
456 }
457
458 /*****************************************************************************
459  * RVSepia: Applies sepia to one frame of the RV24/RV32 video
460  *****************************************************************************
461  * This function applies sepia effect to one frame of the video by iterating
462  * through video lines and calculating new values for every byte in chunks of
463  * 3 (RV24) or 4 (RV32) bytes.
464  *****************************************************************************/
465 static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
466 {
467 #define SCALEBITS 10
468 #define ONE_HALF  (1 << (SCALEBITS - 1))
469 #define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
470     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
471     bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
472     int i_rindex = 0, i_gindex = 1, i_bindex = 2;
473
474     GetPackedRgbIndexes( &p_outpic->format, &i_rindex, &i_gindex, &i_bindex );
475
476     p_in = p_pic->p[0].p_pixels;
477     p_in_end = p_in + p_pic->p[0].i_visible_lines
478         * p_pic->p[0].i_pitch;
479     p_out = p_outpic->p[0].p_pixels;
480
481     /* Precompute values constant for this certain i_intensity, using the same
482      * formula as YUV functions above */
483     uint8_t r_intensity = (( FIX( 1.40200 * 255.0 / 224.0 ) * (i_intensity * 14)
484                         + ONE_HALF )) >> SCALEBITS;
485     uint8_t g_intensity = (( - FIX(0.34414*255.0/224.0) * ( - i_intensity / 6 )
486                         - FIX( 0.71414 * 255.0 / 224.0) * ( i_intensity * 14 )
487                         + ONE_HALF )) >> SCALEBITS;
488     uint8_t b_intensity = (( FIX( 1.77200 * 255.0 / 224.0) * ( - i_intensity / 6 )
489                         + ONE_HALF )) >> SCALEBITS;
490
491     while (p_in < p_in_end)
492     {
493         p_line_end = p_in + p_pic->p[0].i_visible_pitch;
494         while (p_in < p_line_end)
495         {
496             /* do sepia: this calculation is based on the formula to calculate
497              * YUV->RGB and RGB->YUV (in filter_picture.h) mode and that
498              * y = y - y/4 + intensity/4 . As Y is the only channel that changes
499              * through the whole image. After that, precomputed values are added
500              * for each RGB channel and saved in the output image.
501              * FIXME: needs cleanup */
502             uint8_t i_y = ((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] +  25
503                       * p_in[i_bindex] + 128 ) >> 8 ) * FIX(255.0/219.0))
504                       - (((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
505                       * p_in[i_bindex] + 128 ) >> 8 )
506                       * FIX( 255.0 / 219.0 )) >> 2 ) + ( i_intensity >> 2 );
507             p_out[i_rindex] = vlc_uint8(i_y + r_intensity);
508             p_out[i_gindex] = vlc_uint8(i_y + g_intensity);
509             p_out[i_bindex] = vlc_uint8(i_y + b_intensity);
510             p_in += 3;
511             p_out += 3;
512             /* for rv32 we take 4 chunks at the time */
513             if (b_isRV32) {
514             /* alpha channel stays the same */
515             *p_out++ = *p_in++;
516             }
517         }
518
519         p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
520         p_out += p_outpic->p[0].i_pitch
521             - p_outpic->p[0].i_visible_pitch;
522     }
523 #undef SCALEBITS
524 #undef ONE_HALF
525 #undef FIX
526 }
527
/*****************************************************************************
 * Sepia8ySSE41
 *****************************************************************************
 * Applies the sepia luma formula to eight consecutive luma bytes:
 *     dst[i] = src[i] - src[i] / 4 + i_intensity[i] / 4     (i = 0..7)
 * i_intensity points to eight bytes holding the intensity for each sample.
 *
 * With SSE4.1 the bytes are widened to 16-bit words, the formula is
 * evaluated on words and the result is packed back to bytes.  The result
 * never exceeds 255 for 8-bit inputs, so packing does not alter it.  When
 * SSE4.1 inline asm is not available the same formula is evaluated in C.
 *
 * The definition deliberately carries no "inline" specifier so that an
 * out-of-line copy always exists for calls the compiler does not inline.
 *****************************************************************************/
void Sepia8ySSE41(uint8_t * dst, const uint8_t * src,
               volatile uint8_t * i_intensity)
{
#if defined(CAN_COMPILE_SSE4_1)
    __asm__ volatile (
              "pmovzxbw      (%1),   %%xmm1\n"    // xmm1 = y, bytes widened to words
              "pmovzxbw      (%2),   %%xmm3\n"    // xmm3 = intensity, widened to words
              "movdqa        %%xmm1, %%xmm2\n"
              "psrlw          $2,    %%xmm2\n"    // xmm2 = y / 4
              "psubw         %%xmm2, %%xmm1\n"    // xmm1 = y - y / 4
              "psrlw          $2,    %%xmm3\n"    // xmm3 = intensity / 4
              "paddw         %%xmm3, %%xmm1\n"    // xmm1 = y - y / 4 + intensity / 4
              "packuswb      %%xmm1, %%xmm1\n"    // pack back to bytes
              "movq          %%xmm1, (%0)  \n"    // store 8 bytes to dest
              :
              :"r" (dst), "r"(src), "r"(i_intensity)
              :"memory", "xmm1", "xmm2", "xmm3");
#else
    for (int i = 0; i < 8; i++)
        dst[i] = src[i] - (src[i] >> 2) + (i_intensity[i] >> 2);
#endif
}
554
/*****************************************************************************
 * Memcpy8BMMX: Copies 8 bytes of memory in two instructions
 *****************************************************************************
 * Copies exactly eight bytes from src to dst.  Despite the name, the asm
 * version moves the data through an XMM register (xmm0), which is therefore
 * listed as clobbered.  When the asm cannot be compiled the bytes are copied
 * in C, so the function never silently does nothing.
 *
 * The definition deliberately carries no "inline" specifier so that an
 * out-of-line copy always exists for calls the compiler does not inline.
 *****************************************************************************/
void Memcpy8BMMX(uint8_t * dst, const uint8_t * src)
{
#if defined(CAN_COMPILE_MMX)
    __asm__ volatile (
              "movq       (%1), %%xmm0\n"
              "movq       %%xmm0, (%0)\n"
              :
              :"r" (dst), "r"(src)
              :"memory", "xmm0");
#else
    for (int i = 0; i < 8; i++)
        dst[i] = src[i];
#endif
}
571
572 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
573                             vlc_value_t oldval, vlc_value_t newval,
574                             void *p_data )
575 {
576     VLC_UNUSED(psz_var); VLC_UNUSED(oldval); VLC_UNUSED(p_data);
577     filter_t *p_filter = (filter_t*)p_this;
578     filter_sys_t *p_sys = p_filter->p_sys;
579
580     vlc_spin_lock( &p_sys->lock );
581     p_sys->i_intensity = newval.i_int;
582     vlc_spin_unlock( &p_sys->lock );
583
584     return VLC_SUCCESS;
585 }