]> git.sesse.net Git - vlc/blob - modules/video_filter/sepia.c
swscale: pass/calculate sar-info, should fix #7437
[vlc] / modules / video_filter / sepia.c
1 /*****************************************************************************
2  * sepia.c : Sepia video plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2010 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <vlc_common.h>
33 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
35 #include <vlc_cpu.h>
36
37 #include <assert.h>
38 #include "filter_picture.h"
39
40 /*****************************************************************************
41  * Local prototypes
42  *****************************************************************************/
43 static int  Create      ( vlc_object_t * );
44 static void Destroy     ( vlc_object_t * );
45
46 static void RVSepia( picture_t *, picture_t *, int );
47 static void PlanarI420Sepia( picture_t *, picture_t *, int);
48 static void PackedYUVSepia( picture_t *, picture_t *, int);
49 static picture_t *Filter( filter_t *, picture_t * );
50 static const char *const ppsz_filter_options[] = { /* option names, without the CFG_PREFIX, passed to config_ChainParse() in Create() */
51     "intensity", NULL /* the list ends with a NULL entry */
52 };
53
54 /*****************************************************************************
55  * Module descriptor
56  *****************************************************************************/
57 #define SEPIA_INTENSITY_TEXT N_("Sepia intensity")
58 #define SEPIA_INTENSITY_LONGTEXT N_("Intensity of sepia effect" )
59
60 #define CFG_PREFIX "sepia-"
61
62 vlc_module_begin () /* plugin descriptor for the sepia video filter */
63     set_description( N_("Sepia video filter") )
64     set_shortname( N_("Sepia" ) )
65     set_help( N_("Gives video a warmer tone by applying sepia effect") )
66     set_category( CAT_VIDEO )
67     set_subcategory( SUBCAT_VIDEO_VFILTER )
68     set_capability( "video filter2", 0 )
69     add_integer_with_range( CFG_PREFIX "intensity", 120, 0, 255, /* "sepia-intensity"; NOTE(review): the literals look like default 120 and range 0..255 - confirm against the macro */
70                            SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
71                            false )
72     set_callbacks( Create, Destroy ) /* Create() and Destroy() below are the callbacks registered for this module */
73 vlc_module_end ()
74
75 /*****************************************************************************
76  * callback prototypes
77  *****************************************************************************/
78 static int FilterCallback( vlc_object_t *, char const *,
79                            vlc_value_t, vlc_value_t, void * );
80
81 typedef void (*SepiaFunction)( picture_t *, picture_t *, int );
82
83 static const struct
84 {
85     vlc_fourcc_t i_chroma;
86     SepiaFunction pf_sepia;
87 } p_sepia_cfg[] = {
88     { VLC_CODEC_I420, PlanarI420Sepia },
89     { VLC_CODEC_RGB24, RVSepia },
90     { VLC_CODEC_RGB32, RVSepia },
91     { VLC_CODEC_UYVY, PackedYUVSepia },
92     { VLC_CODEC_VYUY, PackedYUVSepia },
93     { VLC_CODEC_YUYV, PackedYUVSepia },
94     { VLC_CODEC_YVYU, PackedYUVSepia },
95     { 0, NULL }
96 };
97
98 /*****************************************************************************
99  * filter_sys_t: sepia filter private data
100  *****************************************************************************/
101 struct filter_sys_t
102 {
103     SepiaFunction pf_sepia; /* rendering routine selected in Create() for the input chroma */
104     int i_intensity; /* current effect intensity: written by FilterCallback(), read by Filter() */
105     vlc_spinlock_t lock; /* taken around the i_intensity accesses in Filter() and FilterCallback() */
106 };
107
108 /*****************************************************************************
109  * Create: allocates Sepia video thread output method
110  *****************************************************************************
111  * This function allocates and initializes a Sepia vout method.
112  *****************************************************************************/
113 static int Create( vlc_object_t *p_this )
114 {
115     filter_t *p_filter = (filter_t *)p_this;
116     filter_sys_t *p_sys;
117
118     /* Allocate structure */
119     p_sys = p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
120     if( p_filter->p_sys == NULL )
121         return VLC_ENOMEM;
122
123     p_sys->pf_sepia = NULL;
124
125     for( int i = 0; p_sepia_cfg[i].i_chroma != 0; i++ )
126     {
127         if( p_sepia_cfg[i].i_chroma != p_filter->fmt_in.video.i_chroma )
128             continue;
129         p_sys->pf_sepia = p_sepia_cfg[i].pf_sepia;
130     }
131
132     if( p_sys->pf_sepia == NULL )
133     {
134         msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
135                 (char*)&(p_filter->fmt_in.video.i_chroma) );
136         free( p_sys );
137         return VLC_EGENERIC;
138     }
139
140     config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
141                        p_filter->p_cfg );
142     p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
143                        CFG_PREFIX "intensity" );
144
145     vlc_spin_init( &p_sys->lock );
146
147     var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
148
149     p_filter->pf_video_filter = Filter;
150
151     return VLC_SUCCESS;
152 }
153
154 /*****************************************************************************
155  * Destroy: destroy sepia video thread output method
156  *****************************************************************************
157  * Terminate an output method
158  *****************************************************************************/
159 static void Destroy( vlc_object_t *p_this )
160 {
161     filter_t *p_filter = (filter_t *)p_this;
162
163     var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
164
165     vlc_spin_destroy( &p_filter->p_sys->lock );
166     free( p_filter->p_sys );
167 }
168
169 /*****************************************************************************
170  * Render: displays previously rendered output
171  *****************************************************************************
172  * This function send the currently rendered image to sepia image, waits
173  * until it is displayed and switch the two rendering buffers, preparing next
174  * frame.
175  *****************************************************************************/
176 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
177 {
178     picture_t *p_outpic;
179     int intensity;
180
181     if( !p_pic ) return NULL;
182
183     filter_sys_t *p_sys = p_filter->p_sys;
184     vlc_spin_lock( &p_sys->lock );
185     intensity = p_sys->i_intensity;
186     vlc_spin_unlock( &p_sys->lock );
187
188     p_outpic = filter_NewPicture( p_filter );
189     if( !p_outpic )
190     {
191         msg_Warn( p_filter, "can't get output picture" );
192         picture_Release( p_pic );
193         return NULL;
194     }
195
196     p_sys->pf_sepia( p_pic, p_outpic, intensity );
197
198     return CopyInfoAndRelease( p_outpic, p_pic );
199 }
200
201 #if defined(CAN_COMPILE_SSE2)
/*****************************************************************************
 * Sepia8ySSE2
 *****************************************************************************
 * Applies the sepia luma formula to eight consecutive luma (Y) bytes using
 * SSE2 instructions:
 *
 *     dst[i] = src[i] - src[i] / 4 + intensity / 4
 *
 * The eight bytes are widened to 16-bit words so the arithmetic cannot wrap,
 * then packed back to bytes with unsigned saturation.
 *
 * dst                 receives 8 bytes (no alignment requirement)
 * src                 provides 8 bytes (no alignment requirement)
 * i_intensity_spread  the intensity (0..255) replicated in both 16-bit
 *                     halves of the int, i.e. 0x10001 * intensity
 *
 * The function zeroes its own scratch register and declares every register
 * it touches, so it does not depend on register state set up by the caller.
 *****************************************************************************/
static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
                         int i_intensity_spread)
{
    /* AT&T syntax: "op src, dst" computes dst = dst op src */
    __asm__ volatile (
        "pxor          %%xmm0, %%xmm0\n"    // xmm0 = 0, used for widening
        "movq            (%1), %%xmm1\n"    // load the 8 luma bytes
        "punpcklbw     %%xmm0, %%xmm1\n"    // xmm1 = y as 8 words
        "movdqa        %%xmm1, %%xmm2\n"
        "psrlw             $2, %%xmm2\n"    // xmm2 = y / 4
        "psubw         %%xmm2, %%xmm1\n"    // xmm1 = y - y / 4
        "movd              %2, %%xmm3\n"
        "pshufd    $0, %%xmm3, %%xmm3\n"    // intensity in all 8 words
        "psrlw             $2, %%xmm3\n"    // xmm3 = intensity / 4
        "paddw         %%xmm3, %%xmm1\n"    // xmm1 = y - y / 4 + intensity / 4
        "packuswb      %%xmm1, %%xmm1\n"    // words -> bytes (unsigned sat.)
        "movq          %%xmm1, (%0)  \n"    // store the 8 result bytes
        :
        :"r" (dst), "r"(src), "r"(i_intensity_spread)
        :"memory", "xmm0", "xmm1", "xmm2", "xmm3");
}
230 #endif
231
232 /*****************************************************************************
233  * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
234  *****************************************************************************
235  * This function applies sepia effect to one frame of the video by iterating
236  * through video lines. We iterate for every two lines and for every two pixels
237  * in line to calculate new sepia values for four y components as well for u
238  * and v components.
239  *****************************************************************************/
240 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
241                                int i_intensity )
242 {
243     // prepared values to copy for U and V channels
244     const uint8_t filling_const_8u = 128 - i_intensity / 6;
245     const uint8_t filling_const_8v = 128 + i_intensity / 14;
246
247 #if defined(CAN_COMPILE_SSE2)
248     if (vlc_CPU_SSE2())
249     {
250         /* prepared value for faster broadcasting in xmm register */
251         int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
252
253         __asm__ volatile(
254             "pxor      %%xmm7, %%xmm7\n"
255         ::);
256
257         /* iterate for every two visible line in the frame */
258         for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
259         {
260             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
261             const int i_dy_line2_start =
262             (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
263             const int i_du_line_start =
264             (y / 2) * p_outpic->p[U_PLANE].i_pitch;
265             const int i_dv_line_start =
266             (y / 2) * p_outpic->p[V_PLANE].i_pitch;
267             int x = 0;
268             /* iterate for every visible line in the frame (eight values at once) */
269             for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16 )
270             {
271                 /* Compute yellow channel values with asm function */
272                 Sepia8ySSE2(
273                     &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
274                     &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
275                     i_intensity_spread );
276                 Sepia8ySSE2(
277                     &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
278                     &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
279                     i_intensity_spread );
280                 Sepia8ySSE2(
281                     &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
282                     &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
283                     i_intensity_spread );
284                 Sepia8ySSE2(
285                     &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
286                     &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
287                     i_intensity_spread );
288                 /* Copy precomputed values to destination memory location */
289                 memset(
290                     &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
291                     filling_const_8u, 8 );
292                 memset(
293                     &p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
294                     filling_const_8v, 8 );
295             }
296             /* Completing the job, the cycle above takes really big chunks, so
297               this makes sure the job will be done completely */
298             for ( ; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2 )
299             {
300                 // y = y - y/4 {to prevent overflow} + intensity / 4
301                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
302                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
303                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
304                     (i_intensity >> 2);
305                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
306                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
307                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
308                     (i_intensity >> 2);
309                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
310                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
311                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
312                     (i_intensity >> 2);
313                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
314                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
315                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
316                     (i_intensity >> 2);
317                 // u = 128 {half => B&W} - intensity / 6
318                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
319                     filling_const_8u;
320                 // v = 128 {half => B&W} + intensity / 14
321                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
322                     filling_const_8v;
323             }
324         }
325     }
326     else
327 #endif
328     {
329         /* iterate for every two visible line in the frame */
330         for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
331         {
332             const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
333             const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
334             const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
335             const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
336             // to prevent sigsegv if one pic is smaller (theoretically)
337             int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
338                       < p_outpic->p[Y_PLANE].i_visible_pitch
339                       ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
340                       (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
341             /* iterate for every two visible line in the frame */
342             for( int x = 0; x < i_picture_size_limit; x += 2)
343             {
344                 // y = y - y/4 {to prevent overflow} + intensity / 4
345                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
346                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
347                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
348                     (i_intensity >> 2);
349                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
350                     p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
351                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
352                     (i_intensity >> 2);
353                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
354                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
355                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
356                     (i_intensity >> 2);
357                 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
358                     p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
359                     (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
360                     (i_intensity >> 2);
361                 // u = 128 {half => B&W} - intensity / 6
362                 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
363                     filling_const_8u;
364                 // v = 128 {half => B&W} + intensity / 14
365                 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
366                     filling_const_8v;
367             }
368         }
369     }
370 }
371
372 /*****************************************************************************
373  * PackedYUVSepia: Applies sepia to one frame of the packed YUV video
374  *****************************************************************************
375  * This function applies sepia effext to one frame of the video by iterating
376  * through video lines. In every pass, we calculate new values for pixels
377  * (UYVY, VYUY, YUYV and YVYU formats are supported)
378  *****************************************************************************/
379 static void PackedYUVSepia( picture_t *p_pic, picture_t *p_outpic,
380                            int i_intensity )
381 {
382     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
383     int i_yindex = 1, i_uindex = 2, i_vindex = 0;
384
385     GetPackedYuvOffsets( p_outpic->format.i_chroma,
386                         &i_yindex, &i_uindex, &i_vindex );
387
388     // prepared values to copy for U and V channels
389     const uint8_t filling_const_8u = 128 - i_intensity / 6;
390     const uint8_t filling_const_8v = 128 + i_intensity / 14;
391
392     p_in = p_pic->p[0].p_pixels;
393     p_in_end = p_in + p_pic->p[0].i_visible_lines
394         * p_pic->p[0].i_pitch;
395     p_out = p_outpic->p[0].p_pixels;
396
397     {
398         while( p_in < p_in_end )
399         {
400             p_line_end = p_in + p_pic->p[0].i_visible_pitch;
401             while( p_in < p_line_end )
402             {
403                 /* calculate new, sepia values */
404                 p_out[i_yindex] =
405                     p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
406                 p_out[i_yindex + 2] =
407                     p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
408                     + (i_intensity >> 2);
409                 p_out[i_uindex] = filling_const_8u;
410                 p_out[i_vindex] = filling_const_8v;
411                 p_in += 4;
412                 p_out += 4;
413             }
414             p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
415             p_out += p_outpic->p[0].i_pitch
416                 - p_outpic->p[0].i_visible_pitch;
417         }
418     }
419 }
420
421 /*****************************************************************************
422  * RVSepia: Applies sepia to one frame of the RV24/RV32 video
423  *****************************************************************************
424  * This function applies sepia effect to one frame of the video by iterating
425  * through video lines and calculating new values for every byte in chunks of
426  * 3 (RV24) or 4 (RV32) bytes.
427  *****************************************************************************/
428 static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
429 {
430 #define SCALEBITS 10
431 #define ONE_HALF  (1 << (SCALEBITS - 1))
432 #define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
433     uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
434     bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
435     int i_rindex = 0, i_gindex = 1, i_bindex = 2;
436
437     GetPackedRgbIndexes( &p_outpic->format, &i_rindex, &i_gindex, &i_bindex );
438
439     p_in = p_pic->p[0].p_pixels;
440     p_in_end = p_in + p_pic->p[0].i_visible_lines
441         * p_pic->p[0].i_pitch;
442     p_out = p_outpic->p[0].p_pixels;
443
444     /* Precompute values constant for this certain i_intensity, using the same
445      * formula as YUV functions above */
446     uint8_t r_intensity = (( FIX( 1.40200 * 255.0 / 224.0 ) * (i_intensity * 14)
447                         + ONE_HALF )) >> SCALEBITS;
448     uint8_t g_intensity = (( - FIX(0.34414*255.0/224.0) * ( - i_intensity / 6 )
449                         - FIX( 0.71414 * 255.0 / 224.0) * ( i_intensity * 14 )
450                         + ONE_HALF )) >> SCALEBITS;
451     uint8_t b_intensity = (( FIX( 1.77200 * 255.0 / 224.0) * ( - i_intensity / 6 )
452                         + ONE_HALF )) >> SCALEBITS;
453
454     while (p_in < p_in_end)
455     {
456         p_line_end = p_in + p_pic->p[0].i_visible_pitch;
457         while (p_in < p_line_end)
458         {
459             /* do sepia: this calculation is based on the formula to calculate
460              * YUV->RGB and RGB->YUV (in filter_picture.h) mode and that
461              * y = y - y/4 + intensity/4 . As Y is the only channel that changes
462              * through the whole image. After that, precomputed values are added
463              * for each RGB channel and saved in the output image.
464              * FIXME: needs cleanup */
465             uint8_t i_y = ((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] +  25
466                       * p_in[i_bindex] + 128 ) >> 8 ) * FIX(255.0/219.0))
467                       - (((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
468                       * p_in[i_bindex] + 128 ) >> 8 )
469                       * FIX( 255.0 / 219.0 )) >> 2 ) + ( i_intensity >> 2 );
470             p_out[i_rindex] = vlc_uint8(i_y + r_intensity);
471             p_out[i_gindex] = vlc_uint8(i_y + g_intensity);
472             p_out[i_bindex] = vlc_uint8(i_y + b_intensity);
473             p_in += 3;
474             p_out += 3;
475             /* for rv32 we take 4 chunks at the time */
476             if (b_isRV32) {
477             /* alpha channel stays the same */
478             *p_out++ = *p_in++;
479             }
480         }
481
482         p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
483         p_out += p_outpic->p[0].i_pitch
484             - p_outpic->p[0].i_visible_pitch;
485     }
486 #undef SCALEBITS
487 #undef ONE_HALF
488 #undef FIX
489 }
490
491 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
492                             vlc_value_t oldval, vlc_value_t newval,
493                             void *p_data )
494 {
495     VLC_UNUSED(psz_var); VLC_UNUSED(oldval); VLC_UNUSED(p_data);
496     filter_t *p_filter = (filter_t*)p_this;
497     filter_sys_t *p_sys = p_filter->p_sys;
498
499     vlc_spin_lock( &p_sys->lock );
500     p_sys->i_intensity = newval.i_int;
501     vlc_spin_unlock( &p_sys->lock );
502
503     return VLC_SUCCESS;
504 }