1 /*****************************************************************************
2 * sepia.c : Sepia video plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2010 the VideoLAN team
7 * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
32 #include <vlc_common.h>
33 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
38 #include "filter_picture.h"
40 /*****************************************************************************
42 *****************************************************************************/
/* Module entry points and the per-chroma sepia routines; all of them are
 * private to this file. */
43 static int Create ( vlc_object_t * );
44 static void Destroy ( vlc_object_t * );
46 static void RVSepia( picture_t *, picture_t *, int );
47 static void PlanarI420Sepia( picture_t *, picture_t *, int);
48 static void PackedYUVSepia( picture_t *, picture_t *, int);
49 static picture_t *Filter( filter_t *, picture_t * );
/* SIMD helpers (destination, source[, intensity array]).
 * They are declared static inline: with C99 semantics a plain `inline`
 * function provides no external definition, so any call the compiler decides
 * not to inline would be an undefined reference at link time. Definitions
 * that follow this declaration keep the internal linkage given here. */
50 static inline void Sepia8ySSE41( uint8_t *, const uint8_t *, volatile uint8_t * );
51 static inline void Memcpy8BMMX( uint8_t *, const uint8_t * );
/* Option name list handed to config_ChainParse() in Create(); the entries
 * themselves are not visible in this excerpt. */
52 static const char *const ppsz_filter_options[] = {
56 /*****************************************************************************
58 *****************************************************************************/
/* Translatable label and long description of the intensity option. */
59 #define SEPIA_INTENSITY_TEXT N_("Sepia intensity")
60 #define SEPIA_INTENSITY_LONGTEXT N_("Intensity of sepia effect" )
/* Prefix shared by the option names of this module ("sepia-intensity"). */
62 #define CFG_PREFIX "sepia-"
/* Module properties: description, short name and help text shown to the
 * user, the video-filter category, and the "video filter2" capability. */
65 set_description( N_("Sepia video filter") )
66 set_shortname( N_("Sepia" ) )
67 set_help( N_("Gives video a warmer tone by applying sepia effect") )
68 set_category( CAT_VIDEO )
69 set_subcategory( SUBCAT_VIDEO_VFILTER )
70 set_capability( "video filter2", 0 )
/* Integer option "sepia-intensity"; the numeric arguments 100, 0, 255 are
 * presumably default, minimum and maximum -- confirm against vlc_plugin.h. */
71 add_integer_with_range( CFG_PREFIX "intensity", 100, 0, 255,
72 SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
/* Create() and Destroy() are registered as the module callbacks. */
74 set_callbacks( Create, Destroy )
77 /*****************************************************************************
79 *****************************************************************************/
/* Variable callback installed by Create() on "sepia-intensity"; it stores the
 * new value in the filter private data. */
80 static int FilterCallback( vlc_object_t *, char const *,
81 vlc_value_t, vlc_value_t, void * );
/* Signature shared by the per-chroma sepia routines:
 * (input picture, output picture, intensity). */
83 typedef void (*SepiaFunction)( picture_t *, picture_t *, int );
/* Table associating each supported input chroma with the routine that
 * processes it. Create() scans it until it meets an entry whose i_chroma
 * is 0, so the table is expected to end with such an entry (not visible
 * in this excerpt). */
87 vlc_fourcc_t i_chroma;
88 SepiaFunction pf_sepia;
90 { VLC_CODEC_I420, PlanarI420Sepia },
91 { VLC_CODEC_RGB24, RVSepia },
92 { VLC_CODEC_RGB32, RVSepia },
93 { VLC_CODEC_UYVY, PackedYUVSepia },
94 { VLC_CODEC_VYUY, PackedYUVSepia },
95 { VLC_CODEC_YUYV, PackedYUVSepia },
96 { VLC_CODEC_YVYU, PackedYUVSepia },
100 /*****************************************************************************
101 * filter_sys_t: sepia filter private data
102 *****************************************************************************/
/* Sepia routine selected in Create() for the input chroma and called by
 * Filter(). The other members used in this file (i_intensity, lock) are
 * declared on lines not visible in this excerpt. */
105 SepiaFunction pf_sepia;
110 /*****************************************************************************
111 * Create: allocates and initializes the sepia video filter
112 *****************************************************************************
113 * This function allocates the private data and selects the sepia routine.
114 *****************************************************************************/
115 static int Create( vlc_object_t *p_this )
117 filter_t *p_filter = (filter_t *)p_this;
120 /* Allocate structure */
121 p_sys = p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
122 if( p_filter->p_sys == NULL )
125 p_sys->pf_sepia = NULL;
/* Pick the routine registered in p_sepia_cfg for the input chroma; the scan
 * stops at the entry whose i_chroma is 0.
 * NOTE(review): the statement controlled by the inner test is not visible
 * here -- presumably it skips non-matching entries. */
127 for( int i = 0; p_sepia_cfg[i].i_chroma != 0; i++ )
129 if( p_sepia_cfg[i].i_chroma != p_filter->fmt_in.video.i_chroma )
131 p_sys->pf_sepia = p_sepia_cfg[i].pf_sepia;
/* No routine found: report the chroma, printed as its four characters. */
134 if( p_sys->pf_sepia == NULL )
136 msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
137 (char*)&(p_filter->fmt_in.video.i_chroma) );
/* Parse the options carrying the CFG_PREFIX prefix, then read the initial
 * intensity from the "sepia-intensity" variable. */
142 config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
144 p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
145 CFG_PREFIX "intensity" );
/* The lock guards i_intensity, which Filter() reads and FilterCallback()
 * writes. */
147 vlc_spin_init( &p_sys->lock );
/* Get notified when the intensity variable changes. */
149 var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
/* Install the per-picture entry point. */
151 p_filter->pf_video_filter = Filter;
156 /*****************************************************************************
157 * Destroy: releases the sepia video filter
158 *****************************************************************************
159 * Removes the intensity callback, destroys the lock and frees private data
160 *****************************************************************************/
161 static void Destroy( vlc_object_t *p_this )
163 filter_t *p_filter = (filter_t *)p_this;
/* Unregister the callback before the private data it uses is torn down. */
165 var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
167 vlc_spin_destroy( &p_filter->p_sys->lock );
168 free( p_filter->p_sys );
171 /*****************************************************************************
172 * Filter: applies the sepia effect to one picture
173 *****************************************************************************
174 * This function renders the sepia version of p_pic into a new output picture
175 * using the routine chosen in Create() and the current intensity.
177 *****************************************************************************/
178 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
/* Nothing to do without an input picture. */
183 if( !p_pic ) return NULL;
185 filter_sys_t *p_sys = p_filter->p_sys;
/* Take a private copy of the intensity under the lock; FilterCallback()
 * writes it under the same lock. */
186 vlc_spin_lock( &p_sys->lock );
187 intensity = p_sys->i_intensity;
188 vlc_spin_unlock( &p_sys->lock );
190 p_outpic = filter_NewPicture( p_filter );
/* Failure path: warn and release the input picture.
 * NOTE(review): the enclosing test and the return statement are not visible
 * in this excerpt. */
193 msg_Warn( p_filter, "can't get output picture" );
194 picture_Release( p_pic );
/* Run the chroma-specific routine selected in Create(). */
198 p_sys->pf_sepia( p_pic, p_outpic, intensity );
/* CopyInfoAndRelease() is not defined in this file -- presumably it copies
 * the picture properties to p_outpic and releases p_pic; confirm in
 * filter_picture.h. */
200 return CopyInfoAndRelease( p_outpic, p_pic );
203 /*****************************************************************************
204 * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
205 *****************************************************************************
206 * This function applies sepia effect to one frame of the video by iterating
207 * through video lines. We iterate for every two lines and for every two pixels
208 * in a line, computing four new y values and one u and one v value per step.
210 *****************************************************************************/
211 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
214 // prepared values to copy for U and V channels
215 const uint8_t filling_const_8u = 128 - i_intensity / 6;
216 const uint8_t filling_const_8v = 128 + i_intensity / 14;
/* SSE4.1 path: compiled only when CAN_COMPILE_SSE4_1 is defined and taken
 * only when vlc_CPU() reports CPU_CAPABILITY_SSE4_1. Presumably the plain C
 * loops further down are used otherwise (the else/#endif lines are not
 * visible in this excerpt). */
218 #if defined(CAN_COMPILE_SSE4_1) && 1
219 if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
221 /*prepare array of values to copy with mmx, compute only once
223 volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
224 i_intensity, i_intensity, i_intensity, i_intensity,
225 i_intensity, i_intensity };
226 const uint8_t filling_array_8u[8] =
227 { filling_const_8u, filling_const_8u, filling_const_8u,
228 filling_const_8u, filling_const_8u, filling_const_8u,
229 filling_const_8u, filling_const_8u };
230 const uint8_t filling_array_8v[8] =
231 { filling_const_8v, filling_const_8v, filling_const_8v,
232 filling_const_8v, filling_const_8v, filling_const_8v,
233 filling_const_8v, filling_const_8v };
235 /* iterate for every two visible line in the frame */
/* NOTE(review): the line offsets below are computed from the p_outpic
 * pitches but are also used to index the p_pic planes; this assumes both
 * pictures have identical pitches -- confirm. */
236 for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
238 const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
239 const int i_dy_line2_start =
240 (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
241 const int i_du_line_start =
242 (y / 2) * p_outpic->p[U_PLANE].i_pitch;
243 const int i_dv_line_start =
244 (y / 2) * p_outpic->p[V_PLANE].i_pitch;
246 /* iterate over the visible pixels of both lines (sixteen values at once) */
247 for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16)
249 /* Compute Y (luma) channel values with asm function */
251 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
252 &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
255 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
256 &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
259 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
260 &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
263 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
264 &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
266 /* Copy precomputed values to destination image memory location */
268 &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
270 Memcpy8BMMX(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
273 /* Completing the job, the cycle above takes really big chunks, so
274 this makes sure the job will be done completely */
275 for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2)
277 // y = y - y/4 {to prevent overflow} + intensity / 4
278 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
279 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
280 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
282 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
283 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
284 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
286 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
287 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
288 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
290 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
291 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
292 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
294 // u = 128 {half => B&W} - intensity / 6
295 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
297 // v = 128 {half => B&W} + intensity / 14
298 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
305 /* iterate for every two visible line in the frame */
306 for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
308 const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
309 const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
310 const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
311 const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
312 // to prevent sigsegv if one pic is smaller (theoretically)
313 int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
314 < p_outpic->p[Y_PLANE].i_visible_pitch
315 ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
316 (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
317 /* iterate for every two visible pixels in the line */
318 for( int x = 0; x < i_picture_size_limit; x += 2)
320 // y = y - y/4 {to prevent overflow} + intensity / 4
321 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
322 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
323 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
325 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
326 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
327 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
329 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
330 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
331 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
333 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
334 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
335 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
337 // u = 128 {half => B&W} - intensity / 6
338 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
340 // v = 128 {half => B&W} + intensity / 14
341 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
348 /*****************************************************************************
349 * PackedYUVSepia: Applies sepia to one frame of the packed YUV video
350 *****************************************************************************
351 * This function applies sepia effect to one frame of the video by iterating
352 * through video lines. In every pass, we calculate new values for pixels
353 * (UYVY, VYUY, YUYV and YVYU formats are supported)
354 *****************************************************************************/
355 static void PackedYUVSepia( picture_t *p_pic, picture_t *p_outpic,
358 uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
/* Initial offsets of the Y, U and V bytes; they are passed by address to
 * GetPackedYuvOffsets(), which presumably sets them for the output chroma. */
359 int i_yindex = 1, i_uindex = 2, i_vindex = 0;
361 GetPackedYuvOffsets( p_outpic->format.i_chroma,
362 &i_yindex, &i_uindex, &i_vindex );
364 // prepared values to copy for U and V channels
365 const uint8_t filling_const_8u = 128 - i_intensity / 6;
366 const uint8_t filling_const_8v = 128 + i_intensity / 14;
/* p_in walks the input plane; p_in_end is one past the last visible line. */
368 p_in = p_pic->p[0].p_pixels;
369 p_in_end = p_in + p_pic->p[0].i_visible_lines
370 * p_pic->p[0].i_pitch;
371 p_out = p_outpic->p[0].p_pixels;
/* SSE4.1 path: compiled only when CAN_COMPILE_SSE4_1 is defined and taken
 * only when vlc_CPU() reports CPU_CAPABILITY_SSE4_1. */
372 #if defined(CAN_COMPILE_SSE4_1)
373 if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
375 /*prepare array of values to copy with mmx, compute only once
377 volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
378 i_intensity, i_intensity, i_intensity, i_intensity,
382 const uint8_t filling_array_8u[8] =
383 { filling_const_8u, filling_const_8u,
384 filling_const_8u, filling_const_8u, filling_const_8u,
386 filling_const_8u, filling_const_8u
388 const uint8_t filling_array_8v[8] =
389 { filling_const_8v, filling_const_8v,
390 filling_const_8v, filling_const_8v, filling_const_8v,
392 filling_const_8v, filling_const_8v
395 /* iterate for every visible line in the frame */
396 while (p_in < p_in_end)
398 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
399 while (p_in < p_line_end)
/* NOTE(review): Sepia8ySSE41() and Memcpy8BMMX() each handle 8 consecutive
 * bytes, whereas the scalar code below addresses luma at i_yindex and
 * i_yindex + 2 (interleaved with chroma). Verify that this SIMD path
 * produces the same output as the scalar path; the pointer increments are
 * not visible in this excerpt. */
401 Sepia8ySSE41(&p_out[i_yindex], &p_in[i_yindex],
403 Sepia8ySSE41(&p_out[i_yindex + 8], &p_in[i_yindex + 8],
405 Sepia8ySSE41(&p_out[i_yindex + 16], &p_in[i_yindex + 16],
407 Sepia8ySSE41(&p_out[i_yindex + 24], &p_in[i_yindex + 24],
409 Memcpy8BMMX(&p_out[i_uindex], filling_array_8u);
410 Memcpy8BMMX(&p_out[i_vindex], filling_array_8v);
/* Scalar loop over the same line: two Y samples plus one U and one V byte
 * per iteration. */
415 while (p_in < p_line_end)
418 p_in[i_yindex] - (p_in[i_yindex] >> 2) +
420 p_out[i_yindex + 2] =
421 p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2) +
423 p_out[i_uindex] = filling_const_8u;
424 p_out[i_vindex] = filling_const_8v;
/* Skip the bytes between the visible pitch and the full pitch. */
428 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
429 p_out += p_outpic->p[0].i_pitch
430 - p_outpic->p[0].i_visible_pitch;
435 while( p_in < p_in_end )
437 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
438 while( p_in < p_line_end )
440 /* calculate new, sepia values */
442 p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
443 p_out[i_yindex + 2] =
444 p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
445 + (i_intensity >> 2);
446 p_out[i_uindex] = filling_const_8u;
447 p_out[i_vindex] = filling_const_8v;
/* Skip the bytes between the visible pitch and the full pitch. */
451 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
452 p_out += p_outpic->p[0].i_pitch
453 - p_outpic->p[0].i_visible_pitch;
458 /*****************************************************************************
459 * RVSepia: Applies sepia to one frame of the RV24/RV32 video
460 *****************************************************************************
461 * This function applies sepia effect to one frame of the video by iterating
462 * through video lines and calculating new values for every byte in chunks of
463 * 3 (RV24) or 4 (RV32) bytes.
464 *****************************************************************************/
465 static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
/* Fixed-point helpers: FIX(x) converts a real constant to an integer scaled
 * by 2^SCALEBITS (rounded), ONE_HALF is 0.5 in that scale. SCALEBITS itself
 * is defined on a line not visible in this excerpt. */
468 #define ONE_HALF (1 << (SCALEBITS - 1))
469 #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
470 uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
471 bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
/* Initial positions of the R, G and B bytes; passed by address to
 * GetPackedRgbIndexes() together with the output format. */
472 int i_rindex = 0, i_gindex = 1, i_bindex = 2;
474 GetPackedRgbIndexes( &p_outpic->format, &i_rindex, &i_gindex, &i_bindex );
/* p_in walks the input plane; p_in_end is one past the last visible line. */
476 p_in = p_pic->p[0].p_pixels;
477 p_in_end = p_in + p_pic->p[0].i_visible_lines
478 * p_pic->p[0].i_pitch;
479 p_out = p_outpic->p[0].p_pixels;
481 /* Precompute values constant for this certain i_intensity, using the same
482 * formula as YUV functions above */
/* NOTE(review): the YUV routines use i_intensity / 14 for the V offset while
 * these expressions use i_intensity * 14 -- confirm which is intended. The
 * results are stored in uint8_t, so values outside 0..255 are reduced
 * modulo 256. */
483 uint8_t r_intensity = (( FIX( 1.40200 * 255.0 / 224.0 ) * (i_intensity * 14)
484 + ONE_HALF )) >> SCALEBITS;
485 uint8_t g_intensity = (( - FIX(0.34414*255.0/224.0) * ( - i_intensity / 6 )
486 - FIX( 0.71414 * 255.0 / 224.0) * ( i_intensity * 14 )
487 + ONE_HALF )) >> SCALEBITS;
488 uint8_t b_intensity = (( FIX( 1.77200 * 255.0 / 224.0) * ( - i_intensity / 6 )
489 + ONE_HALF )) >> SCALEBITS;
491 while (p_in < p_in_end)
493 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
494 while (p_in < p_line_end)
496 /* do sepia: this calculation is based on the formulas used to convert
497 * YUV->RGB and RGB->YUV (in filter_picture.h) and on the rule
498 * y = y - y/4 + intensity/4 . Y is the only channel that varies
499 * through the whole image. After that, precomputed values are added
500 * for each RGB channel and saved in the output image.
501 * FIXME: needs cleanup */
/* NOTE(review): the luma term is multiplied by FIX(255.0/219.0) and no
 * matching >> SCALEBITS is visible before the value is stored in a uint8_t
 * -- confirm the scaling. */
502 uint8_t i_y = ((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
503 * p_in[i_bindex] + 128 ) >> 8 ) * FIX(255.0/219.0))
504 - (((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
505 * p_in[i_bindex] + 128 ) >> 8 )
506 * FIX( 255.0 / 219.0 )) >> 2 ) + ( i_intensity >> 2 );
507 p_out[i_rindex] = vlc_uint8(i_y + r_intensity);
508 p_out[i_gindex] = vlc_uint8(i_y + g_intensity);
509 p_out[i_bindex] = vlc_uint8(i_y + b_intensity);
512 /* for rv32 we take 4 chunks at the time */
514 /* alpha channel stays the same */
/* Skip the bytes between the visible pitch and the full pitch. */
519 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
520 p_out += p_outpic->p[0].i_pitch
521 - p_outpic->p[0].i_visible_pitch;
528 /*****************************************************************************
530 *****************************************************************************
531 * This function applies sepia effect to eight bytes of luma (Y) using SSE4.1
532 * instructions. It loads those 8 bytes into 128-bit registers, zero-extending
533 * each byte to a 16-bit word, and packs the result back to bytes at the end.
534 *****************************************************************************/
535 inline void Sepia8ySSE41(uint8_t * dst, const uint8_t * src,
536 volatile uint8_t * i_intensity)
/* Operands as listed below: %0 = dst, %1 = src, %2 = i_intensity (an array
 * of at least 8 bytes, since pmovzxbw reads 8 bytes from it).
 * NOTE(review): in AT&T syntax the destination is the last operand, so
 * psubusb and paddsb as written update xmm2 and xmm3, while the value that
 * is finally stored comes from xmm1. One instruction line is not visible in
 * this excerpt; confirm that the operand order implements
 * y - y / 4 + i_intensity / 4.
 * NOTE(review): the output/clobber sections of the asm statement are not
 * visible here; the xmm registers and the memory written through dst should
 * be covered by them. */
538 #if defined(CAN_COMPILE_SSE4_1) && 1
540 "pmovzxbw (%1), %%xmm1\n" // y = y - y / 4 + i_intensity / 4
541 "pmovzxbw (%1), %%xmm2\n" // store bytes as words with 0s in between
542 "pmovzxbw (%2), %%xmm3\n"
543 "psrlw $2, %%xmm2\n" // shift each word right by 2 (y / 4)
544 "psubusb %%xmm1, %%xmm2\n" // subtract
546 "paddsb %%xmm1, %%xmm3\n" // add
547 "packuswb %%xmm2, %%xmm1\n" // pack back to bytes
548 "movq %%xmm1, (%0) \n" // store the low 8 bytes to dst
550 :"r" (dst), "r"(src), "r"(i_intensity)
555 /*****************************************************************************
556 * Memcpy8BMMX: Copies 8 bytes of memory in two instructions
557 *****************************************************************************
558 * Not quite clean, but it should be fast.
559 *****************************************************************************/
560 inline void Memcpy8BMMX(uint8_t * dst, const uint8_t * src)
/* NOTE(review): the guard tests CAN_COMPILE_MMX, but the instructions below
 * use the SSE register xmm0 rather than an MMX register -- confirm the guard.
 * The operand list is not visible in this excerpt; presumably %0 is dst and
 * %1 is src. */
562 #if defined(CAN_COMPILE_MMX) && 1
564 "movq (%1), %%xmm0\n" // load 8 bytes
565 "movq %%xmm0, (%0)\n" // store 8 bytes
/* FilterCallback: variable callback registered in Create() for the
 * "sepia-intensity" variable. It stores the new integer value in the filter
 * private data under the spin lock that Filter() takes when reading it.
 * psz_var, oldval and p_data are unused. */
572 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
573 vlc_value_t oldval, vlc_value_t newval,
576 VLC_UNUSED(psz_var); VLC_UNUSED(oldval); VLC_UNUSED(p_data);
577 filter_t *p_filter = (filter_t*)p_this;
578 filter_sys_t *p_sys = p_filter->p_sys;
580 vlc_spin_lock( &p_sys->lock );
581 p_sys->i_intensity = newval.i_int;
582 vlc_spin_unlock( &p_sys->lock );