1 /*****************************************************************************
2 * deinterlace.c : deinterlacer plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2000-2009 the VideoLAN team
7 * Author: Sam Hocevar <sam@zoy.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
39 #include <vlc_common.h>
40 #include <vlc_plugin.h>
43 #include <vlc_filter.h>
46 #ifdef CAN_COMPILE_MMXEXT
50 #include "filter_common.h"
52 #define DEINTERLACE_DISCARD 1
53 #define DEINTERLACE_MEAN 2
54 #define DEINTERLACE_BLEND 3
55 #define DEINTERLACE_BOB 4
56 #define DEINTERLACE_LINEAR 5
57 #define DEINTERLACE_X 6
58 #define DEINTERLACE_YADIF 7
59 #define DEINTERLACE_YADIF2X 8
61 /*****************************************************************************
63 *****************************************************************************/
64 static int Create ( vlc_object_t * );
65 static void Destroy ( vlc_object_t * );
67 static int Init ( vout_thread_t * );
68 static void End ( vout_thread_t * );
69 static void Render ( vout_thread_t *, picture_t * );
71 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
72 vlc_value_t oldval, vlc_value_t newval, void *p_data );
74 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
75 static void RenderBob ( vout_thread_t *, picture_t *, picture_t *, int );
76 static void RenderMean ( vout_thread_t *, picture_t *, picture_t * );
77 static void RenderBlend ( vout_thread_t *, picture_t *, picture_t * );
78 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
79 static void RenderX ( picture_t *, picture_t * );
80 static void RenderYadif ( vout_thread_t *, picture_t *, picture_t *, int, int );
82 static void MergeGeneric ( void *, const void *, const void *, size_t );
83 #if defined(CAN_COMPILE_C_ALTIVEC)
84 static void MergeAltivec ( void *, const void *, const void *, size_t );
86 #if defined(CAN_COMPILE_MMXEXT)
87 static void MergeMMXEXT ( void *, const void *, const void *, size_t );
89 #if defined(CAN_COMPILE_3DNOW)
90 static void Merge3DNow ( void *, const void *, const void *, size_t );
92 #if defined(CAN_COMPILE_SSE)
93 static void MergeSSE2 ( void *, const void *, const void *, size_t );
95 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
96 static void EndMMX ( void );
98 #if defined(CAN_COMPILE_3DNOW)
99 static void End3DNow ( void );
101 #if defined __ARM_NEON__
102 static void MergeNEON (void *, const void *, const void *, size_t);
105 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
106 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
108 static int OpenFilter( vlc_object_t *p_this );
109 static void CloseFilter( vlc_object_t *p_this );
111 /*****************************************************************************
112 * Callback prototypes
113 *****************************************************************************/
114 static int FilterCallback( vlc_object_t *, char const *,
115 vlc_value_t, vlc_value_t, void * );
117 /*****************************************************************************
119 *****************************************************************************/
120 #define MODE_TEXT N_("Deinterlace mode")
121 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
123 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
124 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
126 #define FILTER_CFG_PREFIX "sout-deinterlace-"
128 static const char *const mode_list[] = {
129 "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
130 static const char *const mode_list_text[] = {
131 N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };
134 set_description( N_("Deinterlacing video filter") )
135 set_shortname( N_("Deinterlace" ))
136 set_capability( "video filter", 0 )
137 set_category( CAT_VIDEO )
138 set_subcategory( SUBCAT_VIDEO_VFILTER )
140 set_section( N_("Display"),NULL)
141 add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
142 MODE_LONGTEXT, false )
143 change_string_list( mode_list, mode_list_text, 0 )
146 add_shortcut( "deinterlace" )
147 set_callbacks( Create, Destroy )
150 set_capability( "video filter2", 0 )
151 set_section( N_("Streaming"),NULL)
152 add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
153 SOUT_MODE_LONGTEXT, false )
154 change_string_list( mode_list, mode_list_text, 0 )
155 add_shortcut( "deinterlace" )
156 set_callbacks( OpenFilter, CloseFilter )
159 static const char *const ppsz_filter_options[] = {
163 /*****************************************************************************
164 * vout_sys_t: Deinterlace video output method descriptor
165 *****************************************************************************
166 * This structure is part of the video output thread descriptor.
167 * It describes the Deinterlace specific properties of an output thread.
168 *****************************************************************************/
169 #define HISTORY_SIZE (3)
172 int i_mode; /* Deinterlace mode */
173 bool b_double_rate; /* Shall we double the framerate? */
174 bool b_half_height; /* Shall be devide the height by 2 */
179 vout_thread_t *p_vout;
181 vlc_mutex_t filter_lock;
183 void (*pf_merge) ( void *, const void *, const void *, size_t );
184 void (*pf_end_merge) ( void );
187 picture_t *pp_history[HISTORY_SIZE];
190 /*****************************************************************************
191 * Control: control facility for the vout (forwards to child vout)
192 *****************************************************************************/
193 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
195 return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
198 /*****************************************************************************
199 * Create: allocates Deinterlace video thread output method
200 *****************************************************************************
201 * This function allocates and initializes a Deinterlace vout method.
202 *****************************************************************************/
203 static int Create( vlc_object_t *p_this )
205 vout_thread_t *p_vout = (vout_thread_t *)p_this;
209 /* Allocate structure */
210 p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
211 if( p_vout->p_sys == NULL )
214 p_vout->pf_init = Init;
215 p_vout->pf_end = End;
216 p_vout->pf_manage = NULL;
217 p_vout->pf_render = Render;
218 p_vout->pf_display = NULL;
219 p_vout->pf_control = Control;
221 p_sys->i_mode = DEINTERLACE_DISCARD;
222 p_sys->b_double_rate = false;
223 p_sys->b_half_height = true;
224 p_sys->last_date = 0;
226 vlc_mutex_init( &p_sys->filter_lock );
228 #if defined(CAN_COMPILE_C_ALTIVEC)
229 if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
231 p_sys->pf_merge = MergeAltivec;
232 p_sys->pf_end_merge = NULL;
236 #if defined(CAN_COMPILE_SSE)
237 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
239 p_sys->pf_merge = MergeSSE2;
240 p_sys->pf_end_merge = EndMMX;
244 #if defined(CAN_COMPILE_MMXEXT)
245 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
247 p_sys->pf_merge = MergeMMXEXT;
248 p_sys->pf_end_merge = EndMMX;
252 #if defined(CAN_COMPILE_3DNOW)
253 if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
255 p_sys->pf_merge = Merge3DNow;
256 p_sys->pf_end_merge = End3DNow;
260 #if defined __ARM_NEON__
261 if( vlc_CPU() & CPU_CAPABILITY_NEON )
263 p_sys->pf_merge = MergeNEON;
264 p_sys->pf_end_merge = NULL;
269 p_sys->pf_merge = MergeGeneric;
270 p_sys->pf_end_merge = NULL;
273 /* Look what method was requested */
274 psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
278 msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
279 msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
281 psz_mode = strdup( "discard" );
284 SetFilterMethod( p_vout, psz_mode );
291 /*****************************************************************************
292 * SetFilterMethod: setup the deinterlace method to use.
293 *****************************************************************************/
294 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
296 vout_sys_t *p_sys = p_vout->p_sys;
297 if( !strcmp( psz_method, "mean" ) )
299 p_sys->i_mode = DEINTERLACE_MEAN;
300 p_sys->b_double_rate = false;
301 p_sys->b_half_height = true;
303 else if( !strcmp( psz_method, "blend" )
304 || !strcmp( psz_method, "average" )
305 || !strcmp( psz_method, "combine-fields" ) )
307 p_sys->i_mode = DEINTERLACE_BLEND;
308 p_sys->b_double_rate = false;
309 p_sys->b_half_height = false;
311 else if( !strcmp( psz_method, "bob" )
312 || !strcmp( psz_method, "progressive-scan" ) )
314 p_sys->i_mode = DEINTERLACE_BOB;
315 p_sys->b_double_rate = true;
316 p_sys->b_half_height = false;
318 else if( !strcmp( psz_method, "linear" ) )
320 p_sys->i_mode = DEINTERLACE_LINEAR;
321 p_sys->b_double_rate = true;
322 p_sys->b_half_height = false;
324 else if( !strcmp( psz_method, "x" ) )
326 p_sys->i_mode = DEINTERLACE_X;
327 p_sys->b_double_rate = false;
328 p_sys->b_half_height = false;
330 else if( !strcmp( psz_method, "yadif" ) )
332 p_sys->i_mode = DEINTERLACE_YADIF;
333 p_sys->b_double_rate = false;
334 p_sys->b_half_height = false;
336 else if( !strcmp( psz_method, "yadif2x" ) )
338 p_sys->i_mode = DEINTERLACE_YADIF2X;
339 p_sys->b_double_rate = true;
340 p_sys->b_half_height = false;
344 const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
345 p_vout->render.i_chroma == VLC_CODEC_J422;
346 if( strcmp( psz_method, "discard" ) )
347 msg_Err( p_vout, "no valid deinterlace mode provided, "
348 "using \"discard\"" );
350 p_sys->i_mode = DEINTERLACE_DISCARD;
351 p_sys->b_double_rate = false;
352 p_sys->b_half_height = !b_i422;
355 msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
358 static void GetOutputFormat( vout_thread_t *p_vout,
359 video_format_t *p_dst, const video_format_t *p_src )
363 if( p_vout->p_sys->b_half_height )
365 p_dst->i_height /= 2;
366 p_dst->i_visible_height /= 2;
367 p_dst->i_y_offset /= 2;
368 p_dst->i_sar_den *= 2;
371 if( p_src->i_chroma == VLC_CODEC_I422 ||
372 p_src->i_chroma == VLC_CODEC_J422 )
374 switch( p_vout->p_sys->i_mode )
376 case DEINTERLACE_MEAN:
377 case DEINTERLACE_LINEAR:
379 case DEINTERLACE_YADIF:
380 case DEINTERLACE_YADIF2X:
381 p_dst->i_chroma = p_src->i_chroma;
384 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
391 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
393 return i_chroma == VLC_CODEC_I420 ||
394 i_chroma == VLC_CODEC_J420 ||
395 i_chroma == VLC_CODEC_YV12 ||
396 i_chroma == VLC_CODEC_I422 ||
397 i_chroma == VLC_CODEC_J422;
400 /*****************************************************************************
401 * Init: initialize Deinterlace video thread output method
402 *****************************************************************************/
403 static int Init( vout_thread_t *p_vout )
405 I_OUTPUTPICTURES = 0;
407 if( !IsChromaSupported( p_vout->render.i_chroma ) )
408 return VLC_EGENERIC; /* unknown chroma */
410 /* Initialize the output structure, full of directbuffers since we want
411 * the decoder to output directly to our structures. */
412 p_vout->output.i_chroma = p_vout->render.i_chroma;
413 p_vout->output.i_width = p_vout->render.i_width;
414 p_vout->output.i_height = p_vout->render.i_height;
415 p_vout->output.i_aspect = p_vout->render.i_aspect;
416 p_vout->fmt_out = p_vout->fmt_in;
418 /* Try to open the real video output */
419 p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
421 if( p_vout->p_sys->p_vout == NULL )
423 /* Everything failed */
424 msg_Err( p_vout, "cannot open vout, aborting" );
429 for( int i = 0; i < HISTORY_SIZE; i++ )
430 p_vout->p_sys->pp_history[i] = NULL;
432 vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
434 vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
436 var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
441 /*****************************************************************************
442 * SpawnRealVout: spawn the real video output.
443 *****************************************************************************/
444 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
446 msg_Dbg( p_vout, "spawning the real video output" );
449 GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
451 return vout_Create( p_vout, &fmt );
454 /*****************************************************************************
455 * End: terminate Deinterlace video thread output method
456 *****************************************************************************/
457 static void End( vout_thread_t *p_vout )
459 vout_sys_t *p_sys = p_vout->p_sys;
461 var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
463 for( int i = 0; i < HISTORY_SIZE; i++ )
465 if( p_sys->pp_history[i] )
466 picture_Release( p_sys->pp_history[i] );
471 vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
472 vout_CloseAndRelease( p_sys->p_vout );
475 vout_filter_ReleaseDirectBuffers( p_vout );
478 /*****************************************************************************
479 * Destroy: destroy Deinterlace video thread output method
480 *****************************************************************************
481 * Terminate an output method created by DeinterlaceCreateOutputMethod
482 *****************************************************************************/
483 static void Destroy( vlc_object_t *p_this )
485 vout_thread_t *p_vout = (vout_thread_t *)p_this;
486 vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
487 free( p_vout->p_sys );
491 * Forward mouse event with proper conversion.
493 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
494 vlc_value_t oldval, vlc_value_t newval, void *p_data )
496 vout_thread_t *p_vout = p_data;
497 VLC_UNUSED(p_this); VLC_UNUSED(oldval);
499 if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
502 return var_Set( p_vout, psz_var, newval );
505 /*****************************************************************************
506 * Render: displays previously rendered output
507 *****************************************************************************
508 * This function sends the currently rendered image to the Deinterlace image,
509 * waits until it is displayed and switches the two rendering buffers, preparing
511 *****************************************************************************/
512 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
514 vout_sys_t *p_sys = p_vout->p_sys;
515 picture_t *pp_outpic[2];
517 /* FIXME are they needed ? */
518 p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
519 p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
520 p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
521 p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
523 /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
524 p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
525 p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
526 p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
527 p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
528 if( p_vout->p_sys->b_half_height )
530 p_sys->p_vout->fmt_in.i_y_offset /= 2;
531 p_sys->p_vout->fmt_in.i_visible_height /= 2;
534 if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
536 p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
538 p_vout->fmt_out.i_aspect = p_vout->fmt_in.i_aspect;
539 p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
540 p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
542 video_format_t fmt = p_vout->fmt_out;
543 if( p_vout->p_sys->b_half_height )
545 fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
549 p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
554 pp_outpic[0] = pp_outpic[1] = NULL;
556 vlc_mutex_lock( &p_vout->p_sys->filter_lock );
558 /* Get a new picture */
559 while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
563 if( !vlc_object_alive( p_vout ) || p_vout->b_error )
565 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
568 msleep( VOUT_OUTMEM_SLEEP );
571 pp_outpic[0]->date = p_pic->date;
573 /* If we are using double rate, get an additional new picture */
574 if( p_vout->p_sys->b_double_rate )
576 while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
580 if( !vlc_object_alive( p_vout ) || p_vout->b_error )
582 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
583 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
586 msleep( VOUT_OUTMEM_SLEEP );
589 /* 20ms is a bit arbitrary, but it's only for the first image we get */
590 if( !p_vout->p_sys->last_date )
591 pp_outpic[1]->date = p_pic->date + 20000;
593 pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
594 p_vout->p_sys->last_date = p_pic->date;
597 switch( p_vout->p_sys->i_mode )
599 case DEINTERLACE_DISCARD:
600 RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
601 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
604 case DEINTERLACE_BOB:
605 RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
606 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
607 RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
608 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
611 case DEINTERLACE_LINEAR:
612 RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
613 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
614 RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
615 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
618 case DEINTERLACE_MEAN:
619 RenderMean( p_vout, pp_outpic[0], p_pic );
620 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
623 case DEINTERLACE_BLEND:
624 RenderBlend( p_vout, pp_outpic[0], p_pic );
625 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
629 RenderX( pp_outpic[0], p_pic );
630 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
633 case DEINTERLACE_YADIF:
634 RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
635 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
638 case DEINTERLACE_YADIF2X:
639 RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
640 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
641 RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
642 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
645 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
648 /*****************************************************************************
649 * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
650 *****************************************************************************/
651 static void RenderDiscard( vout_thread_t *p_vout,
652 picture_t *p_outpic, picture_t *p_pic, int i_field )
656 /* Copy image and skip lines */
657 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
659 uint8_t *p_in, *p_out_end, *p_out;
662 p_in = p_pic->p[i_plane].p_pixels
663 + i_field * p_pic->p[i_plane].i_pitch;
665 p_out = p_outpic->p[i_plane].p_pixels;
666 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
667 * p_outpic->p[i_plane].i_visible_lines;
669 switch( p_vout->render.i_chroma )
675 for( ; p_out < p_out_end ; )
677 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
679 p_out += p_outpic->p[i_plane].i_pitch;
680 p_in += 2 * p_pic->p[i_plane].i_pitch;
687 i_increment = 2 * p_pic->p[i_plane].i_pitch;
689 if( i_plane == Y_PLANE )
691 for( ; p_out < p_out_end ; )
693 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
694 p_out += p_outpic->p[i_plane].i_pitch;
695 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
696 p_out += p_outpic->p[i_plane].i_pitch;
702 for( ; p_out < p_out_end ; )
704 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
705 p_out += p_outpic->p[i_plane].i_pitch;
717 /*****************************************************************************
718 * RenderBob: renders a BOB picture - simple copy
719 *****************************************************************************/
720 static void RenderBob( vout_thread_t *p_vout,
721 picture_t *p_outpic, picture_t *p_pic, int i_field )
725 /* Copy image and skip lines */
726 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
728 uint8_t *p_in, *p_out_end, *p_out;
730 p_in = p_pic->p[i_plane].p_pixels;
731 p_out = p_outpic->p[i_plane].p_pixels;
732 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
733 * p_outpic->p[i_plane].i_visible_lines;
735 switch( p_vout->render.i_chroma )
740 /* For BOTTOM field we need to add the first line */
743 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
744 p_in += p_pic->p[i_plane].i_pitch;
745 p_out += p_outpic->p[i_plane].i_pitch;
748 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
750 for( ; p_out < p_out_end ; )
752 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
754 p_out += p_outpic->p[i_plane].i_pitch;
756 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
758 p_in += 2 * p_pic->p[i_plane].i_pitch;
759 p_out += p_outpic->p[i_plane].i_pitch;
762 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
764 /* For TOP field we need to add the last line */
767 p_in += p_pic->p[i_plane].i_pitch;
768 p_out += p_outpic->p[i_plane].i_pitch;
769 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
775 /* For BOTTOM field we need to add the first line */
778 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
779 p_in += p_pic->p[i_plane].i_pitch;
780 p_out += p_outpic->p[i_plane].i_pitch;
783 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
785 if( i_plane == Y_PLANE )
787 for( ; p_out < p_out_end ; )
789 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
791 p_out += p_outpic->p[i_plane].i_pitch;
793 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
795 p_in += 2 * p_pic->p[i_plane].i_pitch;
796 p_out += p_outpic->p[i_plane].i_pitch;
801 for( ; p_out < p_out_end ; )
803 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
805 p_out += p_outpic->p[i_plane].i_pitch;
806 p_in += 2 * p_pic->p[i_plane].i_pitch;
810 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
812 /* For TOP field we need to add the last line */
815 p_in += p_pic->p[i_plane].i_pitch;
816 p_out += p_outpic->p[i_plane].i_pitch;
817 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
824 #define Merge p_vout->p_sys->pf_merge
825 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
827 /*****************************************************************************
828 * RenderLinear: BOB with linear interpolation
829 *****************************************************************************/
830 static void RenderLinear( vout_thread_t *p_vout,
831 picture_t *p_outpic, picture_t *p_pic, int i_field )
835 /* Copy image and skip lines */
836 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
838 uint8_t *p_in, *p_out_end, *p_out;
840 p_in = p_pic->p[i_plane].p_pixels;
841 p_out = p_outpic->p[i_plane].p_pixels;
842 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
843 * p_outpic->p[i_plane].i_visible_lines;
845 /* For BOTTOM field we need to add the first line */
848 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
849 p_in += p_pic->p[i_plane].i_pitch;
850 p_out += p_outpic->p[i_plane].i_pitch;
853 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
855 for( ; p_out < p_out_end ; )
857 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
859 p_out += p_outpic->p[i_plane].i_pitch;
861 Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
862 p_pic->p[i_plane].i_pitch );
864 p_in += 2 * p_pic->p[i_plane].i_pitch;
865 p_out += p_outpic->p[i_plane].i_pitch;
868 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
870 /* For TOP field we need to add the last line */
873 p_in += p_pic->p[i_plane].i_pitch;
874 p_out += p_outpic->p[i_plane].i_pitch;
875 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
881 static void RenderMean( vout_thread_t *p_vout,
882 picture_t *p_outpic, picture_t *p_pic )
886 /* Copy image and skip lines */
887 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
889 uint8_t *p_in, *p_out_end, *p_out;
891 p_in = p_pic->p[i_plane].p_pixels;
893 p_out = p_outpic->p[i_plane].p_pixels;
894 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
895 * p_outpic->p[i_plane].i_visible_lines;
897 /* All lines: mean value */
898 for( ; p_out < p_out_end ; )
900 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
901 p_pic->p[i_plane].i_pitch );
903 p_out += p_outpic->p[i_plane].i_pitch;
904 p_in += 2 * p_pic->p[i_plane].i_pitch;
910 static void RenderBlend( vout_thread_t *p_vout,
911 picture_t *p_outpic, picture_t *p_pic )
915 /* Copy image and skip lines */
916 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
918 uint8_t *p_in, *p_out_end, *p_out;
920 p_in = p_pic->p[i_plane].p_pixels;
922 p_out = p_outpic->p[i_plane].p_pixels;
923 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
924 * p_outpic->p[i_plane].i_visible_lines;
926 switch( p_vout->render.i_chroma )
931 /* First line: simple copy */
932 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
933 p_out += p_outpic->p[i_plane].i_pitch;
935 /* Remaining lines: mean value */
936 for( ; p_out < p_out_end ; )
938 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
939 p_pic->p[i_plane].i_pitch );
941 p_out += p_outpic->p[i_plane].i_pitch;
942 p_in += p_pic->p[i_plane].i_pitch;
948 /* First line: simple copy */
949 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
950 p_out += p_outpic->p[i_plane].i_pitch;
952 /* Remaining lines: mean value */
953 if( i_plane == Y_PLANE )
955 for( ; p_out < p_out_end ; )
957 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
958 p_pic->p[i_plane].i_pitch );
960 p_out += p_outpic->p[i_plane].i_pitch;
961 p_in += p_pic->p[i_plane].i_pitch;
967 for( ; p_out < p_out_end ; )
969 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
970 p_pic->p[i_plane].i_pitch );
972 p_out += p_outpic->p[i_plane].i_pitch;
973 p_in += 2*p_pic->p[i_plane].i_pitch;
/**
 * MergeGeneric: portable C fallback that averages two byte buffers.
 *
 * For every i in [0, i_bytes) writes
 *     dest[i] = ( s1[i] + s2[i] ) >> 1
 * i.e. the mean of the two source bytes, rounded down.
 *
 * All i_bytes bytes are processed, including a trailing run shorter than
 * eight bytes, and lengths below eight (zero included) are handled without
 * ever forming a pointer in front of the destination buffer.
 *
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to average
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Widen to 16 bits before adding so that 255 + 255 does not wrap */
    for( size_t i = 0; i < i_bytes; i++ )
        p_dest[i] = ( (uint16_t)p_s1[i] + (uint16_t)p_s2[i] ) >> 1;
}
1012 #if defined(CAN_COMPILE_MMXEXT)
1013 static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
1016 uint8_t* p_dest = (uint8_t*)_p_dest;
1017 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
1018 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
1019 uint8_t* p_end = p_dest + i_bytes - 8;
1020 while( p_dest < p_end )
1022 __asm__ __volatile__( "movq %2,%%mm1;"
1024 "movq %%mm1, %0" :"=m" (*p_dest):
1034 while( p_dest < p_end )
1036 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1041 #if defined(CAN_COMPILE_3DNOW)
1042 static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
1045 uint8_t* p_dest = (uint8_t*)_p_dest;
1046 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
1047 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
1048 uint8_t* p_end = p_dest + i_bytes - 8;
1049 while( p_dest < p_end )
1051 __asm__ __volatile__( "movq %2,%%mm1;"
1052 "pavgusb %1, %%mm1;"
1053 "movq %%mm1, %0" :"=m" (*p_dest):
1063 while( p_dest < p_end )
1065 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1070 #if defined(CAN_COMPILE_SSE)
1071 static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
1074 uint8_t* p_dest = (uint8_t*)_p_dest;
1075 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
1076 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
1078 while( (uintptr_t)p_s1 % 16 )
1080 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1082 p_end = p_dest + i_bytes - 16;
1083 while( p_dest < p_end )
1085 __asm__ __volatile__( "movdqu %2,%%xmm1;"
1087 "movdqu %%xmm1, %0" :"=m" (*p_dest):
1097 while( p_dest < p_end )
1099 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1104 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: execute the x86 "emms" instruction.
 * Create installs this as pf_end_merge together with MergeSSE2 and
 * MergeMMXEXT, so it is what the EndMerge macro calls for those variants. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
1111 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: execute the "femms" instruction.
 * Create installs this as pf_end_merge together with Merge3DNow, so it is
 * what the EndMerge macro calls for that variant. */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
1118 #ifdef CAN_COMPILE_C_ALTIVEC
1119 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1120 const void *_p_s2, size_t i_bytes )
1122 uint8_t *p_dest = (uint8_t *)_p_dest;
1123 uint8_t *p_s1 = (uint8_t *)_p_s1;
1124 uint8_t *p_s2 = (uint8_t *)_p_s2;
1125 uint8_t *p_end = p_dest + i_bytes - 15;
1127 /* Use C until the first 16-bytes aligned destination pixel */
1128 while( (uintptr_t)p_dest & 0xF )
1130 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1133 if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1135 /* Unaligned source */
1136 vector unsigned char s1v, s2v, destv;
1137 vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1138 vector unsigned char perm1v, perm2v;
1140 perm1v = vec_lvsl( 0, p_s1 );
1141 perm2v = vec_lvsl( 0, p_s2 );
1142 s1oldv = vec_ld( 0, p_s1 );
1143 s2oldv = vec_ld( 0, p_s2 );
1145 while( p_dest < p_end )
1147 s1newv = vec_ld( 16, p_s1 );
1148 s2newv = vec_ld( 16, p_s2 );
1149 s1v = vec_perm( s1oldv, s1newv, perm1v );
1150 s2v = vec_perm( s2oldv, s2newv, perm2v );
1153 destv = vec_avg( s1v, s2v );
1154 vec_st( destv, 0, p_dest );
1163 /* Aligned source */
1164 vector unsigned char s1v, s2v, destv;
1166 while( p_dest < p_end )
1168 s1v = vec_ld( 0, p_s1 );
1169 s2v = vec_ld( 0, p_s2 );
1170 destv = vec_avg( s1v, s2v );
1171 vec_st( destv, 0, p_dest );
1181 while( p_dest < p_end )
1183 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
#ifdef __ARM_NEON__
/**
 * Averages two byte buffers into a third one, using ARM NEON for the bulk.
 *
 * @param out  destination buffer, n bytes long
 * @param in1  first source buffer, n bytes long
 * @param in2  second source buffer, n bytes long
 * @param n    number of bytes to process
 *
 * The head (up to the first 16-byte aligned destination byte) and the tail
 * (whatever is left after the last full 64-byte chunk) are delegated to
 * MergeGeneric().
 */
static void MergeNEON (void *restrict out, const void *in1,
                       const void *in2, size_t n)
{
    uint8_t *outp = out;
    const uint8_t *in1p = in1;
    const uint8_t *in2p = in2;

    /* Number of bytes needed to reach a 16-byte aligned destination: the
     * vector stores below carry a :128 alignment hint. Clamp it so a short
     * buffer is not overrun. */
    size_t mis = (16 - (((uintptr_t)outp) & 15)) & 15;
    if (mis > n)
        mis = n;

    if (mis)
    {
        MergeGeneric (outp, in1p, in2p, mis);
        outp += mis;
        in1p += mis;
        in2p += mis;
        n -= mis;
    }

    /* Every asm iteration loads and stores 64 bytes, so only whole 64-byte
     * chunks may go through the vector loops */
    uint8_t *end = outp + (n & ~(size_t)63);

    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
        /* At least one source is not 16-byte aligned: plain loads */
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1]]!\n"
                "vld1.u8  {q2-q3}, [%[in2]]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1]]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2]]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                /* every vector register touched above must be declared */
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");
    else
        /* Both sources are 16-byte aligned: loads carry the hint too */
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");

    /* Remaining bytes (fewer than 64) */
    n &= 63;
    if (n)
        MergeGeneric (outp, in1p, in2p, n);
}
#endif
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis. It copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields, if there is a
 *      good match we use MC to recreate the bottom field (with a small
 *      blend (1,6,1))
 *    * otherwise: it recreates the bottom field by an edge oriented
 *      interpolation.
 *****************************************************************************/
/* XDeint8x8Detect: detect if a 8x8 block is interlaced.
 * XXX: It needs access to 8x10 pixels.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: smooth/uniform area with noise detection doesn't work well
 * but it's not really a problem because they don't have much details anyway
 */

/* Square of a (small) difference. */
static inline int ssd( int a ) { return a*a; }

/**
 * Tells whether the 8x8 block at src looks interlaced (C version).
 *
 * @param src    top-left pixel of the block; lines 0..9 are read
 * @param i_src  distance in bytes between two consecutive lines
 * @return 1 if at least one of the four line pairs looks combed, else 0
 *
 * For each pair of lines, "fr" accumulates the squared differences between
 * vertically adjacent lines (frame distance) and "ff" those between lines
 * of the same field (two lines apart). The pair votes for "interlaced"
 * when the field distance is clearly smaller than the frame distance.
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int fc = 0;

    /* Detect interlacing */
    for( int y = 0; y < 7; y += 2 )
    {
        int ff = 0;
        int fr = 0;

        for( int x = 0; x < 8; x++ )
        {
            fr += ssd(src[      x] - src[1*i_src+x]) +
                  ssd(src[i_src+x] - src[2*i_src+x]);
            ff += ssd(src[      x] - src[2*i_src+x]) +
                  ssd(src[i_src+x] - src[3*i_src+x]);
        }
        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }

    return fc >= 1;
}
1293 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMXEXT counterpart of XDeint8x8DetectC.
 * For each pair of lines it accumulates in mm5 the squared differences
 * between adjacent lines (stored to fr) and in mm6 those between lines two
 * apart (stored to ff), four pixels at a time, then applies the same
 * "ff < 6*fr/8 && fr > 32" vote as the C version.
 * Comments below assume mmx.h's (source, destination) operand order.
 * NOTE(review): the embedded line numbers skip (1294 -> 1301, ...), so the
 * braces, local declarations, counter updates and return statement are
 * missing from this listing; restore them from the upstream file.
 * NOTE(review): this loop runs while y < 9 whereas the C version stops at
 * y < 7, so one more line pair is visited here -- confirm this is intended. */
1294 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
1301 /* Detect interlacing */
/* mm7 = 0, used to widen bytes to 16-bit words */
1303 pxor_r2r( mm7, mm7 );
1304 for( y = 0; y < 9; y += 2 )
/* clear the two per-pair accumulators */
1307 pxor_r2r( mm5, mm5 );
1308 pxor_r2r( mm6, mm6 );
1309 for( x = 0; x < 8; x+=4 )
/* load 4 pixels from lines 0..3 */
1311 movd_m2r( src[ x], mm0 );
1312 movd_m2r( src[1*i_src+x], mm1 );
1313 movd_m2r( src[2*i_src+x], mm2 );
1314 movd_m2r( src[3*i_src+x], mm3 );
/* widen them to words */
1316 punpcklbw_r2r( mm7, mm0 );
1317 punpcklbw_r2r( mm7, mm1 );
1318 punpcklbw_r2r( mm7, mm2 );
1319 punpcklbw_r2r( mm7, mm3 );
/* keep a copy of line 0 for the second difference */
1321 movq_r2r( mm0, mm4 );
/* mm0 = line0 - line1, mm4 = line0 - line2 */
1323 psubw_r2r( mm1, mm0 );
1324 psubw_r2r( mm2, mm4 );
/* mm2 = line2 - line1, mm3 = line3 - line1 */
1326 psubw_r2r( mm1, mm2 );
1327 psubw_r2r( mm1, mm3 );
/* square each difference and add neighbouring squares */
1329 pmaddwd_r2r( mm0, mm0 );
1330 pmaddwd_r2r( mm4, mm4 );
1331 pmaddwd_r2r( mm2, mm2 );
1332 pmaddwd_r2r( mm3, mm3 );
/* mm5 += adjacent-line terms, mm6 += same-field terms */
1333 paddd_r2r( mm0, mm2 );
1334 paddd_r2r( mm4, mm3 );
1335 paddd_r2r( mm2, mm5 );
1336 paddd_r2r( mm3, mm6 );
/* horizontal add of the two 32-bit halves of mm5, result to fr */
1339 movq_r2r( mm5, mm0 );
1340 psrlq_i2r( 32, mm0 );
1341 paddd_r2r( mm0, mm5 );
1342 movd_r2m( mm5, fr );
/* same for mm6, result to ff */
1344 movq_r2r( mm6, mm0 );
1345 psrlq_i2r( 32, mm0 );
1346 paddd_r2r( mm0, mm6 );
1347 movd_r2m( mm6, ff );
/* same vote as in XDeint8x8DetectC */
1349 if( ff < 6*fr/8 && fr > 32 )
/**
 * Rebuilds a progressive 8x8 block with a small vertical blend (C version).
 *
 * @param dst     top-left pixel of the output block
 * @param i_dst   distance in bytes between two output lines
 * @param src1    first line of the field that is copied unchanged
 * @param i_src1  distance in bytes between two lines of that field
 * @param src2    first line of the other field
 * @param i_src2  distance in bytes between two lines of the other field
 *
 * Even output lines are copies of src1 lines; odd output lines are the
 * (1,6,1)/8 blend of the src2 line with the src1 lines above and below it.
 * The src1 line following the fourth one is read as well.
 */
static inline void XDeint8x8MergeC( uint8_t *dst,  int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src1, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4 ) >> 3;
        dst += i_dst;

        src1 += i_src1;
        src2 += i_src2;
    }
}
1379 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMXEXT counterpart of XDeint8x8MergeC.
 * Copies four src1 pixels to dst unchanged, and writes to the following dst
 * line (src1 + 6*src2 + next src1 line + 4) >> 3, i.e. the same (1,6,1)
 * blend as the C version, four pixels at a time.
 * Comments below assume mmx.h's (source, destination) operand order.
 * NOTE(review): the embedded line numbers skip, so braces, the loop
 * counters' declaration and the dst/src1/src2 pointer updates are missing
 * from this listing; restore them from the upstream file. */
1380 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
1381 uint8_t *src1, int i_src1,
1382 uint8_t *src2, int i_src2 )
/* rounding term: the value 4 in each of the four 16-bit lanes */
1384 static const uint64_t m_4 = INT64_C(0x0004000400040004);
/* mm7 = 0, used to widen bytes to words and to pack back */
1388 pxor_r2r( mm7, mm7 );
1389 for( y = 0; y < 8; y += 2 )
1391 for( x = 0; x < 8; x +=4 )
/* copy 4 pixels of the kept field straight to the output */
1393 movd_m2r( src1[x], mm0 );
1394 movd_r2m( mm0, dst[x] );
/* mm1 = other field, mm2 = next line of the kept field */
1396 movd_m2r( src2[x], mm1 );
1397 movd_m2r( src1[i_src1+x], mm2 );
1399 punpcklbw_r2r( mm7, mm0 );
1400 punpcklbw_r2r( mm7, mm1 );
1401 punpcklbw_r2r( mm7, mm2 );
/* mm1 = 2*src2, mm3 = 4*src2, mm0 = src1 + next src1, mm1 = 6*src2 + 4 */
1402 paddw_r2r( mm1, mm1 );
1403 movq_r2r( mm1, mm3 );
1404 paddw_r2r( mm3, mm3 );
1405 paddw_r2r( mm2, mm0 );
1406 paddw_r2r( mm3, mm1 );
1407 paddw_m2r( m_4, mm1 );
/* sum everything, divide by 8, pack back to bytes */
1408 paddw_r2r( mm1, mm0 );
1409 psraw_i2r( 3, mm0 );
1410 packuswb_r2r( mm7, mm0 );
/* store on the output line below the copied one */
1411 movd_r2m( mm0, dst[i_dst+x] );
/**
 * Fills an 8x8 block with a constant value.
 *
 * @param dst    top-left pixel of the block
 * @param i_dst  distance in bytes between two consecutive lines
 * @param v      value written to each of the 64 pixels
 */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int y = 0; y < 8; y++ )
        memset( &dst[y*i_dst], v, 8 );
}
/* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for blocks that miss a
 * neighbour
 * (Uses 8x9 pixels)
 * TODO: a better one for the inner part.
 */

/**
 * Rebuilds an interlaced 8x8 block from its top field only (C version).
 *
 * @param dst    top-left pixel of the output block
 * @param i_dst  distance in bytes between two output lines
 * @param src    top-left pixel of the input block; lines 0, 2, 4, 6 and 8
 *               are read
 * @param i_src  distance in bytes between two input lines
 *
 * Even output lines are copies of the even input lines; each odd output
 * line is the truncated average of the even input lines above and below.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1451 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMXEXT counterpart of XDeint8x8FieldEC.
 * Copies one 8-pixel input line, then stores the pavgb average of that
 * line and the input line two below it.
 * NOTE(review): the embedded line numbers skip, so braces, the declaration
 * of y and the dst/src pointer updates (between the two stores and at the
 * end of each iteration) are missing from this listing; restore them from
 * the upstream file.
 * NOTE(review): pavgb presumably rounds up whereas the C version truncates
 * -- confirm the difference is acceptable. */
1452 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
1453 uint8_t *src, int i_src )
1458 for( y = 0; y < 8; y += 2 )
/* copy the kept line */
1460 movq_m2r( src[0], mm0 );
1461 movq_r2m( mm0, dst[0] );
/* average it with the next line of the same field */
1464 movq_m2r( src[2*i_src], mm1 );
1465 pavgb_r2r( mm1, mm0 );
/* store the interpolated line */
1467 movq_r2m( mm0, dst[0] );
/* XDeint8x8Field: Edge oriented interpolation
 * (Needs -4 and +5 pixels H, +1 line)
 */

/**
 * Rebuilds an interlaced 8x8 block from its top field with an edge
 * oriented interpolation (C version).
 *
 * @param dst    top-left pixel of the output block
 * @param i_dst  distance in bytes between two output lines
 * @param src    top-left pixel of the input block; columns -4..+12 of
 *               lines 0, 2, 4, 6 and 8 are read
 * @param i_src  distance in bytes between two input lines
 *
 * Even output lines are copies of the even input lines. For each pixel of
 * an odd output line, three 8-pixel sums of absolute differences between
 * the line above and the line below are computed: c1 straight, c0 with the
 * lower line shifted two pixels right, c2 with it shifted two pixels left.
 * The pixel is averaged along the shifted direction when that cost is the
 * smallest of the three in a consistent order, and vertically otherwise.
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        const uint8_t *src2 = &src[2*i_src];

        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* 8 pixels are used just to match the MMX version, but it's
             * overkill: 5 would be enough (less isn't good) */
            int c0 = 0;
            int c1 = 0;
            int c2 = 0;

            for( int k = 0; k < 8; k++ )
            {
                c0 += abs( src[x-4+k] - src2[x-2+k] );
                c1 += abs( src[x-3+k] - src2[x-3+k] );
                c2 += abs( src[x-2+k] - src2[x-4+k] );
            }

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1521 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: MMXEXT counterpart of XDeint8x8FieldC.
 * The three directional costs are obtained with psadbw over 8 pixels each
 * and stored to c2, c1 and c0; the choice of the interpolation direction
 * and the averaging itself are done in C exactly as in XDeint8x8FieldC.
 * NOTE(review): the embedded line numbers skip, so braces, the declarations
 * of y, x, c0, c1 and c2, the "else" before the last assignment and the
 * dst/src pointer updates are missing from this listing; restore them from
 * the upstream file. */
1522 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
1523 uint8_t *src, int i_src )
1528 for( y = 0; y < 8; y += 2 )
/* the kept line is copied unchanged */
1530 memcpy( dst, src, 8 );
1533 for( x = 0; x < 8; x++ )
/* src2: the next line of the same field */
1535 uint8_t *src2 = &src[2*i_src];
/* load 8 pixels of the upper line at offsets -2, -3 and -4 */
1538 movq_m2r( src[x-2], mm0 );
1539 movq_m2r( src[x-3], mm1 );
1540 movq_m2r( src[x-4], mm2 );
/* sums of absolute differences against the lower line at -4, -3 and -2 */
1542 psadbw_m2r( src2[x-4], mm0 );
1543 psadbw_m2r( src2[x-3], mm1 );
1544 psadbw_m2r( src2[x-2], mm2 );
/* mm0 -> c2, mm1 -> c1, mm2 -> c0 */
1546 movd_r2m( mm0, c2 );
1547 movd_r2m( mm1, c1 );
1548 movd_r2m( mm2, c0 );
/* same direction choice as in XDeint8x8FieldC */
1550 if( c0 < c1 && c1 <= c2 )
1551 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1552 else if( c2 < c1 && c1 <= c0 )
1553 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1555 dst[x] = (src[x+0] + src2[x+0]) >> 1;
/* NxN arbitrary size (and then only use pixels in the NxN block)
 */

/**
 * Tells whether a block of arbitrary size looks interlaced.
 *
 * @param src       top-left pixel of the block
 * @param i_src     distance in bytes between two consecutive lines
 * @param i_height  number of lines in the block
 * @param i_width   number of pixels per line
 * @return 1 if at least two line pairs vote for "interlaced", else 0
 *
 * NOTE: height comes BEFORE width here, unlike in the other XDeintNxN*
 * functions.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    int ff = 0; /* squared distance between lines of the same field */
    int fr = 0; /* squared distance between adjacent lines */
    int fc = 0; /* number of line pairs voting for "interlaced" */

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *s = &src[y*i_src];

        /* the sums are deliberately not reset between line pairs */
        for( int x = 0; x < i_width; x++ )
        {
            const int d_frame = s[x] - s[1*i_src+x];
            const int d_field = s[x] - s[2*i_src+x];

            fr += d_frame * d_frame;
            ff += d_field * d_field;
        }
        if( ff < fr && fr > i_width / 2 )
            fc++;
    }

    return fc >= 2;
}

/**
 * Rebuilds a progressive block of arbitrary size with a (1,2,1) blend.
 *
 * Even output lines are copies of the input; odd output lines are blended
 * with their two neighbours, except the last one which is averaged with
 * the line above it. Only pixels inside the i_width x i_height block are
 * read or written.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Progressive */
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: there is no second line left in the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}

/**
 * Rebuilds an interlaced block of arbitrary size from its top field.
 *
 * Even output lines are copies of the input; odd output lines are the
 * average of the even lines above and below, except the last one which is
 * averaged with the input line at the same position. Only pixels inside
 * the i_width x i_height block are read or written.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Interlaced */
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: there is no second line left in the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}

/**
 * Deinterlaces a block of arbitrary size: the field reconstruction is used
 * when the block is detected as interlaced, the frame blend otherwise.
 *
 * @param dst       top-left pixel of the output block
 * @param i_dst     distance in bytes between two output lines
 * @param src       top-left pixel of the input block
 * @param i_src     distance in bytes between two input lines
 * @param i_width   number of pixels per line
 * @param i_height  number of lines
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height first, then the width */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
/**
 * Returns the median (middle value) of three integers.
 *
 * Implemented with comparisons only, so it cannot overflow whatever the
 * magnitude of the arguments.
 */
static inline int median( int a, int b, int c )
{
    const int lo = a < b ? a : b;
    const int hi = a < b ? b : a;

    if( c <= lo )
        return lo;
    if( c >= hi )
        return hi;
    return c;
}
/* XDeintBand8x8C: deinterlaces one band of 8x8 blocks (C version).
 * For each of the i_mbx blocks, XDeint8x8DetectC() chooses between the
 * field reconstruction (XDeint8x8FieldEC for the first and last block,
 * XDeint8x8FieldC for the others) and the progressive merge
 * XDeint8x8MergeC, which is given the even lines as first field and the
 * odd lines as second field (both with a stride of 2*i_src).
 * The final XDeintNxN() call handles an i_modx pixels wide, 8 lines high
 * remainder.
 * NOTE(review): the embedded line numbers skip, so braces, the declarations
 * of x and s, the "else" keywords, the per-block dst/src advance and
 * presumably an "if( i_modx )" guard before the last call are missing from
 * this listing; restore them from the upstream file. */
1678 static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
1679 uint8_t *src, int i_src,
1680 const int i_mbx, int i_modx )
1684 for( x = 0; x < i_mbx; x++ )
/* non-zero when the block looks interlaced */
1687 if( ( s = XDeint8x8DetectC( src, i_src ) ) )
/* first and last block of the band use the simpler variant */
1689 if( x == 0 || x == i_mbx - 1 )
1690 XDeint8x8FieldEC( dst, i_dst, src, i_src );
1692 XDeint8x8FieldC( dst, i_dst, src, i_src );
/* progressive block: blend both fields */
1696 XDeint8x8MergeC( dst, i_dst,
1697 &src[0*i_src], 2*i_src,
1698 &src[1*i_src], 2*i_src );
/* remaining columns, narrower than 8 pixels */
1706 XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
1708 #ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT: deinterlaces one band of 8x8 blocks using the
 * MMXEXT detect/field/merge helpers; same structure as XDeintBand8x8C.
 * The narrower-than-8 remainder is still handled by the C XDeintNxN().
 * NOTE(review): the embedded line numbers skip, so braces, the declarations
 * of x and s, the "else" keywords, the per-block dst/src advance, presumably
 * an "if( i_modx )" guard before the last call and the closing #endif are
 * missing from this listing; restore them from the upstream file. */
1709 static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
1710 uint8_t *src, int i_src,
1711 const int i_mbx, int i_modx )
1715 /* Reset current line */
1716 for( x = 0; x < i_mbx; x++ )
/* non-zero when the block looks interlaced */
1719 if( ( s = XDeint8x8DetectMMXEXT( src, i_src ) ) )
/* first and last block of the band use the simpler variant */
1721 if( x == 0 || x == i_mbx - 1 )
1722 XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
1724 XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
/* progressive block: blend both fields */
1728 XDeint8x8MergeMMXEXT( dst, i_dst,
1729 &src[0*i_src], 2*i_src,
1730 &src[1*i_src], 2*i_src );
/* remaining columns, narrower than 8 pixels */
1738 XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
/* RenderX: deinterlaces p_pic into p_outpic with the "X" algorithm.
 * Each plane is cut into bands of 8 lines, each band into 8x8 blocks:
 *  - i_mby full bands are processed by XDeintBand8x8MMXEXT() when MMXEXT
 *    is available, by XDeintBand8x8C() otherwise;
 *  - the last i_mody lines (1 to 8 of them, since one band is always left
 *    out of i_mby) are processed in C with XDeintNxN().
 * i_mbx is the number of full 8-pixel columns, i_modx the width of the
 * remaining column.
 * NOTE(review): the embedded line numbers skip, so braces, the declarations
 * of i_plane, y and x, the "else"/"#endif" of the CPU dispatch, presumably
 * "if( i_mody )"/"if( i_modx )" guards, the dst/src advance of the last
 * band and the statement executed under the final MMXEXT test (presumably
 * emms()) are missing from this listing; restore them from upstream. */
1742 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1746 /* Copy image and skip lines */
1747 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
/* number of full 8-line bands, keeping at least one line for the end */
1749 const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
/* number of full 8-pixel columns */
1750 const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
/* lines and columns left over */
1752 const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1753 const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
/* line strides of the output and input planes */
1755 const int i_dst = p_outpic->p[i_plane].i_pitch;
1756 const int i_src = p_pic->p[i_plane].i_pitch;
1760 for( y = 0; y < i_mby; y++ )
1762 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1763 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1765 #ifdef CAN_COMPILE_MMXEXT
1766 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1767 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1770 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1773 /* Last line (C only)*/
/* y is left at i_mby by the loop above, so this addresses the last band */
1776 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1777 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1779 for( x = 0; x < i_mbx; x++ )
1781 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
/* bottom-right corner block */
1788 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1792 #ifdef CAN_COMPILE_MMXEXT
1793 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1798 /*****************************************************************************
1799 * Yadif (Yet Another DeInterlacing Filter).
1800 *****************************************************************************/
/* Configuration handed to the yadif line filters. */
struct vf_priv_s {
    /*
     * 0: Output 1 frame for each frame.
     * 1: Output 1 frame for each field.
     * 2: Like 0 but skips spatial interlacing check.
     * 3: Like 1 but skips spatial interlacing check.
     *
     * In vlc, only & 0x02 has meaning, as we do the & 0x01 ourself.
     */
    int mode;
};

/* I am unsure it is the right one */
typedef intptr_t x86_reg;

/* Helpers expected by the yadif code (ffmpeg/mplayer names) */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b)      __MAX(a,b)
#define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b)      __MIN(a,b)
#define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)

/* yadif.h comes from vf_yadif.c of mplayer project */
/* NOTE(review): the #include of yadif.h announced above is not present in
 * this listing; it has to come after the definitions above. */
/* RenderYadif: deinterlaces with the yadif line filter.
 *  p_vout:  owner of the picture history (p_sys->pp_history)
 *  p_dst:   output picture
 *  p_src:   input picture
 *  i_order: 0 or 1 (asserted); a copy of p_src is pushed into the history
 *           in the part that precedes the p_prev/p_cur/p_next reads
 *           (presumably only when i_order is 0 -- see note below)
 *  i_field: 0 or 1 (asserted); lines whose parity equals i_field are copied
 *           from the current picture, the others are filtered
 * When the history holds three pictures, each plane is filtered line by
 * line and p_dst->date is interpolated between the current and next dates;
 * otherwise the picture is rendered with RenderX().
 * NOTE(review): the embedded line numbers skip, so braces, "else" keywords,
 * the condition around the history update (line 1834), the guard before
 * picture_Copy() (line 1840), the beginning of the filter() call (line
 * 1889), its "refs" argument (line 1895) and the "y == 1" test (line 1900)
 * are missing from this listing; restore them from the upstream file. */
1826 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1828 vout_sys_t *p_sys = p_vout->p_sys;
1831 assert( i_order == 0 || i_order == 1 );
1832 assert( i_field == 0 || i_field == 1 );
1836 /* Duplicate the picture
1837 * TODO when the vout rework is finished, picture_Hold() might be enough
1838 * but becarefull, the pitches must match */
1839 picture_t *p_dup = picture_NewFromFormat( &p_src->format );
/* NOTE(review): confirm this copy is skipped when p_dup is NULL */
1841 picture_Copy( p_dup, p_src );
1843 /* Slide the history */
1844 if( p_sys->pp_history[0] )
1845 picture_Release( p_sys->pp_history[0] );
1846 for( int i = 1; i < HISTORY_SIZE; i++ )
1847 p_sys->pp_history[i-1] = p_sys->pp_history[i];
/* the newest slot may receive NULL if the duplication failed */
1848 p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1851 /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1852 picture_t *p_prev = p_sys->pp_history[0];
1853 picture_t *p_cur = p_sys->pp_history[1];
1854 picture_t *p_next = p_sys->pp_history[2];
1856 /* Filter if we have all the pictures we need */
1857 if( p_prev && p_cur && p_next )
/* line filter, chosen once per call according to the CPU */
1860 void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1861 #if defined(HAVE_YADIF_SSE2)
1862 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1863 filter = yadif_filter_line_mmx2;
1866 filter = yadif_filter_line_c;
1868 for( int n = 0; n < p_dst->i_planes; n++ )
1870 const plane_t *prevp = &p_prev->p[n];
1871 const plane_t *curp = &p_cur->p[n];
1872 const plane_t *nextp = &p_next->p[n];
1873 plane_t *dstp = &p_dst->p[n];
/* the first and last lines are not filtered, they are duplicated below */
1875 for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
/* line of the kept field: plain copy from the current picture */
1877 if( (y % 2) == i_field )
1879 vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1880 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
/* line of the other field: rebuilt by the yadif line filter */
1884 struct vf_priv_s cfg;
1885 /* Spatial checks only when enough data */
1886 cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
/* a single stride is passed to the filter for the three inputs */
1888 assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1890 &dstp->p_pixels[y * dstp->i_pitch],
1891 &prevp->p_pixels[y * prevp->i_pitch],
1892 &curp->p_pixels[y * curp->i_pitch],
1893 &nextp->p_pixels[y * nextp->i_pitch],
1894 dstp->i_visible_pitch,
1896 (i_field ^ (i_order == i_field)) & 1 );
1899 /* We duplicate the first and last lines */
1901 vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1902 else if( y == dstp->i_visible_lines - 2 )
1903 vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
/* i_order 0 keeps the current date, i_order 1 lands halfway to the next */
1908 p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1912 /* Fallback to something simple
1913 * XXX it is wrong when we have 2 pictures, we should not output a picture */
1914 RenderX( p_dst, p_src );
1918 /*****************************************************************************
1919 * FilterCallback: called when changing the deinterlace method on the fly.
1920 *****************************************************************************/
/* FilterCallback: variable callback applying a new deinterlace mode.
 *  p_this: the deinterlace vout (cast to vout_thread_t)
 *  newval: psz_string holds the name of the new mode
 *  psz_cmd, oldval, p_data: unused
 * Under p_sys->filter_lock, the new method is installed with
 * SetFilterMethod(). When the "half height" property did not change,
 * nothing else is done; otherwise the child vout is closed and a new one
 * is spawned. VLC_EGENERIC is returned when the new vout cannot be opened.
 * NOTE(review): the embedded line numbers skip, so the last parameter of
 * the signature, braces, the success "return" statements and presumably a
 * NULL test around the removal of the old vout are missing from this
 * listing; restore them from the upstream file. */
1921 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1922 vlc_value_t oldval, vlc_value_t newval,
1925 VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1926 vout_thread_t * p_vout = (vout_thread_t *)p_this;
1927 vout_sys_t *p_sys = p_vout->p_sys;
1929 msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1931 vlc_mutex_lock( &p_sys->filter_lock );
/* remember the output geometry class before switching */
1932 const bool b_old_half_height = p_sys->b_half_height;
1934 SetFilterMethod( p_vout, newval.psz_string );
/* same geometry as before: the current child vout can be kept */
1936 if( !b_old_half_height == !p_sys->b_half_height )
1938 vlc_mutex_unlock( &p_sys->filter_lock );
1942 /* We need to kill the old vout */
1945 vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1946 vout_CloseAndRelease( p_sys->p_vout );
1949 /* Try to open a new video output */
1950 p_sys->p_vout = SpawnRealVout( p_vout );
1952 if( p_sys->p_vout == NULL )
1954 /* Everything failed */
1955 msg_Err( p_vout, "cannot open vout, aborting" );
/* the lock is released on the error path too */
1957 vlc_mutex_unlock( &p_sys->filter_lock );
1958 return VLC_EGENERIC;
1961 vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1963 vlc_mutex_unlock( &p_sys->filter_lock );
1967 /*****************************************************************************
1968 * video filter2 functions
1969 *****************************************************************************/
1970 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1972 vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1973 picture_t *p_pic_dst;
1975 /* Request output picture */
1976 p_pic_dst = filter_NewPicture( p_filter );
1977 if( p_pic_dst == NULL )
1979 picture_Release( p_pic );
1983 switch( p_vout->p_sys->i_mode )
1985 case DEINTERLACE_DISCARD:
1986 RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1989 case DEINTERLACE_BOB:
1991 RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1992 RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1996 case DEINTERLACE_LINEAR:
1998 RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1999 RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
2001 msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2002 picture_Release( p_pic_dst );
2003 picture_Release( p_pic );
2006 case DEINTERLACE_MEAN:
2007 RenderMean( p_vout, p_pic_dst, p_pic );
2010 case DEINTERLACE_BLEND:
2011 RenderBlend( p_vout, p_pic_dst, p_pic );
2015 RenderX( p_pic_dst, p_pic );
2018 case DEINTERLACE_YADIF:
2019 msg_Err( p_vout, "delaying frames is not supported yet" );
2020 picture_Release( p_pic_dst );
2021 picture_Release( p_pic );
2024 case DEINTERLACE_YADIF2X:
2025 msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2026 picture_Release( p_pic_dst );
2027 picture_Release( p_pic );
2031 picture_CopyProperties( p_pic_dst, p_pic );
2032 p_pic_dst->b_progressive = true;
2034 picture_Release( p_pic );
/*****************************************************************************
 * OpenFilter: set up the deinterlacer as a video filter
 *****************************************************************************/
2041 static int OpenFilter( vlc_object_t *p_this )
2043 filter_t *p_filter = (filter_t*)p_this;
2044 vout_thread_t *p_vout;
2047 if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2048 return VLC_EGENERIC;
2050 /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2052 p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2053 vlc_object_attach( p_vout, p_filter );
2054 p_filter->p_sys = (filter_sys_t *)p_vout;
2055 p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2057 config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2059 var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2061 var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2062 var_Set( p_filter, "filter-deinterlace-mode", val );
2063 free( val.psz_string );
2065 if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2067 vlc_object_detach( p_vout );
2068 vlc_object_release( p_vout );
2069 return VLC_EGENERIC;
2073 GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2074 if( !p_filter->b_allow_fmt_out_change &&
2075 ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2076 fmt.i_height != p_filter->fmt_in.video.i_height ) )
2078 CloseFilter( VLC_OBJECT(p_filter) );
2079 return VLC_EGENERIC;
2081 p_filter->fmt_out.video = fmt;
2082 p_filter->fmt_out.i_codec = fmt.i_chroma;
2083 p_filter->pf_video_filter = Deinterlace;
2085 msg_Dbg( p_filter, "deinterlacing" );
2090 /*****************************************************************************
2091 * CloseFilter: clean up the filter
2092 *****************************************************************************/
/* CloseFilter: releases what OpenFilter() set up.
 * The private vout object stored in p_filter->p_sys is torn down with
 * Destroy(), then detached and released.
 * NOTE(review): the embedded line numbers skip, so the braces of this
 * function are missing from this listing; restore them from upstream. */
2093 static void CloseFilter( vlc_object_t *p_this )
2095 filter_t *p_filter = (filter_t*)p_this;
/* p_sys actually holds the private vout_thread_t */
2096 vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2098 Destroy( VLC_OBJECT(p_vout) );
2099 vlc_object_detach( p_vout );
2100 vlc_object_release( p_vout );