1 /*****************************************************************************
2 * deinterlace.c : deinterlacer plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2000-2009 the VideoLAN team
7 * Author: Sam Hocevar <sam@zoy.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
41 #include <vlc_filter.h>
44 #ifdef CAN_COMPILE_MMXEXT
48 #include "filter_common.h"
/* Deinterlace mode identifiers. One of these is stored in the i_mode member
 * of vout_sys_t by SetFilterMethod() and dispatched on in Render(). */
50 #define DEINTERLACE_DISCARD 1
51 #define DEINTERLACE_MEAN 2
52 #define DEINTERLACE_BLEND 3
53 #define DEINTERLACE_BOB 4
54 #define DEINTERLACE_LINEAR 5
55 #define DEINTERLACE_X 6
56 #define DEINTERLACE_YADIF 7
57 #define DEINTERLACE_YADIF2X 8
59 /*****************************************************************************
61 *****************************************************************************/
/* Entry points registered with set_callbacks() for the "video filter"
 * capability. */
62 static int Create ( vlc_object_t * );
63 static void Destroy ( vlc_object_t * );
/* Hooks installed on the vout thread by Create(). */
65 static int Init ( vout_thread_t * );
66 static void End ( vout_thread_t * );
67 static void Render ( vout_thread_t *, picture_t * );
/* Callback passed to vout_filter_AddChild()/vout_filter_DelChild(). */
69 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
70 vlc_value_t oldval, vlc_value_t newval, void *p_data );
/* Per-mode renderers called from Render(). */
72 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
73 static void RenderBob ( vout_thread_t *, picture_t *, picture_t *, int );
74 static void RenderMean ( vout_thread_t *, picture_t *, picture_t * );
75 static void RenderBlend ( vout_thread_t *, picture_t *, picture_t * );
76 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
77 static void RenderX ( picture_t *, picture_t * );
78 static void RenderYadif ( vout_thread_t *, picture_t *, picture_t *, int, int );
/* Line-averaging routines; Create() stores one of them in pf_merge depending
 * on the CPU capabilities reported by vlc_CPU(). */
80 static void MergeGeneric ( void *, const void *, const void *, size_t );
81 #if defined(CAN_COMPILE_C_ALTIVEC)
82 static void MergeAltivec ( void *, const void *, const void *, size_t );
84 #if defined(CAN_COMPILE_MMXEXT)
85 static void MergeMMXEXT ( void *, const void *, const void *, size_t );
87 #if defined(CAN_COMPILE_3DNOW)
88 static void Merge3DNow ( void *, const void *, const void *, size_t );
90 #if defined(CAN_COMPILE_SSE)
91 static void MergeSSE2 ( void *, const void *, const void *, size_t );
/* Routines stored in pf_end_merge next to the matching merge routine. */
93 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
94 static void EndMMX ( void );
96 #if defined(CAN_COMPILE_3DNOW)
97 static void End3DNow ( void );
99 #if defined __ARM_NEON__
100 static void MergeNEON (void *, const void *, const void *, size_t);
103 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
104 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
/* Entry points registered with set_callbacks() for the "video filter2"
 * capability. */
106 static int OpenFilter( vlc_object_t *p_this );
107 static void CloseFilter( vlc_object_t *p_this );
109 /*****************************************************************************
110 * Callback prototypes
111 *****************************************************************************/
/* Registered on the "filter-deinterlace-mode" variable in Init() and removed
 * in End(). */
112 static int FilterCallback( vlc_object_t *, char const *,
113 vlc_value_t, vlc_value_t, void * );
115 /*****************************************************************************
117 *****************************************************************************/
/* Labels and help texts for the two "mode" options declared in the module
 * descriptor below (local playback and streaming). */
118 #define MODE_TEXT N_("Deinterlace mode")
119 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
121 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
122 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
/* Prefix prepended to the option name of the streaming filter. */
124 #define FILTER_CFG_PREFIX "sout-deinterlace-"
/* Selectable mode names and their display texts, in matching order; both
 * arrays are handed to change_string_list() for each "mode" option. */
126 static const char *const mode_list[] = {
127 "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
128 static const char *const mode_list_text[] = {
129 N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };
/* Module descriptor: general information about the deinterlacing filter. */
132 set_description( N_("Deinterlacing video filter") )
133 set_shortname( N_("Deinterlace" ))
134 set_capability( "video filter", 0 )
135 set_category( CAT_VIDEO )
136 set_subcategory( SUBCAT_VIDEO_VFILTER )
/* "Display" section: the "filter-deinterlace-mode" option defaults to
 * "discard" and is restricted to the entries of mode_list. */
138 set_section( N_("Display"),NULL)
139 add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
140 MODE_LONGTEXT, false )
141 change_string_list( mode_list, mode_list_text, 0 )
144 add_shortcut( "deinterlace" )
145 set_callbacks( Create, Destroy )
/* "Streaming" section: a second entry with the "video filter2" capability,
 * its own "mode" option (default "blend") and OpenFilter/CloseFilter as
 * callbacks. */
148 set_capability( "video filter2", 0 )
149 set_section( N_("Streaming"),NULL)
150 add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
151 SOUT_MODE_LONGTEXT, false )
152 change_string_list( mode_list, mode_list_text, 0 )
153 add_shortcut( "deinterlace" )
154 set_callbacks( OpenFilter, CloseFilter )
157 static const char *const ppsz_filter_options[] = {
161 /*****************************************************************************
162 * vout_sys_t: Deinterlace video output method descriptor
163 *****************************************************************************
164 * This structure is part of the video output thread descriptor.
165 * It describes the Deinterlace specific properties of an output thread.
166 *****************************************************************************/
/* Number of entries in the pp_history array below. */
167 #define HISTORY_SIZE (3)
170 int i_mode; /* Deinterlace mode (one of the DEINTERLACE_* values) */
171 bool b_double_rate; /* Shall we double the framerate? */
172 bool b_half_height; /* Shall we divide the height by 2? */
/* Child video output created by SpawnRealVout(); receives the deinterlaced
 * pictures. */
177 vout_thread_t *p_vout;
/* Locked by Render() while it obtains, fills and displays output pictures. */
179 vlc_mutex_t filter_lock;
/* Line-averaging routine chosen in Create() from the CPU capabilities. */
181 void (*pf_merge) ( void *, const void *, const void *, size_t );
/* Cleanup routine paired with pf_merge; NULL when none is needed. */
182 void (*pf_end_merge) ( void );
/* Picture history; reset to NULL in Init(), non-NULL entries are released in
 * End(). */
185 picture_t *pp_history[HISTORY_SIZE];
188 /*****************************************************************************
189 * Control: control facility for the vout (forwards to child vout)
190 *****************************************************************************/
191 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
193 return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
196 /*****************************************************************************
197 * Create: allocates Deinterlace video thread output method
198 *****************************************************************************
199 * This function allocates and initializes a Deinterlace vout method.
200 *****************************************************************************/
/* Create: allocate the private data of the filter, install the vout hooks,
 * pick a line-averaging routine for the running CPU and apply the configured
 * deinterlace mode. */
201 static int Create( vlc_object_t *p_this )
203 vout_thread_t *p_vout = (vout_thread_t *)p_this;
207 /* Allocate structure */
208 p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
209 if( p_vout->p_sys == NULL )
/* Hooks called by the vout core; manage and display are left unset. */
212 p_vout->pf_init = Init;
213 p_vout->pf_end = End;
214 p_vout->pf_manage = NULL;
215 p_vout->pf_render = Render;
216 p_vout->pf_display = NULL;
217 p_vout->pf_control = Control;
/* Defaults, overwritten by SetFilterMethod() at the end of this function. */
219 p_sys->i_mode = DEINTERLACE_DISCARD;
220 p_sys->b_double_rate = false;
221 p_sys->b_half_height = true;
222 p_sys->last_date = 0;
224 vlc_mutex_init( &p_sys->filter_lock );
/* Select pf_merge/pf_end_merge: each compiled-in SIMD variant is tested
 * against vlc_CPU(), and MergeGeneric is the plain C choice.
 * NOTE(review): the branches are presumably chained with else so that the
 * first match wins - confirm against the complete file. */
226 #if defined(CAN_COMPILE_C_ALTIVEC)
227 if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
229 p_sys->pf_merge = MergeAltivec;
230 p_sys->pf_end_merge = NULL;
234 #if defined(CAN_COMPILE_SSE)
235 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
237 p_sys->pf_merge = MergeSSE2;
238 p_sys->pf_end_merge = EndMMX;
242 #if defined(CAN_COMPILE_MMXEXT)
243 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
245 p_sys->pf_merge = MergeMMXEXT;
246 p_sys->pf_end_merge = EndMMX;
250 #if defined(CAN_COMPILE_3DNOW)
251 if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
253 p_sys->pf_merge = Merge3DNow;
254 p_sys->pf_end_merge = End3DNow;
258 #if defined __ARM_NEON__
259 if( vlc_CPU() & CPU_CAPABILITY_NEON )
261 p_sys->pf_merge = MergeNEON;
262 p_sys->pf_end_merge = NULL;
267 p_sys->pf_merge = MergeGeneric;
268 p_sys->pf_end_merge = NULL;
271 /* Look what method was requested */
272 psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
/* Error path: report the problem and fall back to the "discard" mode name. */
276 msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
277 msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
279 psz_mode = strdup( "discard" );
282 SetFilterMethod( p_vout, psz_mode );
289 /*****************************************************************************
290 * SetFilterMethod: setup the deinterlace method to use.
291 *****************************************************************************/
/* Translate a method name into i_mode, b_double_rate and b_half_height.
 * Besides the names in mode_list, the aliases "average", "combine-fields"
 * (blend) and "progressive-scan" (bob) are accepted. */
292 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
294 vout_sys_t *p_sys = p_vout->p_sys;
295 if( !strcmp( psz_method, "mean" ) )
297 p_sys->i_mode = DEINTERLACE_MEAN;
298 p_sys->b_double_rate = false;
299 p_sys->b_half_height = true;
301 else if( !strcmp( psz_method, "blend" )
302 || !strcmp( psz_method, "average" )
303 || !strcmp( psz_method, "combine-fields" ) )
305 p_sys->i_mode = DEINTERLACE_BLEND;
306 p_sys->b_double_rate = false;
307 p_sys->b_half_height = false;
309 else if( !strcmp( psz_method, "bob" )
310 || !strcmp( psz_method, "progressive-scan" ) )
312 p_sys->i_mode = DEINTERLACE_BOB;
313 p_sys->b_double_rate = true;
314 p_sys->b_half_height = false;
316 else if( !strcmp( psz_method, "linear" ) )
318 p_sys->i_mode = DEINTERLACE_LINEAR;
319 p_sys->b_double_rate = true;
320 p_sys->b_half_height = false;
322 else if( !strcmp( psz_method, "x" ) )
324 p_sys->i_mode = DEINTERLACE_X;
325 p_sys->b_double_rate = false;
326 p_sys->b_half_height = false;
328 else if( !strcmp( psz_method, "yadif" ) )
330 p_sys->i_mode = DEINTERLACE_YADIF;
331 p_sys->b_double_rate = false;
332 p_sys->b_half_height = false;
334 else if( !strcmp( psz_method, "yadif2x" ) )
336 p_sys->i_mode = DEINTERLACE_YADIF2X;
337 p_sys->b_double_rate = true;
338 p_sys->b_half_height = false;
/* Fallback: "discard". Any other name is reported as invalid first. For the
 * 4:2:2 chromas the height is not halved. */
342 const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
343 p_vout->render.i_chroma == VLC_CODEC_J422;
344 if( strcmp( psz_method, "discard" ) )
345 msg_Err( p_vout, "no valid deinterlace mode provided, "
346 "using \"discard\"" );
348 p_sys->i_mode = DEINTERLACE_DISCARD;
349 p_sys->b_double_rate = false;
350 p_sys->b_half_height = !b_i422;
/* The message prints the requested name, even when the fallback was taken. */
353 msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
/* Derive the format of the child video output (p_dst) from the source format
 * (p_src) and the current deinterlace settings. */
356 static void GetOutputFormat( vout_thread_t *p_vout,
357 video_format_t *p_dst, const video_format_t *p_src )
/* Half-height modes: halve every vertical dimension and double i_sar_den to
 * compensate. */
361 if( p_vout->p_sys->b_half_height )
363 p_dst->i_height /= 2;
364 p_dst->i_visible_height /= 2;
365 p_dst->i_y_offset /= 2;
366 p_dst->i_sar_den *= 2;
/* 4:2:2 input: the modes listed below keep the source chroma; the remaining
 * modes select a different output chroma. */
369 if( p_src->i_chroma == VLC_CODEC_I422 ||
370 p_src->i_chroma == VLC_CODEC_J422 )
372 switch( p_vout->p_sys->i_mode )
374 case DEINTERLACE_MEAN:
375 case DEINTERLACE_LINEAR:
377 case DEINTERLACE_YADIF:
378 case DEINTERLACE_YADIF2X:
379 p_dst->i_chroma = p_src->i_chroma;
382 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
389 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
391 return i_chroma == VLC_CODEC_I420 ||
392 i_chroma == VLC_CODEC_J420 ||
393 i_chroma == VLC_CODEC_YV12 ||
394 i_chroma == VLC_CODEC_I422 ||
395 i_chroma == VLC_CODEC_J422;
398 /*****************************************************************************
399 * Init: initialize Deinterlace video thread output method
400 *****************************************************************************/
/* Init: check the input chroma, mirror the render format on the output side,
 * spawn the child video output and hook up buffers and callbacks. */
401 static int Init( vout_thread_t *p_vout )
403 I_OUTPUTPICTURES = 0;
405 if( !IsChromaSupported( p_vout->render.i_chroma ) )
406 return VLC_EGENERIC; /* unknown chroma */
408 /* Initialize the output structure, full of directbuffers since we want
409 * the decoder to output directly to our structures. */
410 p_vout->output.i_chroma = p_vout->render.i_chroma;
411 p_vout->output.i_width = p_vout->render.i_width;
412 p_vout->output.i_height = p_vout->render.i_height;
413 p_vout->output.i_aspect = p_vout->render.i_aspect;
414 p_vout->fmt_out = p_vout->fmt_in;
416 /* Try to open the real video output */
417 p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
419 if( p_vout->p_sys->p_vout == NULL )
421 /* Everything failed */
422 msg_Err( p_vout, "cannot open vout, aborting" );
/* Start with an empty picture history. */
427 for( int i = 0; i < HISTORY_SIZE; i++ )
428 p_vout->p_sys->pp_history[i] = NULL;
430 vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
/* MouseEvent is registered against the child vout. */
432 vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
/* Watch the mode variable; the matching var_DelCallback() is in End(). */
434 var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
439 /*****************************************************************************
440 * SpawnRealVout: spawn the real video output.
441 *****************************************************************************/
/* Create the child video output with the format computed by
 * GetOutputFormat() from p_vout->fmt_out. Returns the result of
 * vout_Create(); Init() treats NULL as failure. */
442 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
444 msg_Dbg( p_vout, "spawning the real video output" );
447 GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
449 return vout_Create( p_vout, &fmt );
452 /*****************************************************************************
453 * End: terminate Deinterlace video thread output method
454 *****************************************************************************/
/* End: undo what Init() set up - remove the mode callback, release the
 * picture history, detach and close the child vout, free the direct
 * buffers. */
455 static void End( vout_thread_t *p_vout )
457 vout_sys_t *p_sys = p_vout->p_sys;
459 var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
/* Only non-NULL history entries hold a picture reference. */
461 for( int i = 0; i < HISTORY_SIZE; i++ )
463 if( p_sys->pp_history[i] )
464 picture_Release( p_sys->pp_history[i] );
469 vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
470 vout_CloseAndRelease( p_sys->p_vout );
473 vout_filter_ReleaseDirectBuffers( p_vout );
476 /*****************************************************************************
477 * Destroy: destroy Deinterlace video thread output method
478 *****************************************************************************
479 * Terminate an output method created by DeinterlaceCreateOutputMethod
480 *****************************************************************************/
481 static void Destroy( vlc_object_t *p_this )
483 vout_thread_t *p_vout = (vout_thread_t *)p_this;
484 vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
485 free( p_vout->p_sys );
489 * Forward mouse event with proper conversion.
/* Callback registered on the child vout: re-emits the changed variable on
 * the filter vout passed as p_data. "mouse-y" gets special treatment when
 * the height is halved.
 * NOTE(review): the statement guarded by the condition below presumably
 * rescales newval to full height - confirm against the complete file. */
491 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
492 vlc_value_t oldval, vlc_value_t newval, void *p_data )
494 vout_thread_t *p_vout = p_data;
495 VLC_UNUSED(p_this); VLC_UNUSED(oldval);
497 if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
500 return var_Set( p_vout, psz_var, newval );
503 /*****************************************************************************
504 * Render: displays previously rendered output
505 *****************************************************************************
506 * This function sends the currently rendered image to the Deinterlace image,
507 * waits until it is displayed and switches the two rendering buffers, preparing
509 *****************************************************************************/
/* Render: deinterlace p_pic into one picture - or two in double-rate modes -
 * obtained from the child vout, and hand the result(s) to the child vout for
 * display. filter_lock is held from picture allocation until the pictures
 * have been displayed. */
510 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
512 vout_sys_t *p_sys = p_vout->p_sys;
513 picture_t *pp_outpic[2];
515 /* FIXME are they needed ? */
516 p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
517 p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
518 p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
519 p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
521 /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
522 p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
523 p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
524 p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
/* Reads fmt_in where the three lines above read fmt_out; the value is the
 * same because fmt_out.i_visible_height was just copied from fmt_in. */
525 p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
/* Half-height modes: the child vout sees half the vertical offset/height. */
526 if( p_vout->p_sys->b_half_height )
528 p_sys->p_vout->fmt_in.i_y_offset /= 2;
529 p_sys->p_vout->fmt_in.i_visible_height /= 2;
/* Aspect change: acknowledge the flag, propagate the new sample aspect ratio
 * and request the child vout again with the adjusted format. */
532 if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
534 p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
536 p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
537 p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
539 video_format_t fmt = p_vout->fmt_out;
540 if( p_vout->p_sys->b_half_height )
542 fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
546 p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
551 pp_outpic[0] = pp_outpic[1] = NULL;
553 vlc_mutex_lock( &p_vout->p_sys->filter_lock );
555 /* Get a new picture */
556 while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
/* While waiting: give up (after unlocking) if the vout is dying or in error,
 * otherwise sleep and retry. */
560 if( !vlc_object_alive( p_vout ) || p_vout->b_error )
562 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
565 msleep( VOUT_OUTMEM_SLEEP );
568 pp_outpic[0]->date = p_pic->date;
570 /* If we are using double rate, get an additional new picture */
571 if( p_vout->p_sys->b_double_rate )
573 while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
/* Same bail-out as above, but the first picture must be destroyed too. */
577 if( !vlc_object_alive( p_vout ) || p_vout->b_error )
579 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
580 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
583 msleep( VOUT_OUTMEM_SLEEP );
586 /* 20ms is a bit arbitrary, but it's only for the first image we get */
587 if( !p_vout->p_sys->last_date )
588 pp_outpic[1]->date = p_pic->date + 20000;
/* (3 * date - last_date) / 2 == date + (date - last_date) / 2: the second
 * picture is dated half a frame interval after the first. */
590 pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
591 p_vout->p_sys->last_date = p_pic->date;
/* Dispatch on the mode. Double-rate modes render both fields, starting with
 * the one selected by b_top_field_first. */
594 switch( p_vout->p_sys->i_mode )
596 case DEINTERLACE_DISCARD:
597 RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
598 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
601 case DEINTERLACE_BOB:
602 RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
603 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
604 RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
605 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
608 case DEINTERLACE_LINEAR:
609 RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
610 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
611 RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
612 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
615 case DEINTERLACE_MEAN:
616 RenderMean( p_vout, pp_outpic[0], p_pic );
617 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
620 case DEINTERLACE_BLEND:
621 RenderBlend( p_vout, pp_outpic[0], p_pic );
622 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
626 RenderX( pp_outpic[0], p_pic );
627 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
630 case DEINTERLACE_YADIF:
631 RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
632 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
635 case DEINTERLACE_YADIF2X:
636 RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
637 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
638 RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
639 vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
642 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
645 /*****************************************************************************
646 * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
647 *****************************************************************************/
/* Fill p_outpic with the lines of one field of p_pic. i_field selects the
 * first source line (0 or 1); the handling then depends on the render chroma
 * and, in the second variant, on the plane. */
648 static void RenderDiscard( vout_thread_t *p_vout,
649 picture_t *p_outpic, picture_t *p_pic, int i_field )
653 /* Copy image and skip lines */
654 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
656 uint8_t *p_in, *p_out_end, *p_out;
/* Start reading at source line i_field. */
659 p_in = p_pic->p[i_plane].p_pixels
660 + i_field * p_pic->p[i_plane].i_pitch;
/* p_out_end marks the end of the visible lines of the output plane. */
662 p_out = p_outpic->p[i_plane].p_pixels;
663 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
664 * p_outpic->p[i_plane].i_visible_lines;
666 switch( p_vout->render.i_chroma )
/* First variant: one output line per source line pair (the source pointer
 * advances by two lines per copy). */
672 for( ; p_out < p_out_end ; )
674 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
676 p_out += p_outpic->p[i_plane].i_pitch;
677 p_in += 2 * p_pic->p[i_plane].i_pitch;
/* Second variant: the luma plane writes every kept source line to two
 * consecutive output lines; the other planes write it once. */
684 i_increment = 2 * p_pic->p[i_plane].i_pitch;
686 if( i_plane == Y_PLANE )
688 for( ; p_out < p_out_end ; )
690 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
691 p_out += p_outpic->p[i_plane].i_pitch;
692 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
693 p_out += p_outpic->p[i_plane].i_pitch;
699 for( ; p_out < p_out_end ; )
701 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
702 p_out += p_outpic->p[i_plane].i_pitch;
714 /*****************************************************************************
715 * RenderBob: renders a BOB picture - simple copy
716 *****************************************************************************/
/* Build a full-height picture from one field of p_pic by writing each kept
 * source line to two output lines. The first and last output lines are
 * handled separately depending on the field; the handling of the planes
 * depends on the render chroma. */
717 static void RenderBob( vout_thread_t *p_vout,
718 picture_t *p_outpic, picture_t *p_pic, int i_field )
722 /* Copy image and skip lines */
723 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
725 uint8_t *p_in, *p_out_end, *p_out;
727 p_in = p_pic->p[i_plane].p_pixels;
728 p_out = p_outpic->p[i_plane].p_pixels;
729 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
730 * p_outpic->p[i_plane].i_visible_lines;
732 switch( p_vout->render.i_chroma )
737 /* For BOTTOM field we need to add the first line */
740 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
741 p_in += p_pic->p[i_plane].i_pitch;
742 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop the main loop two output lines early; they are written after it. */
745 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
747 for( ; p_out < p_out_end ; )
749 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
751 p_out += p_outpic->p[i_plane].i_pitch;
753 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
755 p_in += 2 * p_pic->p[i_plane].i_pitch;
756 p_out += p_outpic->p[i_plane].i_pitch;
759 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
761 /* For TOP field we need to add the last line */
764 p_in += p_pic->p[i_plane].i_pitch;
765 p_out += p_outpic->p[i_plane].i_pitch;
766 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/* Second chroma variant: same first/last line handling, but only the luma
 * plane doubles lines; the other planes copy one line per source line pair. */
772 /* For BOTTOM field we need to add the first line */
775 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
776 p_in += p_pic->p[i_plane].i_pitch;
777 p_out += p_outpic->p[i_plane].i_pitch;
780 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
782 if( i_plane == Y_PLANE )
784 for( ; p_out < p_out_end ; )
786 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
788 p_out += p_outpic->p[i_plane].i_pitch;
790 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
792 p_in += 2 * p_pic->p[i_plane].i_pitch;
793 p_out += p_outpic->p[i_plane].i_pitch;
798 for( ; p_out < p_out_end ; )
800 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
802 p_out += p_outpic->p[i_plane].i_pitch;
803 p_in += 2 * p_pic->p[i_plane].i_pitch;
807 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
809 /* For TOP field we need to add the last line */
812 p_in += p_pic->p[i_plane].i_pitch;
813 p_out += p_outpic->p[i_plane].i_pitch;
814 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/* Shorthands for the merge routines stored in vout_sys_t. Both expand to an
 * expression on a variable named p_vout, so they only work inside functions
 * that have one in scope. EndMerge expands to an unbraced "if", so take care
 * when using it directly before an "else". */
821 #define Merge p_vout->p_sys->pf_merge
822 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
824 /*****************************************************************************
825 * RenderLinear: BOB with linear interpolation
826 *****************************************************************************/
/* Like RenderBob, but the line between two kept source lines is the average
 * of both (computed with Merge) instead of a plain copy. */
827 static void RenderLinear( vout_thread_t *p_vout,
828 picture_t *p_outpic, picture_t *p_pic, int i_field )
832 /* Copy image and skip lines */
833 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
835 uint8_t *p_in, *p_out_end, *p_out;
837 p_in = p_pic->p[i_plane].p_pixels;
838 p_out = p_outpic->p[i_plane].p_pixels;
839 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
840 * p_outpic->p[i_plane].i_visible_lines;
842 /* For BOTTOM field we need to add the first line */
845 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
846 p_in += p_pic->p[i_plane].i_pitch;
847 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop the main loop two output lines early; they are written after it. */
850 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
852 for( ; p_out < p_out_end ; )
/* Kept line: straight copy. */
854 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
856 p_out += p_outpic->p[i_plane].i_pitch;
/* In-between line: average of this kept line and the next one of the same
 * field (two source lines further). */
858 Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
859 p_pic->p[i_plane].i_pitch );
861 p_in += 2 * p_pic->p[i_plane].i_pitch;
862 p_out += p_outpic->p[i_plane].i_pitch;
865 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
867 /* For TOP field we need to add the last line */
870 p_in += p_pic->p[i_plane].i_pitch;
871 p_out += p_outpic->p[i_plane].i_pitch;
872 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/* Half-height rendering: every output line is the average of two consecutive
 * source lines, i.e. of one line from each field. */
878 static void RenderMean( vout_thread_t *p_vout,
879 picture_t *p_outpic, picture_t *p_pic )
883 /* Copy image and skip lines */
884 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
886 uint8_t *p_in, *p_out_end, *p_out;
888 p_in = p_pic->p[i_plane].p_pixels;
890 p_out = p_outpic->p[i_plane].p_pixels;
891 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
892 * p_outpic->p[i_plane].i_visible_lines;
894 /* All lines: mean value */
895 for( ; p_out < p_out_end ; )
897 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
898 p_pic->p[i_plane].i_pitch );
/* One output line consumes two source lines. */
900 p_out += p_outpic->p[i_plane].i_pitch;
901 p_in += 2 * p_pic->p[i_plane].i_pitch;
/* Full-height rendering: the first line is copied, every following output
 * line is the average of two consecutive source lines. The handling of the
 * planes depends on the render chroma. */
907 static void RenderBlend( vout_thread_t *p_vout,
908 picture_t *p_outpic, picture_t *p_pic )
912 /* Copy image and skip lines */
913 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
915 uint8_t *p_in, *p_out_end, *p_out;
917 p_in = p_pic->p[i_plane].p_pixels;
919 p_out = p_outpic->p[i_plane].p_pixels;
920 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
921 * p_outpic->p[i_plane].i_visible_lines;
923 switch( p_vout->render.i_chroma )
928 /* First line: simple copy */
929 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
930 p_out += p_outpic->p[i_plane].i_pitch;
932 /* Remaining lines: mean value */
933 for( ; p_out < p_out_end ; )
935 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
936 p_pic->p[i_plane].i_pitch );
938 p_out += p_outpic->p[i_plane].i_pitch;
939 p_in += p_pic->p[i_plane].i_pitch;
/* Second chroma variant: luma advances one source line per output line, the
 * other planes advance two. */
945 /* First line: simple copy */
946 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
947 p_out += p_outpic->p[i_plane].i_pitch;
949 /* Remaining lines: mean value */
950 if( i_plane == Y_PLANE )
952 for( ; p_out < p_out_end ; )
954 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
955 p_pic->p[i_plane].i_pitch );
957 p_out += p_outpic->p[i_plane].i_pitch;
958 p_in += p_pic->p[i_plane].i_pitch;
964 for( ; p_out < p_out_end ; )
966 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
967 p_pic->p[i_plane].i_pitch );
969 p_out += p_outpic->p[i_plane].i_pitch;
970 p_in += 2*p_pic->p[i_plane].i_pitch;
/**
 * MergeGeneric: average two byte buffers into a third one (portable C).
 *
 * Every output byte is ( s1[i] + s2[i] ) >> 1, i.e. the mean rounded down.
 *
 * @param _p_dest destination buffer, at least i_bytes bytes long
 * @param _p_s1   first source buffer, at least i_bytes bytes long
 * @param _p_s2   second source buffer, at least i_bytes bytes long
 * @param i_bytes number of bytes to process; any value is valid, including 0
 *                and values below the unroll factor
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Loop on the number of remaining bytes rather than on an end pointer
     * computed as "p_dest + i_bytes - 8": for i_bytes < 8 that expression
     * forms a pointer before the start of the buffer, which is undefined
     * behaviour. */
    for( ; i_bytes >= 8; i_bytes -= 8 )
    {
        /* Unrolled by 8 */
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* Tail: fewer than 8 bytes left */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1009 #if defined(CAN_COMPILE_MMXEXT)
/* MMXEXT variant of the merge routine: 8 bytes per inline-asm iteration,
 * followed by a byte-wise C loop for the remainder.
 * NOTE(review): "p_dest + i_bytes - 8" points before the buffer when
 * i_bytes < 8 - confirm callers never pass such sizes. */
1010 static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
1013 uint8_t* p_dest = (uint8_t*)_p_dest;
1014 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
1015 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
1016 uint8_t* p_end = p_dest + i_bytes - 8;
1017 while( p_dest < p_end )
1019 __asm__ __volatile__( "movq %2,%%mm1;"
1021 "movq %%mm1, %0" :"=m" (*p_dest):
/* Remaining bytes: plain C average, rounded down. */
1031 while( p_dest < p_end )
1033 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1038 #if defined(CAN_COMPILE_3DNOW)
/* 3DNow! variant of the merge routine: 8 bytes per inline-asm iteration
 * (pavgusb), followed by a byte-wise C loop for the remainder.
 * NOTE(review): "p_dest + i_bytes - 8" points before the buffer when
 * i_bytes < 8 - confirm callers never pass such sizes. */
1039 static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
1042 uint8_t* p_dest = (uint8_t*)_p_dest;
1043 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
1044 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
1045 uint8_t* p_end = p_dest + i_bytes - 8;
1046 while( p_dest < p_end )
1048 __asm__ __volatile__( "movq %2,%%mm1;"
1049 "pavgusb %1, %%mm1;"
1050 "movq %%mm1, %0" :"=m" (*p_dest):
/* Remaining bytes: plain C average, rounded down. */
1060 while( p_dest < p_end )
1062 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1067 #if defined(CAN_COMPILE_SSE)
/**
 * MergeSSE2: average two byte buffers into a third one using SSE2.
 *
 * The buffer is processed in three phases: a byte-wise C prologue until
 * p_s1 is 16-byte aligned, 16-byte SSE2 blocks, then a byte-wise C tail.
 *
 * @param _p_dest destination buffer, at least i_bytes bytes long
 *                (no alignment required)
 * @param _p_s1   first source buffer, at least i_bytes bytes long
 * @param _p_s2   second source buffer, at least i_bytes bytes long
 * @param i_bytes number of bytes to process; any value is valid
 */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Prologue: pavgb reads p_s1 as a memory operand, which must be 16-byte
     * aligned. Every byte handled here is deducted from i_bytes; without
     * that the later phases would run past the end of the buffers by the
     * length of the prologue. The i_bytes test also stops the prologue on
     * buffers shorter than the distance to the next aligned address. */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* SSE2 blocks. The strict comparison mirrors the historical loop bound
     * (a final block of exactly 16 bytes goes through the C tail), so the
     * output stays bit-identical: pavgb rounds the mean up whereas the C
     * code rounds it down. */
    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        /* The asm stores 16 bytes although the output operand only names
         * the first one, hence the "memory" clobber; xmm1 is used as
         * scratch register. */
        __asm__ __volatile__( "movdqu %2,%%xmm1;"
                              "pavgb %1, %%xmm1;"
                              "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                "m" (*p_s1),
                                                "m" (*p_s2)
                                              : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Tail: at most 16 bytes left */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1101 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* Executes the "emms" instruction. Create() installs this as pf_end_merge
 * together with MergeSSE2 and MergeMMXEXT. */
1102 static void EndMMX( void )
1104 __asm__ __volatile__( "emms" :: );
1108 #if defined(CAN_COMPILE_3DNOW)
/* Executes the "femms" instruction. Create() installs this as pf_end_merge
 * together with Merge3DNow. */
1109 static void End3DNow( void )
1111 __asm__ __volatile__( "femms" :: );
1115 #ifdef CAN_COMPILE_C_ALTIVEC
/* AltiVec variant of the merge routine: a byte-wise C prologue until the
 * destination is 16-byte aligned, 16-byte vec_avg blocks (with a separate
 * path for unaligned sources), then a byte-wise C loop for the remainder. */
1116 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1117 const void *_p_s2, size_t i_bytes )
1119 uint8_t *p_dest = (uint8_t *)_p_dest;
1120 uint8_t *p_s1 = (uint8_t *)_p_s1;
1121 uint8_t *p_s2 = (uint8_t *)_p_s2;
/* Computed from the original destination, before the prologue moves it. */
1122 uint8_t *p_end = p_dest + i_bytes - 15;
1124 /* Use C until the first 16-bytes aligned destination pixel */
1125 while( (uintptr_t)p_dest & 0xF )
1127 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* NOTE(review): the pointers are cast to int here but to uintptr_t above;
 * only the low four bits are used, yet the cast truncates on 64-bit targets
 * and may trigger warnings - consider uintptr_t. */
1130 if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1132 /* Unaligned source */
1133 vector unsigned char s1v, s2v, destv;
1134 vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1135 vector unsigned char perm1v, perm2v;
/* Permutation vectors derived from the source addresses; each loop iteration
 * combines the previous and the next aligned load with vec_perm. */
1137 perm1v = vec_lvsl( 0, p_s1 );
1138 perm2v = vec_lvsl( 0, p_s2 );
1139 s1oldv = vec_ld( 0, p_s1 );
1140 s2oldv = vec_ld( 0, p_s2 );
1142 while( p_dest < p_end )
1144 s1newv = vec_ld( 16, p_s1 );
1145 s2newv = vec_ld( 16, p_s2 );
1146 s1v = vec_perm( s1oldv, s1newv, perm1v );
1147 s2v = vec_perm( s2oldv, s2newv, perm2v );
1150 destv = vec_avg( s1v, s2v );
1151 vec_st( destv, 0, p_dest );
1160 /* Aligned source */
1161 vector unsigned char s1v, s2v, destv;
1163 while( p_dest < p_end )
1165 s1v = vec_ld( 0, p_s1 );
1166 s2v = vec_ld( 0, p_s2 );
1167 destv = vec_avg( s1v, s2v );
1168 vec_st( destv, 0, p_dest );
/* Remaining bytes: plain C average, rounded down. */
1178 while( p_dest < p_end )
1180 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* NEON variant of the merge routine: a MergeGeneric prologue for a
 * misaligned destination, vhadd.u8 blocks (one asm variant for unaligned and
 * one for 16-byte aligned sources), then MergeGeneric for what is left. */
1186 static void MergeNEON (void *restrict out, const void *in1,
1187 const void *in2, size_t n)
1189 uint8_t *outp = out;
1190 const uint8_t *in1p = in1;
1191 const uint8_t *in2p = in2;
/* Offset of the destination inside its 16-byte block.
 * NOTE(review): this offset is passed as the byte count of the prologue;
 * reaching the next 16-byte boundary takes 16 - mis bytes - verify that the
 * destination really is aligned for the ":128" stores below. */
1192 size_t mis = ((uintptr_t)outp) & 15;
1196 MergeGeneric (outp, in1p, in2p, mis);
/* NOTE(review): n is rounded down to a multiple of 16 here, but each asm
 * block below loads and stores 64 bytes - verify that the loop cannot run
 * past end. */
1203 uint8_t *end = outp + (n & ~15);
/* Unaligned sources: loads without alignment hint. */
1205 if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
1208 "vld1.u8 {q0-q1}, [%[in1]]!\n"
1209 "vld1.u8 {q2-q3}, [%[in2]]!\n"
1210 "vhadd.u8 q4, q0, q2\n"
1211 "vld1.u8 {q6-q7}, [%[in1]]!\n"
1212 "vhadd.u8 q5, q1, q3\n"
1213 "vld1.u8 {q8-q9}, [%[in2]]!\n"
1214 "vhadd.u8 q10, q6, q8\n"
1215 "vhadd.u8 q11, q7, q9\n"
1216 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
1217 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
1218 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
/* NOTE(review): the asm writes q0-q11 but only q0-q2 are declared as
 * clobbered - the list looks incomplete. */
1220 : "q0", "q1", "q2", "memory");
/* Aligned sources: same sequence with ":128" hints on the loads. */
1224 "vld1.u8 {q0-q1}, [%[in1],:128]!\n"
1225 "vld1.u8 {q2-q3}, [%[in2],:128]!\n"
1226 "vhadd.u8 q4, q0, q2\n"
1227 "vld1.u8 {q6-q7}, [%[in1],:128]!\n"
1228 "vhadd.u8 q5, q1, q3\n"
1229 "vld1.u8 {q8-q9}, [%[in2],:128]!\n"
1230 "vhadd.u8 q10, q6, q8\n"
1231 "vhadd.u8 q11, q7, q9\n"
1232 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
1233 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
1234 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
/* NOTE(review): same incomplete clobber list as above. */
1236 : "q0", "q1", "q2", "memory");
/* Whatever the vector loop did not cover is averaged in plain C. */
1239 MergeGeneric (outp, in1p, in2p, n);
/*****************************************************************************
 * RenderX: This algorithm works on 8x8 blocks. It copies the top field
 * and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields, if there is a
 *    good match we use MC to recreate the bottom field (with a small
 *    blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
/* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
 * XXX: It needs access to an 8x10 area.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection in smooth/uniform areas with noise doesn't work well,
 * but it's not really a problem because they don't have much detail anyway.
 */
/* Square of a difference; used to build sums of squared differences. */
static inline int ssd( int a )
{
    return a * a;
}
/**
 * Tell whether the 8-pixel-wide block at src looks interlaced (C version).
 *
 * Four line pairs (starting at rows 0, 2, 4 and 6) are examined; each one
 * reads rows y .. y+3, so rows 0 .. 9 of the block are accessed.
 * For every pair, "fr" sums the squared differences between adjacent lines
 * and "ff" the squared differences between lines two rows apart (same
 * field). A pair votes "interlaced" when ff < 6*fr/8 and fr > 32.
 *
 * @param src   top-left pixel of the block
 * @param i_src distance in bytes between two consecutive lines
 * @return 1 if at least one pair voted "interlaced", 0 otherwise
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int fc = 0;

    /* Detect interlacing */
    for( int y = 0; y < 7; y += 2 )
    {
        int fr = 0;
        int ff = 0;

        for( int x = 0; x < 8; x++ )
        {
            const int l0 = src[          x];
            const int l1 = src[1*i_src + x];
            const int l2 = src[2*i_src + x];
            const int l3 = src[3*i_src + x];

            fr += (l0 - l1) * (l0 - l1) + (l1 - l2) * (l1 - l2);
            ff += (l0 - l2) * (l0 - l2) + (l1 - l3) * (l1 - l3);
        }
        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }

    return fc >= 1;
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * MMXEXT counterpart of XDeint8x8DetectC().
 *
 * For every examined line pair the same two sums as in the C version are
 * built (adjacent-line differences in mm5 -> fr, same-field differences in
 * mm6 -> ff) and the same test "ff < 6*fr/8 && fr > 32" is applied.
 *
 * NOTE(review): this loop runs while y < 9 (five line pairs, rows 0 .. 11
 * are read) whereas the C version stops at y < 7; the return value is the
 * raw vote count instead of 0/1. Callers only test it for non-zero.
 *
 * @param src   top-left pixel of the block
 * @param i_src distance in bytes between two consecutive lines
 * @return number of line pairs that voted "interlaced"
 */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{
    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    pxor_r2r( mm7, mm7 );               /* mm7 = 0, used to widen bytes */
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        pxor_r2r( mm5, mm5 );           /* fr accumulator */
        pxor_r2r( mm6, mm6 );           /* ff accumulator */
        for( x = 0; x < 8; x+=4 )
        {
            /* 4 pixels of lines 0..3 */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            psubw_r2r( mm1, mm0 );      /* line0 - line1 */
            psubw_r2r( mm2, mm4 );      /* line0 - line2 */

            psubw_r2r( mm1, mm2 );      /* line2 - line1 */
            psubw_r2r( mm1, mm3 );      /* line3 - line1 */

            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Fold the two 32-bit halves of each accumulator */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
#endif
/**
 * Rebuild a progressive 8x8 block (C version).
 *
 * For each of the 4 line pairs: the src1 line is copied as is, and the
 * following output line is the (1,6,1)/8 blend of the src1 line, the src2
 * line and the next src1 line. The last pair therefore reads one src1 line
 * beyond the fourth one.
 *
 * @param dst    output block
 * @param i_dst  output line stride in bytes
 * @param src1   first kept line
 * @param i_src1 distance between two consecutive src1 lines
 * @param src2   first line to be filtered
 * @param i_src2 distance between two consecutive src2 lines
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int y = 0; y < 8; y += 2 )
    {
        uint8_t *p_blend = dst + i_dst;

        memcpy( dst, src1, 8 );

        for( int x = 0; x < 8; x++ )
            p_blend[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4 ) >> 3;

        dst  += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * MMXEXT counterpart of XDeint8x8MergeC(): the src1 line is copied and the
 * next output line is (src1 + 6*src2 + next src1 + 4) >> 3, processed
 * 4 pixels at a time.
 *
 * @param dst    output block
 * @param i_dst  output line stride in bytes
 * @param src1   first kept line
 * @param i_src1 distance between two consecutive src1 lines
 * @param src2   first line to be filtered
 * @param i_src2 distance between two consecutive src2 lines
 */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    static const uint64_t m_4 = INT64_C(0x0004000400040004);
    int y, x;

    /* Progressive */
    pxor_r2r( mm7, mm7 );               /* mm7 = 0, used to widen bytes */
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Copy the kept line */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            paddw_r2r( mm1, mm1 );      /* 2*src2 */
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );      /* 4*src2 */
            paddw_r2r( mm2, mm0 );      /* src1 + next src1 */
            paddw_r2r( mm3, mm1 );      /* 6*src2 */
            paddw_m2r( m_4, mm1 );      /* + rounding term */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
#endif
/**
 * Fill an 8x8 block with a constant value.
 *
 * @param dst   top-left pixel of the block
 * @param i_dst line stride in bytes
 * @param v     value written to each of the 64 pixels
 */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int y = 0; y < 8; y++ )
        memset( &dst[y*i_dst], v, 8 );
}
/* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for blocks that miss a
 * neighbour
 * (Uses 8x9 pixels)
 * TODO: a better one for the inner part.
 */
/**
 * C version: every even source line is copied, and the output line below it
 * is the average (rounded down) of that source line and the source line two
 * rows further. The odd source lines are never read.
 *
 * @param dst   output block
 * @param i_dst output line stride in bytes
 * @param src   top-left pixel of the source block
 * @param i_src source line stride in bytes
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        const uint8_t *p_below = &src[2*i_src];
        uint8_t *p_interp = dst + i_dst;

        memcpy( dst, src, 8 );

        for( int x = 0; x < 8; x++ )
            p_interp[x] = (src[x] + p_below[x] ) >> 1;

        dst += 2*i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * MMXEXT counterpart of XDeint8x8FieldEC(): copies each even source line
 * and writes below it the pavgb average of that line and the source line
 * two rows further, 8 pixels at a time.
 *
 * @param dst   output block
 * @param i_dst output line stride in bytes
 * @param src   top-left pixel of the source block
 * @param i_src source line stride in bytes
 */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Interpolated line */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* XDeint8x8Field: Edge oriented interpolation
 * (Needs -4 and +5 pixels H, +1 line)
 */
/**
 * C version. Every even source line is copied. Each pixel of the line below
 * is interpolated between the line above (src) and the next line of the same
 * field (src2), along the direction whose 8-pixel sum of absolute
 * differences is the smallest:
 *   c0: src shifted 2 pixels left of src2  -> average src[x-1], src2[x+1]
 *   c1: no shift                            -> average src[x],   src2[x]
 *   c2: src shifted 2 pixels right of src2 -> average src[x+1], src2[x-1]
 * The vertical average is used unless c0 (resp. c2) is strictly the best.
 *
 * @param dst   output block
 * @param i_dst output line stride in bytes
 * @param src   top-left pixel of the source block; pixels x-4 .. x+12 of the
 *              lines are read
 * @param i_src source line stride in bytes
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        const uint8_t *src2 = &src[2*i_src];

        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* I use 8 pixels just to match the MMX version, but it's overkill
             * 5 would be enough (less isn't good) */
            int c0 = 0;
            int c1 = 0;
            int c2 = 0;

            for( int k = 0; k < 8; k++ )
            {
                c0 += abs( src[x-4+k] - src2[x-2+k] );
                c1 += abs( src[x-3+k] - src2[x-3+k] );
                c2 += abs( src[x-2+k] - src2[x-4+k] );
            }

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * MMXEXT counterpart of XDeint8x8FieldC(): the three 8-pixel sums of
 * absolute differences are obtained with psadbw, the direction choice and
 * the final averaging are done in C exactly as in the C version.
 *
 * @param dst   output block
 * @param i_dst output line stride in bytes
 * @param src   top-left pixel of the source block
 * @param i_src source line stride in bytes
 */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* NxN arbitrary size (and then only use pixels in the NxN block)
 */
/**
 * Tell whether an i_width x i_height block looks interlaced.
 *
 * NOTE: the height comes BEFORE the width in the parameter list, unlike the
 * other XDeintNxN* helpers.
 *
 * fr sums the squared differences between each even line and the line
 * below it, ff between each even line and the line two rows below. Both
 * sums keep accumulating from one line pair to the next; after each pair a
 * vote is counted when ff < fr and fr > i_width / 2.
 *
 * @param src      top-left pixel of the block
 * @param i_src    line stride in bytes
 * @param i_height number of lines of the block
 * @param i_width  number of pixels per line
 * @return 1 when at least two votes were counted, 0 otherwise
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    int ff = 0;
    int fr = 0;
    int fc = 0;

    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *s = &src[y*i_src];
        for( int x = 0; x < i_width; x++ )
        {
            fr += ssd(s[        x] - s[1*i_src+x]);
            ff += ssd(s[        x] - s[2*i_src+x]);
        }
        if( ff < fr && fr > i_width / 2 )
            fc++;
    }

    return fc >= 2;
}
/**
 * Progressive reconstruction of an i_width x i_height block.
 *
 * Even lines are copied. Each odd line is the (1,2,1)/4 blend of the three
 * source lines around it, except the last pair which has no line below and
 * uses the plain average of its two source lines.
 * When i_height is odd, the final (even) line is only copied: nothing is
 * read or written below the block.
 *
 * @param dst      output block
 * @param i_dst    output line stride in bytes
 * @param src      top-left pixel of the source block
 * @param i_src    source line stride in bytes
 * @param i_width  number of pixels per line
 * @param i_height number of lines
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Progressive */
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: line y+1 does not belong to the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
/**
 * Interlaced reconstruction of an i_width x i_height block.
 *
 * Even lines are copied. Each odd line is the average of the even lines
 * above and below it; the last pair has no even line below and averages its
 * two source lines instead.
 * When i_height is odd, the final (even) line is only copied: nothing is
 * read or written below the block.
 *
 * @param dst      output block
 * @param i_dst    output line stride in bytes
 * @param src      top-left pixel of the source block
 * @param i_src    source line stride in bytes
 * @param i_width  number of pixels per line
 * @param i_height number of lines
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Interlaced */
    for( int y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: line y+1 does not belong to the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
/**
 * Deinterlace an arbitrarily sized block: run the NxN detector, then rebuild
 * the block with the field (interlaced) or frame (progressive) variant.
 *
 * @param dst      output block
 * @param i_dst    output line stride in bytes
 * @param src      top-left pixel of the source block
 * @param i_src    source line stride in bytes
 * @param i_width  number of pixels per line
 * @param i_height number of lines
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height first, then the width */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
/**
 * Median of three integers.
 *
 * Implemented with comparisons only, so it is defined for every input
 * (no intermediate sum that could overflow).
 */
static inline int median( int a, int b, int c )
{
    int min = a, max = a;

    if( b < min )
        min = b;
    else
        max = b;

    /* min <= max are now a and b in order; c is clamped between them */
    if( c < min )
        return min;
    if( c > max )
        return max;
    return c;
}
/**
 * Deinterlace one 8-line band (C version).
 *
 * The band is walked in 8x8 blocks. A block detected as interlaced is
 * rebuilt with the edge oriented interpolation, except the first and last
 * blocks of the band which use the simple (1,0,1) variant; any other block
 * is rebuilt with the (1,6,1) blend of both fields. The columns left over
 * after the last full block are handled by XDeintNxN().
 *
 * @param dst    first output line of the band
 * @param i_dst  output line stride in bytes
 * @param src    first source line of the band
 * @param i_src  source line stride in bytes
 * @param i_mbx  number of full 8x8 blocks in the band
 * @param i_modx number of leftover columns (0 for none)
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int x = 0; x < i_mbx; x++ )
    {
        if( XDeint8x8DetectC( src, i_src ) )
        {
            if( x == 0 || x == i_mbx - 1 )
                XDeint8x8FieldEC( dst, i_dst, src, i_src );
            else
                XDeint8x8FieldC( dst, i_dst, src, i_src );
        }
        else
        {
            /* src1 = even lines, src2 = odd lines */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
        }

        dst += 8;
        src += 8;
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * Deinterlace one 8-line band (MMXEXT version).
 *
 * Same block walk as XDeintBand8x8C(), using the MMXEXT detector and
 * rebuilders. The leftover columns are still handled by the C helper
 * XDeintNxN().
 *
 * @param dst    first output line of the band
 * @param i_dst  output line stride in bytes
 * @param src    first source line of the band
 * @param i_src  source line stride in bytes
 * @param i_mbx  number of full 8x8 blocks in the band
 * @param i_modx number of leftover columns (0 for none)
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int x = 0; x < i_mbx; x++ )
    {
        if( XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            if( x == 0 || x == i_mbx - 1 )
                XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
            else
                XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
        }
        else
        {
            /* src1 = even lines, src2 = odd lines */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
        }

        dst += 8;
        src += 8;
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
1739 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1743 /* Copy image and skip lines */
1744 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1746 const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1747 const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1749 const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1750 const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1752 const int i_dst = p_outpic->p[i_plane].i_pitch;
1753 const int i_src = p_pic->p[i_plane].i_pitch;
1757 for( y = 0; y < i_mby; y++ )
1759 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1760 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1762 #ifdef CAN_COMPILE_MMXEXT
1763 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1764 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1767 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1770 /* Last line (C only)*/
1773 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1774 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1776 for( x = 0; x < i_mbx; x++ )
1778 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1785 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1789 #ifdef CAN_COMPILE_MMXEXT
1790 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1795 /*****************************************************************************
1796 * Yadif (Yet Another DeInterlacing Filter).
1797 *****************************************************************************/
/* Configuration handed to the yadif line filters */
struct vf_priv_s {
    /*
     * 0: Output 1 frame for each frame.
     * 1: Output 1 frame for each field.
     * 2: Like 0 but skips spatial interlacing check.
     * 3: Like 1 but skips spatial interlacing check.
     *
     * In vlc, only & 0x02 has meaning, as we do the & 0x01 ourself.
     */
    int mode;
};

/* I am unsure it is the right one */
typedef intptr_t x86_reg;

/* Small helpers expected by the yadif code under their FFmpeg names.
 * Being macros, they may evaluate their arguments more than once. */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b)      __MAX(a,b)
#define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b)      __MIN(a,b)
#define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)
1820 /* yadif.h comes from vf_yadif.c of mplayer project */
1823 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1825 vout_sys_t *p_sys = p_vout->p_sys;
1828 assert( i_order == 0 || i_order == 1 );
1829 assert( i_field == 0 || i_field == 1 );
1833 /* Duplicate the picture
1834 * TODO when the vout rework is finished, picture_Hold() might be enough
1835 * but becarefull, the pitches must match */
1836 picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1838 picture_Copy( p_dup, p_src );
1840 /* Slide the history */
1841 if( p_sys->pp_history[0] )
1842 picture_Release( p_sys->pp_history[0] );
1843 for( int i = 1; i < HISTORY_SIZE; i++ )
1844 p_sys->pp_history[i-1] = p_sys->pp_history[i];
1845 p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1848 /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1849 picture_t *p_prev = p_sys->pp_history[0];
1850 picture_t *p_cur = p_sys->pp_history[1];
1851 picture_t *p_next = p_sys->pp_history[2];
1853 /* Filter if we have all the pictures we need */
1854 if( p_prev && p_cur && p_next )
1857 void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1858 #if defined(HAVE_YADIF_SSE2)
1859 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1860 filter = yadif_filter_line_mmx2;
1863 filter = yadif_filter_line_c;
1865 for( int n = 0; n < p_dst->i_planes; n++ )
1867 const plane_t *prevp = &p_prev->p[n];
1868 const plane_t *curp = &p_cur->p[n];
1869 const plane_t *nextp = &p_next->p[n];
1870 plane_t *dstp = &p_dst->p[n];
1872 for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
1874 if( (y % 2) == i_field )
1876 vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1877 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1881 struct vf_priv_s cfg;
1882 /* Spatial checks only when enough data */
1883 cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1885 assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1887 &dstp->p_pixels[y * dstp->i_pitch],
1888 &prevp->p_pixels[y * prevp->i_pitch],
1889 &curp->p_pixels[y * curp->i_pitch],
1890 &nextp->p_pixels[y * nextp->i_pitch],
1891 dstp->i_visible_pitch,
1893 (i_field ^ (i_order == i_field)) & 1 );
1896 /* We duplicate the first and last lines */
1898 vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1899 else if( y == dstp->i_visible_lines - 2 )
1900 vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1905 p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1909 /* Fallback to something simple
1910 * XXX it is wrong when we have 2 pictures, we should not output a picture */
1911 RenderX( p_dst, p_src );
1915 /*****************************************************************************
1916 * FilterCallback: called when changing the deinterlace method on the fly.
1917 *****************************************************************************/
1918 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1919 vlc_value_t oldval, vlc_value_t newval,
1922 VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1923 vout_thread_t * p_vout = (vout_thread_t *)p_this;
1924 vout_sys_t *p_sys = p_vout->p_sys;
1926 msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1928 vlc_mutex_lock( &p_sys->filter_lock );
1929 const bool b_old_half_height = p_sys->b_half_height;
1931 SetFilterMethod( p_vout, newval.psz_string );
1933 if( !b_old_half_height == !p_sys->b_half_height )
1935 vlc_mutex_unlock( &p_sys->filter_lock );
1939 /* We need to kill the old vout */
1942 vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1943 vout_CloseAndRelease( p_sys->p_vout );
1946 /* Try to open a new video output */
1947 p_sys->p_vout = SpawnRealVout( p_vout );
1949 if( p_sys->p_vout == NULL )
1951 /* Everything failed */
1952 msg_Err( p_vout, "cannot open vout, aborting" );
1954 vlc_mutex_unlock( &p_sys->filter_lock );
1955 return VLC_EGENERIC;
1958 vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1960 vlc_mutex_unlock( &p_sys->filter_lock );
1964 /*****************************************************************************
1965 * video filter2 functions
1966 *****************************************************************************/
1967 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1969 vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1970 picture_t *p_pic_dst;
1972 /* Request output picture */
1973 p_pic_dst = filter_NewPicture( p_filter );
1974 if( p_pic_dst == NULL )
1976 picture_Release( p_pic );
1980 switch( p_vout->p_sys->i_mode )
1982 case DEINTERLACE_DISCARD:
1983 RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1986 case DEINTERLACE_BOB:
1988 RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1989 RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1993 case DEINTERLACE_LINEAR:
1995 RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1996 RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
1998 msg_Err( p_vout, "doubling the frame rate is not supported yet" );
1999 picture_Release( p_pic_dst );
2000 picture_Release( p_pic );
2003 case DEINTERLACE_MEAN:
2004 RenderMean( p_vout, p_pic_dst, p_pic );
2007 case DEINTERLACE_BLEND:
2008 RenderBlend( p_vout, p_pic_dst, p_pic );
2012 RenderX( p_pic_dst, p_pic );
2015 case DEINTERLACE_YADIF:
2016 msg_Err( p_vout, "delaying frames is not supported yet" );
2017 picture_Release( p_pic_dst );
2018 picture_Release( p_pic );
2021 case DEINTERLACE_YADIF2X:
2022 msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2023 picture_Release( p_pic_dst );
2024 picture_Release( p_pic );
2028 picture_CopyProperties( p_pic_dst, p_pic );
2029 p_pic_dst->b_progressive = true;
2031 picture_Release( p_pic );
2035 /*****************************************************************************
2037 *****************************************************************************/
2038 static int OpenFilter( vlc_object_t *p_this )
2040 filter_t *p_filter = (filter_t*)p_this;
2041 vout_thread_t *p_vout;
2044 if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2045 return VLC_EGENERIC;
2047 /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2049 p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2050 vlc_object_attach( p_vout, p_filter );
2051 p_filter->p_sys = (filter_sys_t *)p_vout;
2052 p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2054 config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2056 var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2058 var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2059 var_Set( p_filter, "filter-deinterlace-mode", val );
2060 free( val.psz_string );
2062 if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2064 vlc_object_release( p_vout );
2065 return VLC_EGENERIC;
2069 GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2070 if( !p_filter->b_allow_fmt_out_change &&
2071 ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2072 fmt.i_height != p_filter->fmt_in.video.i_height ) )
2074 CloseFilter( VLC_OBJECT(p_filter) );
2075 return VLC_EGENERIC;
2077 p_filter->fmt_out.video = fmt;
2078 p_filter->fmt_out.i_codec = fmt.i_chroma;
2079 p_filter->pf_video_filter = Deinterlace;
2081 msg_Dbg( p_filter, "deinterlacing" );
2086 /*****************************************************************************
2087 * CloseFilter: clean up the filter
2088 *****************************************************************************/
2089 static void CloseFilter( vlc_object_t *p_this )
2091 filter_t *p_filter = (filter_t*)p_this;
2092 vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2094 Destroy( VLC_OBJECT(p_vout) );
2095 vlc_object_release( p_vout );