]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
Renamed deinterlace-mode into filter-deinterlace-mode.
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000-2009 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <errno.h>
33 #include <assert.h>
34
35 #ifdef HAVE_ALTIVEC_H
36 #   include <altivec.h>
37 #endif
38
39 #include <vlc_common.h>
40 #include <vlc_plugin.h>
41 #include <vlc_vout.h>
42 #include <vlc_sout.h>
43 #include <vlc_filter.h>
44 #include <vlc_cpu.h>
45
46 #ifdef CAN_COMPILE_MMXEXT
47 #   include "mmx.h"
48 #endif
49
50 #include "filter_common.h"
51
52 #define DEINTERLACE_DISCARD 1
53 #define DEINTERLACE_MEAN    2
54 #define DEINTERLACE_BLEND   3
55 #define DEINTERLACE_BOB     4
56 #define DEINTERLACE_LINEAR  5
57 #define DEINTERLACE_X       6
58 #define DEINTERLACE_YADIF   7
59 #define DEINTERLACE_YADIF2X 8
60
61 /*****************************************************************************
62  * Local prototypes
63  *****************************************************************************/
64 static int  Create    ( vlc_object_t * );
65 static void Destroy   ( vlc_object_t * );
66
67 static int  Init      ( vout_thread_t * );
68 static void End       ( vout_thread_t * );
69 static void Render    ( vout_thread_t *, picture_t * );
70
71 static int  MouseEvent( vlc_object_t *p_this, char const *psz_var,
72                         vlc_value_t oldval, vlc_value_t newval, void *p_data );
73
74 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
75 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
76 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
77 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
78 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
79 static void RenderX      ( picture_t *, picture_t * );
80 static void RenderYadif  ( vout_thread_t *, picture_t *, picture_t *, int, int );
81
82 static void MergeGeneric ( void *, const void *, const void *, size_t );
83 #if defined(CAN_COMPILE_C_ALTIVEC)
84 static void MergeAltivec ( void *, const void *, const void *, size_t );
85 #endif
86 #if defined(CAN_COMPILE_MMXEXT)
87 static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
88 #endif
89 #if defined(CAN_COMPILE_3DNOW)
90 static void Merge3DNow   ( void *, const void *, const void *, size_t );
91 #endif
92 #if defined(CAN_COMPILE_SSE)
93 static void MergeSSE2    ( void *, const void *, const void *, size_t );
94 #endif
95 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
96 static void EndMMX       ( void );
97 #endif
98 #if defined(CAN_COMPILE_3DNOW)
99 static void End3DNow     ( void );
100 #endif
101 #if defined __ARM_NEON__
102 static void MergeNEON (void *, const void *, const void *, size_t);
103 #endif
104
105 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
106 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
107
108 static int OpenFilter( vlc_object_t *p_this );
109 static void CloseFilter( vlc_object_t *p_this );
110
111 /*****************************************************************************
112  * Callback prototypes
113  *****************************************************************************/
114 static int FilterCallback( vlc_object_t *, char const *,
115                            vlc_value_t, vlc_value_t, void * );
116
117 /*****************************************************************************
118  * Module descriptor
119  *****************************************************************************/
120 #define MODE_TEXT N_("Deinterlace mode")
121 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
122
123 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
124 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
125
126 #define FILTER_CFG_PREFIX "sout-deinterlace-"
127
128 static const char *const mode_list[] = {
129     "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
130 static const char *const mode_list_text[] = {
131     N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };
132
133 vlc_module_begin ()
134     set_description( N_("Deinterlacing video filter") )
135     set_shortname( N_("Deinterlace" ))
136     set_capability( "video filter", 0 )
137     set_category( CAT_VIDEO )
138     set_subcategory( SUBCAT_VIDEO_VFILTER )
139
140     set_section( N_("Display"),NULL)
141     add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
142                 MODE_LONGTEXT, false )
143         change_string_list( mode_list, mode_list_text, 0 )
144         change_safe ()
145
146     add_shortcut( "deinterlace" )
147     set_callbacks( Create, Destroy )
148
149     add_submodule ()
150     set_capability( "video filter2", 0 )
151     set_section( N_("Streaming"),NULL)
152     add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
153                 SOUT_MODE_LONGTEXT, false )
154         change_string_list( mode_list, mode_list_text, 0 )
155     add_shortcut( "deinterlace" )
156     set_callbacks( OpenFilter, CloseFilter )
157 vlc_module_end ()
158
159 static const char *const ppsz_filter_options[] = {
160     "mode", NULL
161 };
162
163 /*****************************************************************************
164  * vout_sys_t: Deinterlace video output method descriptor
165  *****************************************************************************
166  * This structure is part of the video output thread descriptor.
167  * It describes the Deinterlace specific properties of an output thread.
168  *****************************************************************************/
169 #define HISTORY_SIZE (3)
170 struct vout_sys_t
171 {
172     int        i_mode;        /* Deinterlace mode */
173     bool b_double_rate; /* Shall we double the framerate? */
174     bool b_half_height; /* Shall be devide the height by 2 */
175
176     mtime_t    last_date;
177     mtime_t    next_date;
178
179     vout_thread_t *p_vout;
180
181     vlc_mutex_t filter_lock;
182
183     void (*pf_merge) ( void *, const void *, const void *, size_t );
184     void (*pf_end_merge) ( void );
185
186     /* Yadif */
187     picture_t *pp_history[HISTORY_SIZE];
188 };
189
190 /*****************************************************************************
191  * Control: control facility for the vout (forwards to child vout)
192  *****************************************************************************/
193 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
194 {
195     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
196 }
197
198 /*****************************************************************************
199  * Create: allocates Deinterlace video thread output method
200  *****************************************************************************
201  * This function allocates and initializes a Deinterlace vout method.
202  *****************************************************************************/
203 static int Create( vlc_object_t *p_this )
204 {
205     vout_thread_t *p_vout = (vout_thread_t *)p_this;
206     vout_sys_t *p_sys;
207     char *psz_mode;
208
209     /* Allocate structure */
210     p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
211     if( p_vout->p_sys == NULL )
212         return VLC_ENOMEM;
213
214     p_vout->pf_init = Init;
215     p_vout->pf_end = End;
216     p_vout->pf_manage = NULL;
217     p_vout->pf_render = Render;
218     p_vout->pf_display = NULL;
219     p_vout->pf_control = Control;
220
221     p_sys->i_mode = DEINTERLACE_DISCARD;
222     p_sys->b_double_rate = false;
223     p_sys->b_half_height = true;
224     p_sys->last_date = 0;
225     p_sys->p_vout = 0;
226     vlc_mutex_init( &p_sys->filter_lock );
227
228 #if defined(CAN_COMPILE_C_ALTIVEC)
229     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
230     {
231         p_sys->pf_merge = MergeAltivec;
232         p_sys->pf_end_merge = NULL;
233     }
234     else
235 #endif
236 #if defined(CAN_COMPILE_SSE)
237     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
238     {
239         p_sys->pf_merge = MergeSSE2;
240         p_sys->pf_end_merge = EndMMX;
241     }
242     else
243 #endif
244 #if defined(CAN_COMPILE_MMXEXT)
245     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
246     {
247         p_sys->pf_merge = MergeMMXEXT;
248         p_sys->pf_end_merge = EndMMX;
249     }
250     else
251 #endif
252 #if defined(CAN_COMPILE_3DNOW)
253     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
254     {
255         p_sys->pf_merge = Merge3DNow;
256         p_sys->pf_end_merge = End3DNow;
257     }
258     else
259 #endif
260 #if defined __ARM_NEON__
261     if( vlc_CPU() & CPU_CAPABILITY_NEON )
262     {
263         p_sys->pf_merge = MergeNEON;
264         p_sys->pf_end_merge = NULL;
265     }
266     else
267 #endif
268     {
269         p_sys->pf_merge = MergeGeneric;
270         p_sys->pf_end_merge = NULL;
271     }
272
273     /* Look what method was requested */
274     psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
275
276     if( !psz_mode )
277     {
278         msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
279         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
280
281         psz_mode = strdup( "discard" );
282     }
283
284     SetFilterMethod( p_vout, psz_mode );
285
286     free( psz_mode );
287
288     return VLC_SUCCESS;
289 }
290
291 /*****************************************************************************
292  * SetFilterMethod: setup the deinterlace method to use.
293  *****************************************************************************/
294 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
295 {
296     vout_sys_t *p_sys = p_vout->p_sys;
297     if( !strcmp( psz_method, "mean" ) )
298     {
299         p_sys->i_mode = DEINTERLACE_MEAN;
300         p_sys->b_double_rate = false;
301         p_sys->b_half_height = true;
302     }
303     else if( !strcmp( psz_method, "blend" )
304              || !strcmp( psz_method, "average" )
305              || !strcmp( psz_method, "combine-fields" ) )
306     {
307         p_sys->i_mode = DEINTERLACE_BLEND;
308         p_sys->b_double_rate = false;
309         p_sys->b_half_height = false;
310     }
311     else if( !strcmp( psz_method, "bob" )
312              || !strcmp( psz_method, "progressive-scan" ) )
313     {
314         p_sys->i_mode = DEINTERLACE_BOB;
315         p_sys->b_double_rate = true;
316         p_sys->b_half_height = false;
317     }
318     else if( !strcmp( psz_method, "linear" ) )
319     {
320         p_sys->i_mode = DEINTERLACE_LINEAR;
321         p_sys->b_double_rate = true;
322         p_sys->b_half_height = false;
323     }
324     else if( !strcmp( psz_method, "x" ) )
325     {
326         p_sys->i_mode = DEINTERLACE_X;
327         p_sys->b_double_rate = false;
328         p_sys->b_half_height = false;
329     }
330     else if( !strcmp( psz_method, "yadif" ) )
331     {
332         p_sys->i_mode = DEINTERLACE_YADIF;
333         p_sys->b_double_rate = false;
334         p_sys->b_half_height = false;
335     }
336     else if( !strcmp( psz_method, "yadif2x" ) )
337     {
338         p_sys->i_mode = DEINTERLACE_YADIF2X;
339         p_sys->b_double_rate = true;
340         p_sys->b_half_height = false;
341     }
342     else
343     {
344         const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
345                             p_vout->render.i_chroma == VLC_CODEC_J422;
346         if( strcmp( psz_method, "discard" ) )
347             msg_Err( p_vout, "no valid deinterlace mode provided, "
348                      "using \"discard\"" );
349
350         p_sys->i_mode = DEINTERLACE_DISCARD;
351         p_sys->b_double_rate = false;
352         p_sys->b_half_height = !b_i422;
353     }
354
355     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
356 }
357
358 static void GetOutputFormat( vout_thread_t *p_vout,
359                              video_format_t *p_dst, const video_format_t *p_src )
360 {
361     *p_dst = *p_src;
362
363     if( p_vout->p_sys->b_half_height )
364     {
365         p_dst->i_height /= 2;
366         p_dst->i_visible_height /= 2;
367         p_dst->i_y_offset /= 2;
368         p_dst->i_sar_den *= 2;
369     }
370
371     if( p_src->i_chroma == VLC_CODEC_I422 ||
372         p_src->i_chroma == VLC_CODEC_J422 )
373     {
374         switch( p_vout->p_sys->i_mode )
375         {
376         case DEINTERLACE_MEAN:
377         case DEINTERLACE_LINEAR:
378         case DEINTERLACE_X:
379         case DEINTERLACE_YADIF:
380         case DEINTERLACE_YADIF2X:
381             p_dst->i_chroma = p_src->i_chroma;
382             break;
383         default:
384             p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
385                                                                   VLC_CODEC_J420;
386             break;
387         }
388     }
389 }
390
391 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
392 {
393     return i_chroma == VLC_CODEC_I420 ||
394            i_chroma == VLC_CODEC_J420 ||
395            i_chroma == VLC_CODEC_YV12 ||
396            i_chroma == VLC_CODEC_I422 ||
397            i_chroma == VLC_CODEC_J422;
398 }
399
400 /*****************************************************************************
401  * Init: initialize Deinterlace video thread output method
402  *****************************************************************************/
403 static int Init( vout_thread_t *p_vout )
404 {
405     I_OUTPUTPICTURES = 0;
406
407     if( !IsChromaSupported( p_vout->render.i_chroma ) )
408         return VLC_EGENERIC; /* unknown chroma */
409
410     /* Initialize the output structure, full of directbuffers since we want
411      * the decoder to output directly to our structures. */
412     p_vout->output.i_chroma = p_vout->render.i_chroma;
413     p_vout->output.i_width  = p_vout->render.i_width;
414     p_vout->output.i_height = p_vout->render.i_height;
415     p_vout->output.i_aspect = p_vout->render.i_aspect;
416     p_vout->fmt_out = p_vout->fmt_in;
417
418     /* Try to open the real video output */
419     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
420
421     if( p_vout->p_sys->p_vout == NULL )
422     {
423         /* Everything failed */
424         msg_Err( p_vout, "cannot open vout, aborting" );
425
426         return VLC_EGENERIC;
427     }
428
429     for( int i = 0; i < HISTORY_SIZE; i++ )
430         p_vout->p_sys->pp_history[i] = NULL;
431
432     vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
433
434     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
435
436     var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
437
438     return VLC_SUCCESS;
439 }
440
441 /*****************************************************************************
442  * SpawnRealVout: spawn the real video output.
443  *****************************************************************************/
444 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
445 {
446     msg_Dbg( p_vout, "spawning the real video output" );
447
448     video_format_t fmt;
449     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
450
451     return vout_Create( p_vout, &fmt );
452 }
453
454 /*****************************************************************************
455  * End: terminate Deinterlace video thread output method
456  *****************************************************************************/
457 static void End( vout_thread_t *p_vout )
458 {
459     vout_sys_t *p_sys = p_vout->p_sys;
460
461     var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
462
463     for( int i = 0; i < HISTORY_SIZE; i++ )
464     {
465         if( p_sys->pp_history[i] )
466             picture_Release( p_sys->pp_history[i] );
467     }
468
469     if( p_sys->p_vout )
470     {
471         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
472         vout_CloseAndRelease( p_sys->p_vout );
473     }
474
475     vout_filter_ReleaseDirectBuffers( p_vout );
476 }
477
478 /*****************************************************************************
479  * Destroy: destroy Deinterlace video thread output method
480  *****************************************************************************
481  * Terminate an output method created by DeinterlaceCreateOutputMethod
482  *****************************************************************************/
483 static void Destroy( vlc_object_t *p_this )
484 {
485     vout_thread_t *p_vout = (vout_thread_t *)p_this;
486     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
487     free( p_vout->p_sys );
488 }
489
490 /**
491  * Forward mouse event with proper conversion.
492  */
493 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
494                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
495 {
496     vout_thread_t *p_vout = p_data;
497     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
498
499     if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
500         newval.i_int *= 2;
501
502     return var_Set( p_vout, psz_var, newval );
503 }
504
505 /*****************************************************************************
506  * Render: displays previously rendered output
507  *****************************************************************************
508  * This function send the currently rendered image to Deinterlace image,
509  * waits until it is displayed and switch the two rendering buffers, preparing
510  * next frame.
511  *****************************************************************************/
512 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
513 {
514     vout_sys_t *p_sys = p_vout->p_sys;
515     picture_t *pp_outpic[2];
516
517     /* FIXME are they needed ? */
518     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
519     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
520     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
521     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
522
523     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
524     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
525     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
526     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
527     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
528     if( p_vout->p_sys->b_half_height )
529     {
530         p_sys->p_vout->fmt_in.i_y_offset /= 2;
531         p_sys->p_vout->fmt_in.i_visible_height /= 2;
532     }
533
534     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
535     {
536         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
537
538         p_vout->fmt_out.i_aspect = p_vout->fmt_in.i_aspect;
539         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
540         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
541
542         video_format_t fmt = p_vout->fmt_out;
543         if( p_vout->p_sys->b_half_height )
544         {
545             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
546             fmt.i_sar_den *= 2;
547         }
548
549         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
550     }
551     if( !p_sys->p_vout )
552         return;
553
554     pp_outpic[0] = pp_outpic[1] = NULL;
555
556     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
557
558     /* Get a new picture */
559     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
560                                                 0, 0, 0 ) )
561               == NULL )
562     {
563         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
564         {
565             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
566             return;
567         }
568         msleep( VOUT_OUTMEM_SLEEP );
569     }
570
571     pp_outpic[0]->date = p_pic->date;
572
573     /* If we are using double rate, get an additional new picture */
574     if( p_vout->p_sys->b_double_rate )
575     {
576         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
577                                                  0, 0, 0 ) )
578                   == NULL )
579         {
580             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
581             {
582                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
583                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
584                 return;
585             }
586             msleep( VOUT_OUTMEM_SLEEP );
587         }
588
589         /* 20ms is a bit arbitrary, but it's only for the first image we get */
590         if( !p_vout->p_sys->last_date )
591             pp_outpic[1]->date = p_pic->date + 20000;
592         else
593             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
594         p_vout->p_sys->last_date = p_pic->date;
595     }
596
597     switch( p_vout->p_sys->i_mode )
598     {
599         case DEINTERLACE_DISCARD:
600             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
601             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
602             break;
603
604         case DEINTERLACE_BOB:
605             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
606             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
607             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
608             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
609             break;
610
611         case DEINTERLACE_LINEAR:
612             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
613             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
614             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
615             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
616             break;
617
618         case DEINTERLACE_MEAN:
619             RenderMean( p_vout, pp_outpic[0], p_pic );
620             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
621             break;
622
623         case DEINTERLACE_BLEND:
624             RenderBlend( p_vout, pp_outpic[0], p_pic );
625             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
626             break;
627
628         case DEINTERLACE_X:
629             RenderX( pp_outpic[0], p_pic );
630             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
631             break;
632
633         case DEINTERLACE_YADIF:
634             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
635             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
636             break;
637
638         case DEINTERLACE_YADIF2X:
639             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
640             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
641             RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
642             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
643             break;
644     }
645     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
646 }
647
648 /*****************************************************************************
649  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
650  *****************************************************************************/
651 static void RenderDiscard( vout_thread_t *p_vout,
652                            picture_t *p_outpic, picture_t *p_pic, int i_field )
653 {
654     int i_plane;
655
656     /* Copy image and skip lines */
657     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
658     {
659         uint8_t *p_in, *p_out_end, *p_out;
660         int i_increment;
661
662         p_in = p_pic->p[i_plane].p_pixels
663                    + i_field * p_pic->p[i_plane].i_pitch;
664
665         p_out = p_outpic->p[i_plane].p_pixels;
666         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
667                              * p_outpic->p[i_plane].i_visible_lines;
668
669         switch( p_vout->render.i_chroma )
670         {
671         case VLC_CODEC_I420:
672         case VLC_CODEC_J420:
673         case VLC_CODEC_YV12:
674
675             for( ; p_out < p_out_end ; )
676             {
677                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
678
679                 p_out += p_outpic->p[i_plane].i_pitch;
680                 p_in += 2 * p_pic->p[i_plane].i_pitch;
681             }
682             break;
683
684         case VLC_CODEC_I422:
685         case VLC_CODEC_J422:
686
687             i_increment = 2 * p_pic->p[i_plane].i_pitch;
688
689             if( i_plane == Y_PLANE )
690             {
691                 for( ; p_out < p_out_end ; )
692                 {
693                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
694                     p_out += p_outpic->p[i_plane].i_pitch;
695                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
696                     p_out += p_outpic->p[i_plane].i_pitch;
697                     p_in += i_increment;
698                 }
699             }
700             else
701             {
702                 for( ; p_out < p_out_end ; )
703                 {
704                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
705                     p_out += p_outpic->p[i_plane].i_pitch;
706                     p_in += i_increment;
707                 }
708             }
709             break;
710
711         default:
712             break;
713         }
714     }
715 }
716
717 /*****************************************************************************
718  * RenderBob: renders a BOB picture - simple copy
719  *****************************************************************************/
720 static void RenderBob( vout_thread_t *p_vout,
721                        picture_t *p_outpic, picture_t *p_pic, int i_field )
722 {
723     int i_plane;
724
725     /* Copy image and skip lines */
726     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
727     {
728         uint8_t *p_in, *p_out_end, *p_out;
729
730         p_in = p_pic->p[i_plane].p_pixels;
731         p_out = p_outpic->p[i_plane].p_pixels;
732         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
733                              * p_outpic->p[i_plane].i_visible_lines;
734
735         switch( p_vout->render.i_chroma )
736         {
737             case VLC_CODEC_I420:
738             case VLC_CODEC_J420:
739             case VLC_CODEC_YV12:
740                 /* For BOTTOM field we need to add the first line */
741                 if( i_field == 1 )
742                 {
743                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
744                     p_in += p_pic->p[i_plane].i_pitch;
745                     p_out += p_outpic->p[i_plane].i_pitch;
746                 }
747
748                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
749
750                 for( ; p_out < p_out_end ; )
751                 {
752                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
753
754                     p_out += p_outpic->p[i_plane].i_pitch;
755
756                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
757
758                     p_in += 2 * p_pic->p[i_plane].i_pitch;
759                     p_out += p_outpic->p[i_plane].i_pitch;
760                 }
761
762                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
763
764                 /* For TOP field we need to add the last line */
765                 if( i_field == 0 )
766                 {
767                     p_in += p_pic->p[i_plane].i_pitch;
768                     p_out += p_outpic->p[i_plane].i_pitch;
769                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
770                 }
771                 break;
772
773             case VLC_CODEC_I422:
774             case VLC_CODEC_J422:
775                 /* For BOTTOM field we need to add the first line */
776                 if( i_field == 1 )
777                 {
778                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
779                     p_in += p_pic->p[i_plane].i_pitch;
780                     p_out += p_outpic->p[i_plane].i_pitch;
781                 }
782
783                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
784
785                 if( i_plane == Y_PLANE )
786                 {
787                     for( ; p_out < p_out_end ; )
788                     {
789                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
790
791                         p_out += p_outpic->p[i_plane].i_pitch;
792
793                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
794
795                         p_in += 2 * p_pic->p[i_plane].i_pitch;
796                         p_out += p_outpic->p[i_plane].i_pitch;
797                     }
798                 }
799                 else
800                 {
801                     for( ; p_out < p_out_end ; )
802                     {
803                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
804
805                         p_out += p_outpic->p[i_plane].i_pitch;
806                         p_in += 2 * p_pic->p[i_plane].i_pitch;
807                     }
808                 }
809
810                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
811
812                 /* For TOP field we need to add the last line */
813                 if( i_field == 0 )
814                 {
815                     p_in += p_pic->p[i_plane].i_pitch;
816                     p_out += p_outpic->p[i_plane].i_pitch;
817                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
818                 }
819                 break;
820         }
821     }
822 }
823
824 #define Merge p_vout->p_sys->pf_merge
825 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
826
827 /*****************************************************************************
828  * RenderLinear: BOB with linear interpolation
829  *****************************************************************************/
830 static void RenderLinear( vout_thread_t *p_vout,
831                           picture_t *p_outpic, picture_t *p_pic, int i_field )
832 {
833     int i_plane;
834
835     /* Copy image and skip lines */
836     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
837     {
838         uint8_t *p_in, *p_out_end, *p_out;
839
840         p_in = p_pic->p[i_plane].p_pixels;
841         p_out = p_outpic->p[i_plane].p_pixels;
842         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
843                              * p_outpic->p[i_plane].i_visible_lines;
844
845         /* For BOTTOM field we need to add the first line */
846         if( i_field == 1 )
847         {
848             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
849             p_in += p_pic->p[i_plane].i_pitch;
850             p_out += p_outpic->p[i_plane].i_pitch;
851         }
852
853         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
854
855         for( ; p_out < p_out_end ; )
856         {
857             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
858
859             p_out += p_outpic->p[i_plane].i_pitch;
860
861             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
862                    p_pic->p[i_plane].i_pitch );
863
864             p_in += 2 * p_pic->p[i_plane].i_pitch;
865             p_out += p_outpic->p[i_plane].i_pitch;
866         }
867
868         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
869
870         /* For TOP field we need to add the last line */
871         if( i_field == 0 )
872         {
873             p_in += p_pic->p[i_plane].i_pitch;
874             p_out += p_outpic->p[i_plane].i_pitch;
875             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
876         }
877     }
878     EndMerge();
879 }
880
881 static void RenderMean( vout_thread_t *p_vout,
882                         picture_t *p_outpic, picture_t *p_pic )
883 {
884     int i_plane;
885
886     /* Copy image and skip lines */
887     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
888     {
889         uint8_t *p_in, *p_out_end, *p_out;
890
891         p_in = p_pic->p[i_plane].p_pixels;
892
893         p_out = p_outpic->p[i_plane].p_pixels;
894         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
895                              * p_outpic->p[i_plane].i_visible_lines;
896
897         /* All lines: mean value */
898         for( ; p_out < p_out_end ; )
899         {
900             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
901                    p_pic->p[i_plane].i_pitch );
902
903             p_out += p_outpic->p[i_plane].i_pitch;
904             p_in += 2 * p_pic->p[i_plane].i_pitch;
905         }
906     }
907     EndMerge();
908 }
909
910 static void RenderBlend( vout_thread_t *p_vout,
911                          picture_t *p_outpic, picture_t *p_pic )
912 {
913     int i_plane;
914
915     /* Copy image and skip lines */
916     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
917     {
918         uint8_t *p_in, *p_out_end, *p_out;
919
920         p_in = p_pic->p[i_plane].p_pixels;
921
922         p_out = p_outpic->p[i_plane].p_pixels;
923         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
924                              * p_outpic->p[i_plane].i_visible_lines;
925
926         switch( p_vout->render.i_chroma )
927         {
928             case VLC_CODEC_I420:
929             case VLC_CODEC_J420:
930             case VLC_CODEC_YV12:
931                 /* First line: simple copy */
932                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
933                 p_out += p_outpic->p[i_plane].i_pitch;
934
935                 /* Remaining lines: mean value */
936                 for( ; p_out < p_out_end ; )
937                 {
938                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
939                            p_pic->p[i_plane].i_pitch );
940
941                     p_out += p_outpic->p[i_plane].i_pitch;
942                     p_in += p_pic->p[i_plane].i_pitch;
943                 }
944                 break;
945
946             case VLC_CODEC_I422:
947             case VLC_CODEC_J422:
948                 /* First line: simple copy */
949                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
950                 p_out += p_outpic->p[i_plane].i_pitch;
951
952                 /* Remaining lines: mean value */
953                 if( i_plane == Y_PLANE )
954                 {
955                     for( ; p_out < p_out_end ; )
956                     {
957                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
958                                p_pic->p[i_plane].i_pitch );
959
960                         p_out += p_outpic->p[i_plane].i_pitch;
961                         p_in += p_pic->p[i_plane].i_pitch;
962                     }
963                 }
964
965                 else
966                 {
967                     for( ; p_out < p_out_end ; )
968                     {
969                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
970                                p_pic->p[i_plane].i_pitch );
971
972                         p_out += p_outpic->p[i_plane].i_pitch;
973                         p_in += 2*p_pic->p[i_plane].i_pitch;
974                     }
975                 }
976                 break;
977         }
978     }
979     EndMerge();
980 }
981
982 #undef Merge
983
/* MergeGeneric: portable byte-wise average of two buffers.
 *
 * Writes dest[i] = (s1[i] + s2[i]) >> 1 (rounded down) for i in [0, i_bytes).
 *
 * _p_dest: destination buffer, at least i_bytes long
 * _p_s1:   first source buffer, at least i_bytes long
 * _p_s2:   second source buffer, at least i_bytes long
 * i_bytes: number of bytes to process; 0 is allowed and does nothing
 *
 * The loop is driven by the remaining byte count rather than by an end
 * pointer of the form "dest + i_bytes - 8", so no pointer outside the
 * buffers is ever formed, even when i_bytes is smaller than 8.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1011
1012 #if defined(CAN_COMPILE_MMXEXT)
/* MergeMMXEXT: byte-wise average of two buffers, 8 bytes at a time (pavgb).
 *
 * _p_dest/_p_s1/_p_s2: destination and the two sources, i_bytes long each
 * i_bytes:             number of bytes to process (0 is allowed)
 *
 * As long as more than 8 bytes remain, 8 bytes are averaged with pavgb on
 * mm1; the final 1..8 bytes are averaged in C as (a + b) >> 1.
 * The MMX state is left dirty: the caller has to issue emms afterwards
 * (see EndMMX).
 *
 * The asm accesses 8 bytes per operand although the operands are typed as a
 * single byte, and it overwrites mm1; both facts are declared through the
 * "memory" and "mm1" clobbers so the compiler cannot cache or reorder
 * accesses to the buffers around it.
 */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Count-driven loop: no end pointer before the buffer for i_bytes < 8 */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0"
                               : "=m" (*p_dest)
                               : "m" (*p_s1), "m" (*p_s2)
                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1039 #endif
1040
1041 #if defined(CAN_COMPILE_3DNOW)
/* Merge3DNow: byte-wise average of two buffers, 8 bytes at a time (pavgusb).
 *
 * _p_dest/_p_s1/_p_s2: destination and the two sources, i_bytes long each
 * i_bytes:             number of bytes to process (0 is allowed)
 *
 * As long as more than 8 bytes remain, 8 bytes are averaged with pavgusb on
 * mm1; the final 1..8 bytes are averaged in C as (a + b) >> 1.
 * The MMX state is left dirty: the caller has to issue femms/emms afterwards
 * (see End3DNow).
 *
 * The asm accesses 8 bytes per operand although the operands are typed as a
 * single byte, and it overwrites mm1; both facts are declared through the
 * "memory" and "mm1" clobbers.
 */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Count-driven loop: no end pointer before the buffer for i_bytes < 8 */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0"
                               : "=m" (*p_dest)
                               : "m" (*p_s1), "m" (*p_s2)
                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1068 #endif
1069
1070 #if defined(CAN_COMPILE_SSE)
/* MergeSSE2: byte-wise average of two buffers, 16 bytes at a time (pavgb).
 *
 * _p_dest/_p_s1/_p_s2: destination and the two sources, i_bytes long each
 * i_bytes:             number of bytes to process (0 is allowed)
 *
 * Three phases, all charged against the same i_bytes budget so that exactly
 * i_bytes bytes are written:
 *   1. scalar bytes until p_s1 is 16-byte aligned (or the budget runs out);
 *   2. 16-byte pavgb steps while more than 16 bytes remain;
 *   3. scalar bytes for whatever is left (1..16 bytes).
 * Scalar bytes are (a + b) >> 1.
 *
 * The asm accesses 16 bytes per operand although the operands are typed as a
 * single byte, and it overwrites xmm1; both facts are declared through the
 * "memory" and "xmm1" clobbers.
 */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Head: the bytes consumed here must count against i_bytes, otherwise
     * the routine would run past the end of the buffers by as many bytes. */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0"
                               : "=m" (*p_dest)
                               : "m" (*p_s1), "m" (*p_s2)
                               : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1102 #endif
1103
1104 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: issues the x86 `emms` instruction and nothing else.
 * Meant to be run once after a series of MMX-register merges such as
 * MergeMMXEXT (presumably installed as the pf_end_merge callback used by the
 * EndMerge macro; the assignment is outside this part of the file). */
1105 static void EndMMX( void )
1106 {
1107     __asm__ __volatile__( "emms" :: );
1108 }
1109 #endif
1110
1111 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: issues the 3DNow! `femms` instruction and nothing else.
 * Counterpart of EndMMX for the Merge3DNow routine (presumably installed as
 * the pf_end_merge callback; the assignment is outside this part of the
 * file). */
1112 static void End3DNow( void )
1113 {
1114     __asm__ __volatile__( "femms" :: );
1115 }
1116 #endif
1117
#ifdef CAN_COMPILE_C_ALTIVEC
/* MergeAltivec: byte-wise average of two buffers, 16 bytes at a time.
 *
 * _p_dest/_p_s1/_p_s2: destination and the two sources, i_bytes long each
 * i_bytes:             number of bytes to process (0 is allowed)
 *
 * Three phases, all charged against the same i_bytes budget:
 *   1. scalar bytes until the destination is 16-byte aligned (or the budget
 *      runs out);
 *   2. vec_avg on 16 bytes while at least 16 bytes remain, using vec_perm to
 *      realign the sources when either of them is not 16-byte aligned;
 *   3. scalar bytes for the remainder.
 * Scalar bytes are (a + b) >> 1.
 *
 * Pointer alignment is tested through uintptr_t, and the loops are driven by
 * the remaining byte count, so no pointer outside the buffers is formed for
 * short inputs.
 */
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( i_bytes > 0 && ( (uintptr_t)p_dest & 0xF ) )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_bytes--;
    }

    if( ( (uintptr_t)p_s1 & 0xF ) | ( (uintptr_t)p_s2 & 0xF ) )
    {
        /* Unaligned source */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( i_bytes >= 16 )
        {
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
            i_bytes -= 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( i_bytes >= 16 )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
            i_bytes -= 16;
        }
    }

    /* Tail: fewer than 16 bytes left */
    while( i_bytes > 0 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_bytes--;
    }
}
#endif
1187
#ifdef __ARM_NEON__
/* MergeNEON: byte-wise average of two buffers using NEON vhadd.u8.
 *
 * out:      destination, n bytes long
 * in1, in2: sources, n bytes long each
 * n:        number of bytes to process (0 is allowed)
 *
 * Three phases:
 *   1. MergeGeneric on the bytes needed to bring `out` to a 16-byte
 *      boundary (the stores below carry a :128 alignment qualifier);
 *   2. the asm loop, which consumes exactly 64 bytes per pass (two 32-byte
 *      loads per source, two 32-byte stores), so it runs over the largest
 *      multiple of 64 that fits;
 *   3. MergeGeneric on the remaining 0..63 bytes.
 *
 * Every q register written by the asm (q0-q11) is listed as clobbered.
 */
static void MergeNEON (void *restrict out, const void *in1,
                       const void *in2, size_t n)
{
    uint8_t *outp = out;
    const uint8_t *in1p = in1;
    const uint8_t *in2p = in2;

    /* Bytes missing to the next 16-byte boundary of the destination,
     * never more than what there is to process. */
    size_t head = (16 - (((uintptr_t)outp) & 15)) & 15;
    if (head > n)
        head = n;

    if (head)
    {
        MergeGeneric (outp, in1p, in2p, head);
        outp += head;
        in1p += head;
        in2p += head;
        n -= head;
    }

    /* One asm pass handles 64 bytes, so stop at a multiple of 64 */
    uint8_t *end = outp + (n & ~(size_t)63);

    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1]]!\n"
                "vld1.u8  {q2-q3}, [%[in2]]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1]]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2]]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");
    else
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");

    /* Tail: what the 64-byte passes could not cover */
    n &= 63;
    if (n)
        MergeGeneric (outp, in1p, in2p, n);
}
#endif
1245
1246 /*****************************************************************************
1247  * RenderX: This algorithm works on an 8x8 block basis; it copies the top
1248  * field and applies a process to recreate the bottom field:
1249  *  If an 8x8 block is classified as:
1250  *   - progressive: it applies a small blend (1,6,1)
1251  *   - interlaced:
1252  *    * in the MMX version: we do motion estimation (ME) between the 2 fields;
1253  *    if there is a good match we use motion compensation (MC) to recreate
1254  *    the bottom field (with a small blend (1,6,1) )
1255  *    * otherwise: it recreates the bottom field by an edge oriented
1256  *    interpolation.
1257  *****************************************************************************/
1258
1259 /* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
1260  * XXX: It needs access to an 8x10 area.
1261  * We use more than 8 lines to help with scrolling (text)
1262  * (and because XDeint8x8Frame uses line 9)
1263  * XXX: detection in smooth/uniform areas with noise doesn't work well,
1264  * but it's not really a problem because they don't have much detail anyway
1265  */
/* ssd: returns the square of a (one term of a sum of squared differences) */
static inline int ssd( int a )
{
    return a * a;
}
/* XDeint8x8DetectC: C version of the 8x8 interlacing detector.
 *
 * src:   top-left pixel of the block
 * i_src: pitch of src in bytes
 *
 * The block is examined in four windows of four lines, starting at lines
 * 0, 2, 4 and 6 (so lines 0..9 are read, 8 pixels wide). For each window:
 *   fr = squared differences between adjacent lines (0-1 and 1-2)
 *   ff = squared differences between lines two apart (0-2 and 1-3)
 * A window counts as interlaced when ff < 6*fr/8 and fr > 32.
 *
 * Returns 1 if at least one window counted as interlaced, 0 otherwise.
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    for( int y = 0; y < 7; y += 2 )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = src + 1 * i_src;
        const uint8_t *p_l2 = src + 2 * i_src;
        const uint8_t *p_l3 = src + 3 * i_src;
        int fr = 0;
        int ff = 0;

        for( int x = 0; x < 8; x++ )
        {
            fr += ssd( p_l0[x] - p_l1[x] ) + ssd( p_l1[x] - p_l2[x] );
            ff += ssd( p_l0[x] - p_l2[x] ) + ssd( p_l1[x] - p_l3[x] );
        }

        if( fr > 32 && ff < 6*fr/8 )
            i_hits++;

        src += 2*i_src;
    }

    return i_hits >= 1;
}
1293 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMX version of the 8x8 interlacing detector.
 *
 * src:   top-left pixel of the block
 * i_src: pitch of src in bytes
 *
 * Built from the mmx.h macros, which take (source, destination) operands as
 * their _m2r/_r2r/_r2m suffixes indicate. For every window of four lines it
 * accumulates the same two sums as XDeint8x8DetectC:
 *   fr (in mm5): squared differences of lines 0-1 and 2-1
 *   ff (in mm6): squared differences of lines 0-2 and 3-1
 * and counts the window when ff < 6*fr/8 and fr > 32.
 *
 * Returns the number of windows that counted (0 means "not interlaced"),
 * whereas the C version returns 0 or 1.
 *
 * NOTE(review): this loop runs for y = 0,2,4,6,8 (five windows, reading
 * lines 0..11) while the C version stops at y < 7 (four windows, lines
 * 0..9) - confirm whether the difference is intended.
 * The MMX state is not reset here; the caller is expected to do it.
 */
1294 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
1295 {
1296
1297     int y, x;
1298     int32_t ff, fr;
1299     int fc;
1300
1301     /* Detect interlacing */
1302     fc = 0;
    /* mm7 = 0, used below to widen bytes to 16-bit words */
1303     pxor_r2r( mm7, mm7 );
1304     for( y = 0; y < 9; y += 2 )
1305     {
1306         ff = fr = 0;
        /* mm5/mm6: per-window accumulators for fr/ff */
1307         pxor_r2r( mm5, mm5 );
1308         pxor_r2r( mm6, mm6 );
        /* 4 pixels per pass, 2 passes for the 8 columns */
1309         for( x = 0; x < 8; x+=4 )
1310         {
            /* Load 4 pixels from each of the 4 lines of the window */
1311             movd_m2r( src[        x], mm0 );
1312             movd_m2r( src[1*i_src+x], mm1 );
1313             movd_m2r( src[2*i_src+x], mm2 );
1314             movd_m2r( src[3*i_src+x], mm3 );
1315
            /* Widen to words by interleaving with the zero register */
1316             punpcklbw_r2r( mm7, mm0 );
1317             punpcklbw_r2r( mm7, mm1 );
1318             punpcklbw_r2r( mm7, mm2 );
1319             punpcklbw_r2r( mm7, mm3 );
1320
1321             movq_r2r( mm0, mm4 );
1322
            /* mm0 = line0 - line1, mm4 = line0 - line2 */
1323             psubw_r2r( mm1, mm0 );
1324             psubw_r2r( mm2, mm4 );
1325
            /* mm2 = line2 - line1, mm3 = line3 - line1 */
1326             psubw_r2r( mm1, mm2 );
1327             psubw_r2r( mm1, mm3 );
1328
            /* Square each difference (pmaddwd of a register with itself) */
1329             pmaddwd_r2r( mm0, mm0 );
1330             pmaddwd_r2r( mm4, mm4 );
1331             pmaddwd_r2r( mm2, mm2 );
1332             pmaddwd_r2r( mm3, mm3 );
            /* fr += (0-1)^2 + (2-1)^2 ; ff += (0-2)^2 + (3-1)^2 */
1333             paddd_r2r( mm0, mm2 );
1334             paddd_r2r( mm4, mm3 );
1335             paddd_r2r( mm2, mm5 );
1336             paddd_r2r( mm3, mm6 );
1337         }
1338
        /* Fold the two 32-bit halves of mm5 together and store into fr */
1339         movq_r2r( mm5, mm0 );
1340         psrlq_i2r( 32, mm0 );
1341         paddd_r2r( mm0, mm5 );
1342         movd_r2m( mm5, fr );
1343
        /* Same for mm6 into ff */
1344         movq_r2r( mm6, mm0 );
1345         psrlq_i2r( 32, mm0 );
1346         paddd_r2r( mm0, mm6 );
1347         movd_r2m( mm6, ff );
1348
1349         if( ff < 6*fr/8 && fr > 32 )
1350             fc++;
1351
1352         src += 2*i_src;
1353     }
1354     return fc;
1355 }
1356 #endif
1357
/* XDeint8x8MergeC: fill an 8x8 block treated as progressive.
 *
 * dst, i_dst:   destination block and its pitch
 * src1, i_src1: lines copied verbatim; the line at src1 + i_src1 is also read
 * src2, i_src2: lines blended in between
 *
 * Four pairs of output lines are produced. In each pair the first line is a
 * copy of 8 pixels from src1 and the second is the (1,6,1) blend
 *   (src1[x] + 6*src2[x] + src1[i_src1 + x] + 4) >> 3.
 * src1 and src2 then advance by their own pitch.
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_next = src1 + i_src1;
        uint8_t *p_blend = dst + i_dst;

        memcpy( dst, src1, 8 );

        for( int x = 0; x < 8; x++ )
            p_blend[x] = (src1[x] + 6*src2[x] + p_next[x] + 4 ) >> 3;

        dst  += 2 * i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1378
1379 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMX version of XDeint8x8MergeC.
 *
 * dst, i_dst:   destination block and its pitch
 * src1, i_src1: lines copied verbatim; the line at src1 + i_src1 is also read
 * src2, i_src2: lines blended in between
 *
 * Works on 4 pixels at a time. For each pair of output lines the first line
 * is a copy of src1 and the second is
 *   (src1[x] + 6*src2[x] + src1[i_src1 + x] + 4) >> 3.
 * The mmx.h macros take (source, destination) operands. The MMX state is not
 * reset here.
 */
1380 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
1381                                          uint8_t *src1, int i_src1,
1382                                          uint8_t *src2, int i_src2 )
1383 {
    /* Rounding term: the value 4 in each of the four 16-bit lanes */
1384     static const uint64_t m_4 = INT64_C(0x0004000400040004);
1385     int y, x;
1386
1387     /* Progressive */
    /* mm7 = 0, used to widen bytes to words and to pack back */
1388     pxor_r2r( mm7, mm7 );
1389     for( y = 0; y < 8; y += 2 )
1390     {
1391         for( x = 0; x < 8; x +=4 )
1392         {
            /* First line of the pair: straight copy of 4 pixels */
1393             movd_m2r( src1[x], mm0 );
1394             movd_r2m( mm0, dst[x] );
1395
1396             movd_m2r( src2[x], mm1 );
1397             movd_m2r( src1[i_src1+x], mm2 );
1398
1399             punpcklbw_r2r( mm7, mm0 );
1400             punpcklbw_r2r( mm7, mm1 );
1401             punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*src2, mm3 = 4*src2, then mm1 = 6*src2 + 4 */
1402             paddw_r2r( mm1, mm1 );
1403             movq_r2r( mm1, mm3 );
1404             paddw_r2r( mm3, mm3 );
            /* mm0 = src1 + next src1 line */
1405             paddw_r2r( mm2, mm0 );
1406             paddw_r2r( mm3, mm1 );
1407             paddw_m2r( m_4, mm1 );
            /* mm0 = (src1 + next + 6*src2 + 4) >> 3, packed back to bytes */
1408             paddw_r2r( mm1, mm0 );
1409             psraw_i2r( 3, mm0 );
1410             packuswb_r2r( mm7, mm0 );
            /* Second line of the pair */
1411             movd_r2m( mm0, dst[i_dst+x] );
1412         }
1413         dst += 2*i_dst;
1414         src1 += i_src1;
1415         src2 += i_src2;
1416     }
1417 }
1418
1419 #endif
1420
/* For debug: fills the 8x8 block at dst (pitch i_dst) with the value v */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int i_row = 0;

    while( i_row < 8 )
    {
        memset( dst + i_row * i_dst, v, 8 );
        i_row++;
    }
}
1428
/* XDeint8x8FieldEC: simple (1,0,1) deinterlacing for blocks that miss a
 * horizontal neighbour.
 * (Uses 8x9 pixels of src)
 * TODO: a better one for the inner part.
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 *
 * Even output lines are copies of the even source lines; each odd output
 * line is the mean, rounded down, of the source lines just above and just
 * below it in the same field (src and src + 2*i_src).
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src;
        const uint8_t *p_below = src + 2 * i_src;
        uint8_t *p_interp = dst + i_dst;

        memcpy( dst, p_above, 8 );

        for( int x = 0; x < 8; x++ )
            p_interp[x] = (p_above[x] + p_below[x] ) >> 1;

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1451 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMX version of XDeint8x8FieldEC.
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 *
 * For each of the four line pairs, 8 pixels of the source line are copied to
 * the first output line, and the second output line is the pavgb average of
 * that source line and the one at src + 2*i_src.
 * NOTE(review): pavgb rounds the average up whereas the C version uses
 * (a + b) >> 1, so results may differ by one - confirm this is acceptable.
 * The MMX state is not reset here.
 */
1452 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
1453                                           uint8_t *src, int i_src )
1454 {
1455     int y;
1456
1457     /* Interlaced */
1458     for( y = 0; y < 8; y += 2 )
1459     {
        /* Kept line: copy 8 pixels through mm0 */
1460         movq_m2r( src[0], mm0 );
1461         movq_r2m( mm0, dst[0] );
1462         dst += i_dst;
1463
        /* Missing line: average of the kept line and the next line of the
         * same field */
1464         movq_m2r( src[2*i_src], mm1 );
1465         pavgb_r2r( mm1, mm0 );
1466
1467         movq_r2m( mm0, dst[0] );
1468
1469         dst += 1*i_dst;
1470         src += 2*i_src;
1471     }
1472 }
1473 #endif
1474
/* XDeint8x8FieldC: Edge oriented interpolation
 * (Need -4 and +5 pixels H, +1 line)
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 *
 * Even output lines are copies of the even source lines. For each pixel of
 * an odd output line three 8-pixel sums of absolute differences between the
 * line above (src) and the line below (src + 2*i_src) are computed:
 *   c0: the line below shifted two pixels to the right of the line above
 *   c1: both lines at the same position
 *   c2: the line below shifted two pixels to the left of the line above
 * and the pixel is the mean of one pixel of each line taken along the
 * direction with the clearly best match (straight when undecided).
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        const uint8_t *p_up = src;
        const uint8_t *p_down = src + 2 * i_src;
        uint8_t *p_interp = dst + i_dst;

        memcpy( dst, p_up, 8 );

        for( int x = 0; x < 8; x++ )
        {
            /* 8 pixels are used just to match the MMX version, but it's
             * overkill: 5 would be enough (less isn't good) */
            int c0 = 0;
            int c1 = 0;
            int c2 = 0;

            for( int j = 0; j < 8; j++ )
            {
                c0 += abs( p_up[x - 4 + j] - p_down[x - 2 + j] );
                c1 += abs( p_up[x - 3 + j] - p_down[x - 3 + j] );
                c2 += abs( p_up[x - 2 + j] - p_down[x - 4 + j] );
            }

            if( c0 < c1 && c1 <= c2 )
                p_interp[x] = (p_up[x-1] + p_down[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                p_interp[x] = (p_up[x+1] + p_down[x-1]) >> 1;
            else
                p_interp[x] = (p_up[x+0] + p_down[x+0]) >> 1;
        }

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1521 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: MMX version of XDeint8x8FieldC (edge oriented
 * interpolation).
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 *
 * Even output lines are copies of the even source lines. For each pixel of
 * an odd output line, psadbw computes three 8-pixel sums of absolute
 * differences between the line above (src) and the line below (src2); they
 * use the same pixel offsets as c0, c1 and c2 in XDeint8x8FieldC, and the
 * same selection rule follows.
 * The MMX state is not reset here.
 */
1522 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
1523                                          uint8_t *src, int i_src )
1524 {
1525     int y, x;
1526
1527     /* Interlaced */
1528     for( y = 0; y < 8; y += 2 )
1529     {
        /* Kept line: plain copy */
1530         memcpy( dst, src, 8 );
1531         dst += i_dst;
1532
1533         for( x = 0; x < 8; x++ )
1534         {
            /* src2: the next line of the same field */
1535             uint8_t *src2 = &src[2*i_src];
1536             int32_t c0, c1, c2;
1537
            /* 8 pixels of the line above, at offsets -2, -3 and -4 */
1538             movq_m2r( src[x-2], mm0 );
1539             movq_m2r( src[x-3], mm1 );
1540             movq_m2r( src[x-4], mm2 );
1541
            /* SAD against the line below at offsets -4, -3 and -2 */
1542             psadbw_m2r( src2[x-4], mm0 );
1543             psadbw_m2r( src2[x-3], mm1 );
1544             psadbw_m2r( src2[x-2], mm2 );
1545
1546             movd_r2m( mm0, c2 );
1547             movd_r2m( mm1, c1 );
1548             movd_r2m( mm2, c0 );
1549
            /* Interpolate along the direction with the best match */
1550             if( c0 < c1 && c1 <= c2 )
1551                 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1552             else if( c2 < c1 && c1 <= c0 )
1553                 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1554             else
1555                 dst[x] = (src[x+0] + src2[x+0]) >> 1;
1556         }
1557
1558         dst += 1*i_dst;
1559         src += 2*i_src;
1560     }
1561 }
1562 #endif
1563
/* XDeintNxNDetect: interlacing detector for a block of arbitrary size
 * (only pixels inside the block are used).
 *
 * src, i_src: top-left pixel of the block and its pitch
 * i_height:   number of lines in the block
 * i_width:    number of pixels per line
 *
 * Note the parameter order: height comes before width.
 *
 * For every even line y below i_height - 2, the squared differences to the
 * next line (fr) and to the line after that (ff) are added to running totals
 * that are never reset; the line counts when ff < fr and fr > i_width / 2.
 *
 * Returns 1 when at least two lines counted, 0 otherwise.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    int fr = 0;
    int ff = 0;
    int i_hits = 0;

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *p_cur = src + y * i_src;
        const uint8_t *p_next = p_cur + 1 * i_src;
        const uint8_t *p_after = p_cur + 2 * i_src;

        for( int x = 0; x < i_width; x++ )
        {
            fr += ssd( p_cur[x] - p_next[x] );
            ff += ssd( p_cur[x] - p_after[x] );
        }

        if( ff < fr && fr > i_width / 2 )
            i_hits++;
    }

    return i_hits >= 2;
}
1592
/* XDeintNxNFrame: fill a block of arbitrary size treated as progressive.
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 * i_width:    number of pixels per line
 * i_height:   number of lines in the block
 *
 * Even lines are copied. Odd lines are the (1,2,1) blend of the source lines
 * y, y+1 and y+2, except for the last odd line which has no line below it
 * and is the mean of source lines y and y+1.
 *
 * When i_height is odd, the last (even) line is copied and nothing more is
 * done: there is no odd line left inside the block, so neither the source
 * nor the destination is touched beyond line i_height - 1.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: line y was the last one of the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1620
/* XDeintNxNField: fill a block of arbitrary size treated as interlaced.
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 * i_width:    number of pixels per line
 * i_height:   number of lines in the block
 *
 * Even lines are copied. Odd lines are the mean of the even source lines
 * above and below (y and y+2), except for the last odd line which has no
 * even line below it and is the mean of source lines y and y+1.
 *
 * When i_height is odd, the last (even) line is copied and nothing more is
 * done: there is no odd line left inside the block, so neither the source
 * nor the destination is touched beyond line i_height - 1.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: line y was the last one of the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1648
/* XDeintNxN: deinterlace a block of arbitrary size.
 *
 * dst, i_dst: destination block and its pitch
 * src, i_src: source block and its pitch
 * i_width:    number of pixels per line
 * i_height:   number of lines in the block
 *
 * Runs XDeintNxNDetect on the block and renders it with XDeintNxNField when
 * it is detected as interlaced, with XDeintNxNFrame otherwise.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect takes the height BEFORE the width, unlike this
     * function and the two renderers: pass them in its order so that the
     * detector scans the same i_width x i_height area that gets rendered. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1657
1658
/* median: returns the middle value of a, b and c.
 * It is computed as the sum of the three minus the smallest and the largest.
 */
static inline int median( int a, int b, int c )
{
    /* Order a and b first ... */
    int i_low  = ( b < a ) ? b : a;
    int i_high = ( b < a ) ? a : b;

    /* ... then let c widen the range on whichever side it falls */
    if( c < i_low )
        i_low = c;
    else if( c > i_high )
        i_high = c;

    return a + b + c - i_low - i_high;
}
1674
1675
/* XDeintBand8x8C: deinterlace one band of 8 lines, block by block (C version)
 *
 * dst, i_dst: start of the band in the destination and its pitch
 * src, i_src: start of the band in the source and its pitch
 * i_mbx:      number of full 8-pixel-wide blocks in the band
 * i_modx:     width of the partial block at the end of the band (0 if none)
 *
 * Each full block goes through XDeint8x8DetectC:
 *   - not interlaced: XDeint8x8MergeC, with the even source lines as src1
 *     and the odd source lines as src2;
 *   - interlaced: XDeint8x8FieldEC for the first and last block of the band,
 *     XDeint8x8FieldC for the blocks in between.
 * The partial block, if any, is handled by XDeintNxN.
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int x = 0; x < i_mbx; x++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            XDeint8x8MergeC( dst, i_dst,
                             src, 2*i_src,
                             src + i_src, 2*i_src );
            continue;
        }

        /* Blocks on the band edges lack a horizontal neighbour */
        if( x == 0 || x == i_mbx - 1 )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT: deinterlace one band of 8 lines, block by block, with
 * the MMXEXT block routines.
 *
 * dst, i_dst: start of the band in the destination and its pitch
 * src, i_src: start of the band in the source and its pitch
 * i_mbx:      number of full 8-pixel-wide blocks in the band
 * i_modx:     width of the partial block at the end of the band (0 if none)
 *
 * Each full block goes through XDeint8x8DetectMMXEXT:
 *   - not interlaced: XDeint8x8MergeMMXEXT, with the even source lines as
 *     src1 and the odd source lines as src2;
 *   - interlaced: XDeint8x8FieldEMMXEXT for the first and last block of the
 *     band, XDeint8x8FieldMMXEXT for the blocks in between.
 * The partial block, if any, is handled by the C routine XDeintNxN.
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int x = 0; x < i_mbx; x++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src, 2*i_src,
                                  src + i_src, 2*i_src );
            continue;
        }

        /* Blocks on the band edges lack a horizontal neighbour */
        if( x == 0 || x == i_mbx - 1 )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
1741
1742 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1743 {
1744     int i_plane;
1745
1746     /* Copy image and skip lines */
1747     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1748     {
1749         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1750         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1751
1752         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1753         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1754
1755         const int i_dst = p_outpic->p[i_plane].i_pitch;
1756         const int i_src = p_pic->p[i_plane].i_pitch;
1757
1758         int y, x;
1759
1760         for( y = 0; y < i_mby; y++ )
1761         {
1762             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1763             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1764
1765 #ifdef CAN_COMPILE_MMXEXT
1766             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1767                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1768             else
1769 #endif
1770                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1771         }
1772
1773         /* Last line (C only)*/
1774         if( i_mody )
1775         {
1776             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1777             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1778
1779             for( x = 0; x < i_mbx; x++ )
1780             {
1781                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1782
1783                 dst += 8;
1784                 src += 8;
1785             }
1786
1787             if( i_modx )
1788                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1789         }
1790     }
1791
1792 #ifdef CAN_COMPILE_MMXEXT
1793     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1794         emms();
1795 #endif
1796 }
1797
1798 /*****************************************************************************
1799  * Yadif (Yet Another DeInterlacing Filter).
1800  *****************************************************************************/
1801 /* */
/* Per-call configuration handed to the yadif line filters. */
struct vf_priv_s
{
    /*
     * Yadif operating mode:
     *   0: output 1 frame for each frame.
     *   1: output 1 frame for each field.
     *   2: like 0, but the spatial interlacing check is skipped.
     *   3: like 1, but the spatial interlacing check is skipped.
     *
     * Within vlc only bit 0x02 is meaningful: the frame/field selection
     * (bit 0x01) is done by our own code.
     */
    int mode;
};
1813
/* Integer type named x86_reg, declared before yadif.h is included.
 * NOTE(review): the original author was not sure intptr_t is the right
 * choice here -- confirm against the code that uses it. */
typedef intptr_t x86_reg;

/* Small arithmetic helpers, declared before yadif.h is included.
 * They are macros: arguments may be evaluated more than once, so never pass
 * expressions with side effects. */
#define FFABS(a)        ( (a) >= 0 ? (a) : -(a) )
#define FFMAX(a,b)      __MAX(a,b)
#define FFMAX3(a,b,c)   FFMAX( FFMAX(a,b), c )
#define FFMIN(a,b)      __MIN(a,b)
#define FFMIN3(a,b,c)   FFMIN( FFMIN(a,b), c )
1822
1823 /* yadif.h comes from vf_yadif.c of mplayer project */
1824 #include "yadif.h"
1825
1826 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1827 {
1828     vout_sys_t *p_sys = p_vout->p_sys;
1829
1830     /* */
1831     assert( i_order == 0 || i_order == 1 );
1832     assert( i_field == 0 || i_field == 1 );
1833
1834     if( i_order == 0 )
1835     {
1836         /* Duplicate the picture
1837          * TODO when the vout rework is finished, picture_Hold() might be enough
1838          * but becarefull, the pitches must match */
1839         picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1840         if( p_dup )
1841             picture_Copy( p_dup, p_src );
1842
1843         /* Slide the history */
1844         if( p_sys->pp_history[0] )
1845             picture_Release( p_sys->pp_history[0]  );
1846         for( int i = 1; i < HISTORY_SIZE; i++ )
1847             p_sys->pp_history[i-1] = p_sys->pp_history[i];
1848         p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1849     }
1850
1851     /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1852     picture_t *p_prev = p_sys->pp_history[0];
1853     picture_t *p_cur  = p_sys->pp_history[1];
1854     picture_t *p_next = p_sys->pp_history[2];
1855
1856     /* Filter if we have all the pictures we need */
1857     if( p_prev && p_cur && p_next )
1858     {
1859         /* */
1860         void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1861 #if defined(HAVE_YADIF_SSE2)
1862         if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1863             filter = yadif_filter_line_mmx2;
1864         else
1865 #endif
1866             filter = yadif_filter_line_c;
1867
1868         for( int n = 0; n < p_dst->i_planes; n++ )
1869         {
1870             const plane_t *prevp = &p_prev->p[n];
1871             const plane_t *curp  = &p_cur->p[n];
1872             const plane_t *nextp = &p_next->p[n];
1873             plane_t *dstp        = &p_dst->p[n];
1874
1875             for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
1876             {
1877                 if( (y % 2) == i_field )
1878                 {
1879                     vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1880                                 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1881                 }
1882                 else
1883                 {
1884                     struct vf_priv_s cfg;
1885                     /* Spatial checks only when enough data */
1886                     cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1887
1888                     assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1889                     filter( &cfg,
1890                             &dstp->p_pixels[y * dstp->i_pitch],
1891                             &prevp->p_pixels[y * prevp->i_pitch],
1892                             &curp->p_pixels[y * curp->i_pitch],
1893                             &nextp->p_pixels[y * nextp->i_pitch],
1894                             dstp->i_visible_pitch,
1895                             curp->i_pitch,
1896                             (i_field ^ (i_order == i_field)) & 1 );
1897                 }
1898
1899                 /* We duplicate the first and last lines */
1900                 if( y == 1 )
1901                     vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1902                 else if( y == dstp->i_visible_lines - 2 )
1903                     vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1904             }
1905         }
1906
1907         /* */
1908         p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1909     }
1910     else
1911     {
1912         /* Fallback to something simple
1913          * XXX it is wrong when we have 2 pictures, we should not output a picture */
1914         RenderX( p_dst, p_src );
1915     }
1916 }
1917
1918 /*****************************************************************************
1919  * FilterCallback: called when changing the deinterlace method on the fly.
1920  *****************************************************************************/
1921 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1922                            vlc_value_t oldval, vlc_value_t newval,
1923                            void *p_data )
1924 {
1925     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1926     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1927     vout_sys_t *p_sys = p_vout->p_sys;
1928
1929     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1930
1931     vlc_mutex_lock( &p_sys->filter_lock );
1932     const bool b_old_half_height = p_sys->b_half_height;
1933
1934     SetFilterMethod( p_vout, newval.psz_string );
1935
1936     if( !b_old_half_height == !p_sys->b_half_height )
1937     {
1938         vlc_mutex_unlock( &p_sys->filter_lock );
1939         return VLC_SUCCESS;
1940     }
1941
1942     /* We need to kill the old vout */
1943     if( p_sys->p_vout )
1944     {
1945         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1946         vout_CloseAndRelease( p_sys->p_vout );
1947     }
1948
1949     /* Try to open a new video output */
1950     p_sys->p_vout = SpawnRealVout( p_vout );
1951
1952     if( p_sys->p_vout == NULL )
1953     {
1954         /* Everything failed */
1955         msg_Err( p_vout, "cannot open vout, aborting" );
1956
1957         vlc_mutex_unlock( &p_sys->filter_lock );
1958         return VLC_EGENERIC;
1959     }
1960
1961     vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1962
1963     vlc_mutex_unlock( &p_sys->filter_lock );
1964     return VLC_SUCCESS;
1965 }
1966
1967 /*****************************************************************************
1968  * video filter2 functions
1969  *****************************************************************************/
1970 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1971 {
1972     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1973     picture_t *p_pic_dst;
1974
1975     /* Request output picture */
1976     p_pic_dst = filter_NewPicture( p_filter );
1977     if( p_pic_dst == NULL )
1978     {
1979         picture_Release( p_pic );
1980         return NULL;
1981     }
1982
1983     switch( p_vout->p_sys->i_mode )
1984     {
1985         case DEINTERLACE_DISCARD:
1986             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1987             break;
1988
1989         case DEINTERLACE_BOB:
1990 #if 0
1991             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1992             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1993             break;
1994 #endif
1995
1996         case DEINTERLACE_LINEAR:
1997 #if 0
1998             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1999             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
2000 #endif
2001             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2002             picture_Release( p_pic_dst );
2003             picture_Release( p_pic );
2004             return NULL;
2005
2006         case DEINTERLACE_MEAN:
2007             RenderMean( p_vout, p_pic_dst, p_pic );
2008             break;
2009
2010         case DEINTERLACE_BLEND:
2011             RenderBlend( p_vout, p_pic_dst, p_pic );
2012             break;
2013
2014         case DEINTERLACE_X:
2015             RenderX( p_pic_dst, p_pic );
2016             break;
2017
2018         case DEINTERLACE_YADIF:
2019             msg_Err( p_vout, "delaying frames is not supported yet" );
2020             picture_Release( p_pic_dst );
2021             picture_Release( p_pic );
2022             return NULL;
2023
2024         case DEINTERLACE_YADIF2X:
2025             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2026             picture_Release( p_pic_dst );
2027             picture_Release( p_pic );
2028             return NULL;
2029     }
2030
2031     picture_CopyProperties( p_pic_dst, p_pic );
2032     p_pic_dst->b_progressive = true;
2033
2034     picture_Release( p_pic );
2035     return p_pic_dst;
2036 }
2037
2038 /*****************************************************************************
2039  * OpenFilter:
2040  *****************************************************************************/
2041 static int OpenFilter( vlc_object_t *p_this )
2042 {
2043     filter_t *p_filter = (filter_t*)p_this;
2044     vout_thread_t *p_vout;
2045     vlc_value_t val;
2046
2047     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2048         return VLC_EGENERIC;
2049
2050     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2051      * by spu filters */
2052     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2053     vlc_object_attach( p_vout, p_filter );
2054     p_filter->p_sys = (filter_sys_t *)p_vout;
2055     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2056
2057     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2058                    p_filter->p_cfg );
2059     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2060
2061     var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2062     var_Set( p_filter, "filter-deinterlace-mode", val );
2063     free( val.psz_string );
2064
2065     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2066     {
2067         vlc_object_detach( p_vout );
2068         vlc_object_release( p_vout );
2069         return VLC_EGENERIC;
2070     }
2071
2072     video_format_t fmt;
2073     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2074     if( !p_filter->b_allow_fmt_out_change &&
2075         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2076           fmt.i_height != p_filter->fmt_in.video.i_height ) )
2077     {
2078         CloseFilter( VLC_OBJECT(p_filter) );
2079         return VLC_EGENERIC;
2080     }
2081     p_filter->fmt_out.video = fmt;
2082     p_filter->fmt_out.i_codec = fmt.i_chroma;
2083     p_filter->pf_video_filter = Deinterlace;
2084
2085     msg_Dbg( p_filter, "deinterlacing" );
2086
2087     return VLC_SUCCESS;
2088 }
2089
2090 /*****************************************************************************
2091  * CloseFilter: clean up the filter
2092  *****************************************************************************/
2093 static void CloseFilter( vlc_object_t *p_this )
2094 {
2095     filter_t *p_filter = (filter_t*)p_this;
2096     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2097
2098     Destroy( VLC_OBJECT(p_vout) );
2099     vlc_object_detach( p_vout );
2100     vlc_object_release( p_vout );
2101 }
2102