]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
24678f43644c9914bb83b3bf586928abb504461d
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000-2009 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <assert.h>
33
34 #ifdef HAVE_ALTIVEC_H
35 #   include <altivec.h>
36 #endif
37
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_vout.h>
41 #include <vlc_filter.h>
42 #include <vlc_cpu.h>
43
44 #ifdef CAN_COMPILE_MMXEXT
45 #   include "mmx.h"
46 #endif
47
48 #include "filter_common.h"
49
/* Identifiers of the deinterlacing algorithms. One of these values is stored
 * in vout_sys_t.i_mode by SetFilterMethod() and dispatched on in Render(). */
#define DEINTERLACE_DISCARD 1
#define DEINTERLACE_MEAN    2
#define DEINTERLACE_BLEND   3
#define DEINTERLACE_BOB     4
#define DEINTERLACE_LINEAR  5
#define DEINTERLACE_X       6
#define DEINTERLACE_YADIF   7
#define DEINTERLACE_YADIF2X 8
58
59 /*****************************************************************************
 * Local prototypes
61  *****************************************************************************/
/* Module entry points of the video output filter (see set_callbacks) */
static int  Create    ( vlc_object_t * );
static void Destroy   ( vlc_object_t * );

/* Video output thread hooks installed by Create() */
static int  Init      ( vout_thread_t * );
static void End       ( vout_thread_t * );
static void Render    ( vout_thread_t *, picture_t * );

/* Mouse event forwarder registered on the child video output */
static int  MouseEvent( vlc_object_t *p_this, char const *psz_var,
                        vlc_value_t oldval, vlc_value_t newval, void *p_data );

/* One renderer per deinterlacing mode; the trailing int arguments select
 * the field (and, for Yadif, the output order) */
static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderX      ( picture_t *, picture_t * );
static void RenderYadif  ( vout_thread_t *, picture_t *, picture_t *, int, int );

/* Line merging routines (destination, source 1, source 2, byte count);
 * Create() picks one of them according to the CPU capabilities */
static void MergeGeneric ( void *, const void *, const void *, size_t );
#if defined(CAN_COMPILE_C_ALTIVEC)
static void MergeAltivec ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_MMXEXT)
static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void Merge3DNow   ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_SSE)
static void MergeSSE2    ( void *, const void *, const void *, size_t );
#endif
/* Optional epilogues called once a series of merges is finished */
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
static void EndMMX       ( void );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void End3DNow     ( void );
#endif
#if defined __ARM_NEON__
static void MergeNEON (void *, const void *, const void *, size_t);
#endif

/* Mode selection and creation of the child (real) video output */
static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );

/* Entry points of the "video filter2" submodule */
static int OpenFilter( vlc_object_t *p_this );
static void CloseFilter( vlc_object_t *p_this );
108
109 /*****************************************************************************
110  * Callback prototypes
111  *****************************************************************************/
/* Variable callback attached to "filter-deinterlace-mode" by Init() and
 * detached by End(). */
static int FilterCallback( vlc_object_t *, char const *,
                           vlc_value_t, vlc_value_t, void * );
114
115 /*****************************************************************************
116  * Module descriptor
117  *****************************************************************************/
/* Option labels for the display (vout) filter */
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")

/* Option labels for the streaming (video filter2) submodule */
#define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
#define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")

/* Prefix of the submodule configuration variables */
#define FILTER_CFG_PREFIX "sout-deinterlace-"

/* Accepted mode names and their user-visible labels; both arrays must keep
 * the same order since they are passed together to change_string_list() */
static const char *const mode_list[] = {
    "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
static const char *const mode_list_text[] = {
    N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };

vlc_module_begin ()
    set_description( N_("Deinterlacing video filter") )
    set_shortname( N_("Deinterlace" ))
    set_capability( "video filter", 0 )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )

    /* Display filter: defaults to "discard" */
    set_section( N_("Display"),NULL)
    add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
                MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
        change_safe ()

    add_shortcut( "deinterlace" )
    set_callbacks( Create, Destroy )

    /* Streaming filter submodule: defaults to "blend" */
    add_submodule ()
    set_capability( "video filter2", 0 )
    set_section( N_("Streaming"),NULL)
    add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
                SOUT_MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
    add_shortcut( "deinterlace" )
    set_callbacks( OpenFilter, CloseFilter )
vlc_module_end ()

/* NULL-terminated list of option names (without FILTER_CFG_PREFIX);
 * presumably consumed by the submodule's option parsing - confirm in
 * OpenFilter() */
static const char *const ppsz_filter_options[] = {
    "mode", NULL
};
160
161 /*****************************************************************************
162  * vout_sys_t: Deinterlace video output method descriptor
163  *****************************************************************************
164  * This structure is part of the video output thread descriptor.
165  * It describes the Deinterlace specific properties of an output thread.
166  *****************************************************************************/
167 #define HISTORY_SIZE (3)
struct vout_sys_t
{
    int        i_mode;        /* Deinterlace mode (one of DEINTERLACE_*) */
    bool b_double_rate; /* Shall we double the framerate? */
    bool b_half_height; /* Shall we divide the height by 2? */

    mtime_t    last_date;     /* Date of the previous input picture; used by
                               * Render() to date the second output picture
                               * in double rate modes (0 = none seen yet) */
    mtime_t    next_date;     /* NOTE(review): not referenced in the code
                               * visible here - confirm it is still used */

    vout_thread_t *p_vout;    /* Child (real) video output */

    vlc_mutex_t filter_lock;  /* Held by Render() while pictures are produced;
                               * presumably also taken when the mode is
                               * changed - confirm in FilterCallback() */

    /* Line merging routine chosen by Create() from the CPU capabilities,
     * and its optional epilogue (NULL when none is required) */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    void (*pf_end_merge) ( void );

    /* Yadif */
    picture_t *pp_history[HISTORY_SIZE]; /* Reset to NULL by Init(), released
                                          * by End() */
};
187
188 /*****************************************************************************
189  * Control: control facility for the vout (forwards to child vout)
190  *****************************************************************************/
191 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
192 {
193     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
194 }
195
196 /*****************************************************************************
197  * Create: allocates Deinterlace video thread output method
198  *****************************************************************************
199  * This function allocates and initializes a Deinterlace vout method.
200  *****************************************************************************/
201 static int Create( vlc_object_t *p_this )
202 {
203     vout_thread_t *p_vout = (vout_thread_t *)p_this;
204     vout_sys_t *p_sys;
205     char *psz_mode;
206
207     /* Allocate structure */
208     p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
209     if( p_vout->p_sys == NULL )
210         return VLC_ENOMEM;
211
212     p_vout->pf_init = Init;
213     p_vout->pf_end = End;
214     p_vout->pf_manage = NULL;
215     p_vout->pf_render = Render;
216     p_vout->pf_display = NULL;
217     p_vout->pf_control = Control;
218
219     p_sys->i_mode = DEINTERLACE_DISCARD;
220     p_sys->b_double_rate = false;
221     p_sys->b_half_height = true;
222     p_sys->last_date = 0;
223     p_sys->p_vout = 0;
224     vlc_mutex_init( &p_sys->filter_lock );
225
226 #if defined(CAN_COMPILE_C_ALTIVEC)
227     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
228     {
229         p_sys->pf_merge = MergeAltivec;
230         p_sys->pf_end_merge = NULL;
231     }
232     else
233 #endif
234 #if defined(CAN_COMPILE_SSE)
235     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
236     {
237         p_sys->pf_merge = MergeSSE2;
238         p_sys->pf_end_merge = EndMMX;
239     }
240     else
241 #endif
242 #if defined(CAN_COMPILE_MMXEXT)
243     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
244     {
245         p_sys->pf_merge = MergeMMXEXT;
246         p_sys->pf_end_merge = EndMMX;
247     }
248     else
249 #endif
250 #if defined(CAN_COMPILE_3DNOW)
251     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
252     {
253         p_sys->pf_merge = Merge3DNow;
254         p_sys->pf_end_merge = End3DNow;
255     }
256     else
257 #endif
258 #if defined __ARM_NEON__
259     if( vlc_CPU() & CPU_CAPABILITY_NEON )
260     {
261         p_sys->pf_merge = MergeNEON;
262         p_sys->pf_end_merge = NULL;
263     }
264     else
265 #endif
266     {
267         p_sys->pf_merge = MergeGeneric;
268         p_sys->pf_end_merge = NULL;
269     }
270
271     /* Look what method was requested */
272     psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
273
274     if( !psz_mode )
275     {
276         msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
277         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
278
279         psz_mode = strdup( "discard" );
280     }
281
282     SetFilterMethod( p_vout, psz_mode );
283
284     free( psz_mode );
285
286     return VLC_SUCCESS;
287 }
288
289 /*****************************************************************************
290  * SetFilterMethod: setup the deinterlace method to use.
291  *****************************************************************************/
292 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
293 {
294     vout_sys_t *p_sys = p_vout->p_sys;
295     if( !strcmp( psz_method, "mean" ) )
296     {
297         p_sys->i_mode = DEINTERLACE_MEAN;
298         p_sys->b_double_rate = false;
299         p_sys->b_half_height = true;
300     }
301     else if( !strcmp( psz_method, "blend" )
302              || !strcmp( psz_method, "average" )
303              || !strcmp( psz_method, "combine-fields" ) )
304     {
305         p_sys->i_mode = DEINTERLACE_BLEND;
306         p_sys->b_double_rate = false;
307         p_sys->b_half_height = false;
308     }
309     else if( !strcmp( psz_method, "bob" )
310              || !strcmp( psz_method, "progressive-scan" ) )
311     {
312         p_sys->i_mode = DEINTERLACE_BOB;
313         p_sys->b_double_rate = true;
314         p_sys->b_half_height = false;
315     }
316     else if( !strcmp( psz_method, "linear" ) )
317     {
318         p_sys->i_mode = DEINTERLACE_LINEAR;
319         p_sys->b_double_rate = true;
320         p_sys->b_half_height = false;
321     }
322     else if( !strcmp( psz_method, "x" ) )
323     {
324         p_sys->i_mode = DEINTERLACE_X;
325         p_sys->b_double_rate = false;
326         p_sys->b_half_height = false;
327     }
328     else if( !strcmp( psz_method, "yadif" ) )
329     {
330         p_sys->i_mode = DEINTERLACE_YADIF;
331         p_sys->b_double_rate = false;
332         p_sys->b_half_height = false;
333     }
334     else if( !strcmp( psz_method, "yadif2x" ) )
335     {
336         p_sys->i_mode = DEINTERLACE_YADIF2X;
337         p_sys->b_double_rate = true;
338         p_sys->b_half_height = false;
339     }
340     else
341     {
342         const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
343                             p_vout->render.i_chroma == VLC_CODEC_J422;
344         if( strcmp( psz_method, "discard" ) )
345             msg_Err( p_vout, "no valid deinterlace mode provided, "
346                      "using \"discard\"" );
347
348         p_sys->i_mode = DEINTERLACE_DISCARD;
349         p_sys->b_double_rate = false;
350         p_sys->b_half_height = !b_i422;
351     }
352
353     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
354 }
355
356 static void GetOutputFormat( vout_thread_t *p_vout,
357                              video_format_t *p_dst, const video_format_t *p_src )
358 {
359     *p_dst = *p_src;
360
361     if( p_vout->p_sys->b_half_height )
362     {
363         p_dst->i_height /= 2;
364         p_dst->i_visible_height /= 2;
365         p_dst->i_y_offset /= 2;
366         p_dst->i_sar_den *= 2;
367     }
368
369     if( p_src->i_chroma == VLC_CODEC_I422 ||
370         p_src->i_chroma == VLC_CODEC_J422 )
371     {
372         switch( p_vout->p_sys->i_mode )
373         {
374         case DEINTERLACE_MEAN:
375         case DEINTERLACE_LINEAR:
376         case DEINTERLACE_X:
377         case DEINTERLACE_YADIF:
378         case DEINTERLACE_YADIF2X:
379             p_dst->i_chroma = p_src->i_chroma;
380             break;
381         default:
382             p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
383                                                                   VLC_CODEC_J420;
384             break;
385         }
386     }
387 }
388
389 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
390 {
391     return i_chroma == VLC_CODEC_I420 ||
392            i_chroma == VLC_CODEC_J420 ||
393            i_chroma == VLC_CODEC_YV12 ||
394            i_chroma == VLC_CODEC_I422 ||
395            i_chroma == VLC_CODEC_J422;
396 }
397
398 /*****************************************************************************
399  * Init: initialize Deinterlace video thread output method
400  *****************************************************************************/
401 static int Init( vout_thread_t *p_vout )
402 {
403     I_OUTPUTPICTURES = 0;
404
405     if( !IsChromaSupported( p_vout->render.i_chroma ) )
406         return VLC_EGENERIC; /* unknown chroma */
407
408     /* Initialize the output structure, full of directbuffers since we want
409      * the decoder to output directly to our structures. */
410     p_vout->output.i_chroma = p_vout->render.i_chroma;
411     p_vout->output.i_width  = p_vout->render.i_width;
412     p_vout->output.i_height = p_vout->render.i_height;
413     p_vout->output.i_aspect = p_vout->render.i_aspect;
414     p_vout->fmt_out = p_vout->fmt_in;
415
416     /* Try to open the real video output */
417     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
418
419     if( p_vout->p_sys->p_vout == NULL )
420     {
421         /* Everything failed */
422         msg_Err( p_vout, "cannot open vout, aborting" );
423
424         return VLC_EGENERIC;
425     }
426
427     for( int i = 0; i < HISTORY_SIZE; i++ )
428         p_vout->p_sys->pp_history[i] = NULL;
429
430     vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
431
432     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
433
434     var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
435
436     return VLC_SUCCESS;
437 }
438
439 /*****************************************************************************
440  * SpawnRealVout: spawn the real video output.
441  *****************************************************************************/
442 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
443 {
444     msg_Dbg( p_vout, "spawning the real video output" );
445
446     video_format_t fmt;
447     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
448
449     return vout_Create( p_vout, &fmt );
450 }
451
452 /*****************************************************************************
453  * End: terminate Deinterlace video thread output method
454  *****************************************************************************/
455 static void End( vout_thread_t *p_vout )
456 {
457     vout_sys_t *p_sys = p_vout->p_sys;
458
459     var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
460
461     for( int i = 0; i < HISTORY_SIZE; i++ )
462     {
463         if( p_sys->pp_history[i] )
464             picture_Release( p_sys->pp_history[i] );
465     }
466
467     if( p_sys->p_vout )
468     {
469         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
470         vout_CloseAndRelease( p_sys->p_vout );
471     }
472
473     vout_filter_ReleaseDirectBuffers( p_vout );
474 }
475
476 /*****************************************************************************
477  * Destroy: destroy Deinterlace video thread output method
478  *****************************************************************************
479  * Terminate an output method created by DeinterlaceCreateOutputMethod
480  *****************************************************************************/
481 static void Destroy( vlc_object_t *p_this )
482 {
483     vout_thread_t *p_vout = (vout_thread_t *)p_this;
484     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
485     free( p_vout->p_sys );
486 }
487
488 /**
489  * Forward mouse event with proper conversion.
490  */
491 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
492                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
493 {
494     vout_thread_t *p_vout = p_data;
495     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
496
497     if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
498         newval.i_int *= 2;
499
500     return var_Set( p_vout, psz_var, newval );
501 }
502
503 /*****************************************************************************
504  * Render: displays previously rendered output
505  *****************************************************************************
506  * This function send the currently rendered image to Deinterlace image,
507  * waits until it is displayed and switch the two rendering buffers, preparing
508  * next frame.
509  *****************************************************************************/
510 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
511 {
512     vout_sys_t *p_sys = p_vout->p_sys;
513     picture_t *pp_outpic[2];
514
515     /* FIXME are they needed ? */
516     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
517     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
518     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
519     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
520
521     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
522     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
523     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
524     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
525     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
526     if( p_vout->p_sys->b_half_height )
527     {
528         p_sys->p_vout->fmt_in.i_y_offset /= 2;
529         p_sys->p_vout->fmt_in.i_visible_height /= 2;
530     }
531
532     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
533     {
534         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
535
536         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
537         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
538
539         video_format_t fmt = p_vout->fmt_out;
540         if( p_vout->p_sys->b_half_height )
541         {
542             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
543             fmt.i_sar_den *= 2;
544         }
545
546         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
547     }
548     if( !p_sys->p_vout )
549         return;
550
551     pp_outpic[0] = pp_outpic[1] = NULL;
552
553     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
554
555     /* Get a new picture */
556     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
557                                                 0, 0, 0 ) )
558               == NULL )
559     {
560         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
561         {
562             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
563             return;
564         }
565         msleep( VOUT_OUTMEM_SLEEP );
566     }
567
568     pp_outpic[0]->date = p_pic->date;
569
570     /* If we are using double rate, get an additional new picture */
571     if( p_vout->p_sys->b_double_rate )
572     {
573         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
574                                                  0, 0, 0 ) )
575                   == NULL )
576         {
577             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
578             {
579                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
580                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
581                 return;
582             }
583             msleep( VOUT_OUTMEM_SLEEP );
584         }
585
586         /* 20ms is a bit arbitrary, but it's only for the first image we get */
587         if( !p_vout->p_sys->last_date )
588             pp_outpic[1]->date = p_pic->date + 20000;
589         else
590             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
591         p_vout->p_sys->last_date = p_pic->date;
592     }
593
594     switch( p_vout->p_sys->i_mode )
595     {
596         case DEINTERLACE_DISCARD:
597             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
598             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
599             break;
600
601         case DEINTERLACE_BOB:
602             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
603             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
604             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
605             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
606             break;
607
608         case DEINTERLACE_LINEAR:
609             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
610             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
611             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
612             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
613             break;
614
615         case DEINTERLACE_MEAN:
616             RenderMean( p_vout, pp_outpic[0], p_pic );
617             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
618             break;
619
620         case DEINTERLACE_BLEND:
621             RenderBlend( p_vout, pp_outpic[0], p_pic );
622             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
623             break;
624
625         case DEINTERLACE_X:
626             RenderX( pp_outpic[0], p_pic );
627             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
628             break;
629
630         case DEINTERLACE_YADIF:
631             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
632             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
633             break;
634
635         case DEINTERLACE_YADIF2X:
636             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
637             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
638             RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
639             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
640             break;
641     }
642     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
643 }
644
645 /*****************************************************************************
646  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
647  *****************************************************************************/
648 static void RenderDiscard( vout_thread_t *p_vout,
649                            picture_t *p_outpic, picture_t *p_pic, int i_field )
650 {
651     int i_plane;
652
653     /* Copy image and skip lines */
654     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
655     {
656         uint8_t *p_in, *p_out_end, *p_out;
657         int i_increment;
658
659         p_in = p_pic->p[i_plane].p_pixels
660                    + i_field * p_pic->p[i_plane].i_pitch;
661
662         p_out = p_outpic->p[i_plane].p_pixels;
663         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
664                              * p_outpic->p[i_plane].i_visible_lines;
665
666         switch( p_vout->render.i_chroma )
667         {
668         case VLC_CODEC_I420:
669         case VLC_CODEC_J420:
670         case VLC_CODEC_YV12:
671
672             for( ; p_out < p_out_end ; )
673             {
674                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
675
676                 p_out += p_outpic->p[i_plane].i_pitch;
677                 p_in += 2 * p_pic->p[i_plane].i_pitch;
678             }
679             break;
680
681         case VLC_CODEC_I422:
682         case VLC_CODEC_J422:
683
684             i_increment = 2 * p_pic->p[i_plane].i_pitch;
685
686             if( i_plane == Y_PLANE )
687             {
688                 for( ; p_out < p_out_end ; )
689                 {
690                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
691                     p_out += p_outpic->p[i_plane].i_pitch;
692                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
693                     p_out += p_outpic->p[i_plane].i_pitch;
694                     p_in += i_increment;
695                 }
696             }
697             else
698             {
699                 for( ; p_out < p_out_end ; )
700                 {
701                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
702                     p_out += p_outpic->p[i_plane].i_pitch;
703                     p_in += i_increment;
704                 }
705             }
706             break;
707
708         default:
709             break;
710         }
711     }
712 }
713
714 /*****************************************************************************
715  * RenderBob: renders a BOB picture - simple copy
716  *****************************************************************************/
/* Build p_outpic by line doubling one field of p_pic.
 *  - p_vout:   filter video output; its render chroma selects the layout
 *  - p_outpic: destination picture
 *  - p_pic:    interlaced source picture
 *  - i_field:  0 for the TOP field, 1 for the BOTTOM field
 * Chromas other than the 4:2:0 and 4:2:2 ones listed below leave the output
 * untouched. */
static void RenderBob( vout_thread_t *p_vout,
                       picture_t *p_outpic, picture_t *p_pic, int i_field )
{
    int i_plane;

    /* Copy image and skip lines */
    for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
    {
        uint8_t *p_in, *p_out_end, *p_out;

        p_in = p_pic->p[i_plane].p_pixels;
        p_out = p_outpic->p[i_plane].p_pixels;
        p_out_end = p_out + p_outpic->p[i_plane].i_pitch
                             * p_outpic->p[i_plane].i_visible_lines;

        switch( p_vout->render.i_chroma )
        {
            case VLC_CODEC_I420:
            case VLC_CODEC_J420:
            case VLC_CODEC_YV12:
                /* For BOTTOM field we need to add the first line */
                if( i_field == 1 )
                {
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }

                /* Stop the loop two lines early: the last lines are handled
                 * separately below */
                p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;

                /* Write each kept source line twice and skip the line of
                 * the other field */
                for( ; p_out < p_out_end ; )
                {
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                    p_out += p_outpic->p[i_plane].i_pitch;

                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                    p_in += 2 * p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }

                vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                /* For TOP field we need to add the last line */
                if( i_field == 0 )
                {
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                }
                break;

            case VLC_CODEC_I422:
            case VLC_CODEC_J422:
                /* For BOTTOM field we need to add the first line */
                if( i_field == 1 )
                {
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                }

                p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;

                if( i_plane == Y_PLANE )
                {
                    /* Luma: same line doubling as in the 4:2:0 case */
                    for( ; p_out < p_out_end ; )
                    {
                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                        p_out += p_outpic->p[i_plane].i_pitch;

                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                        p_in += 2 * p_pic->p[i_plane].i_pitch;
                        p_out += p_outpic->p[i_plane].i_pitch;
                    }
                }
                else
                {
                    /* Chroma: one output line per kept source line.
                     * NOTE(review): when i_field == 1 this path appears to
                     * leave the last output chroma line unwritten (the loop
                     * stops two lines early and only one more line is copied
                     * below) - confirm against the expected output. */
                    for( ; p_out < p_out_end ; )
                    {
                        vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                        p_out += p_outpic->p[i_plane].i_pitch;
                        p_in += 2 * p_pic->p[i_plane].i_pitch;
                    }
                }

                vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );

                /* For TOP field we need to add the last line */
                if( i_field == 0 )
                {
                    p_in += p_pic->p[i_plane].i_pitch;
                    p_out += p_outpic->p[i_plane].i_pitch;
                    vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
                }
                break;
        }
    }
}
820
/* Shorthands for the merge routine and its optional epilogue selected in
 * Create(). Both expect a variable named p_vout in the calling scope.
 * Caution: EndMerge expands to an unbraced `if`, so it must only be used as
 * a full statement, "EndMerge();", never directly before an `else`. */
#define Merge p_vout->p_sys->pf_merge
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
823
824 /*****************************************************************************
825  * RenderLinear: BOB with linear interpolation
826  *****************************************************************************/
827 static void RenderLinear( vout_thread_t *p_vout,
828                           picture_t *p_outpic, picture_t *p_pic, int i_field )
829 {
830     int i_plane;
831
832     /* Copy image and skip lines */
833     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
834     {
835         uint8_t *p_in, *p_out_end, *p_out;
836
837         p_in = p_pic->p[i_plane].p_pixels;
838         p_out = p_outpic->p[i_plane].p_pixels;
839         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
840                              * p_outpic->p[i_plane].i_visible_lines;
841
842         /* For BOTTOM field we need to add the first line */
843         if( i_field == 1 )
844         {
845             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
846             p_in += p_pic->p[i_plane].i_pitch;
847             p_out += p_outpic->p[i_plane].i_pitch;
848         }
849
850         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
851
852         for( ; p_out < p_out_end ; )
853         {
854             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
855
856             p_out += p_outpic->p[i_plane].i_pitch;
857
858             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
859                    p_pic->p[i_plane].i_pitch );
860
861             p_in += 2 * p_pic->p[i_plane].i_pitch;
862             p_out += p_outpic->p[i_plane].i_pitch;
863         }
864
865         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
866
867         /* For TOP field we need to add the last line */
868         if( i_field == 0 )
869         {
870             p_in += p_pic->p[i_plane].i_pitch;
871             p_out += p_outpic->p[i_plane].i_pitch;
872             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
873         }
874     }
875     EndMerge();
876 }
877
878 static void RenderMean( vout_thread_t *p_vout,
879                         picture_t *p_outpic, picture_t *p_pic )
880 {
881     int i_plane;
882
883     /* Copy image and skip lines */
884     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
885     {
886         uint8_t *p_in, *p_out_end, *p_out;
887
888         p_in = p_pic->p[i_plane].p_pixels;
889
890         p_out = p_outpic->p[i_plane].p_pixels;
891         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
892                              * p_outpic->p[i_plane].i_visible_lines;
893
894         /* All lines: mean value */
895         for( ; p_out < p_out_end ; )
896         {
897             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
898                    p_pic->p[i_plane].i_pitch );
899
900             p_out += p_outpic->p[i_plane].i_pitch;
901             p_in += 2 * p_pic->p[i_plane].i_pitch;
902         }
903     }
904     EndMerge();
905 }
906
907 static void RenderBlend( vout_thread_t *p_vout,
908                          picture_t *p_outpic, picture_t *p_pic )
909 {
910     int i_plane;
911
912     /* Copy image and skip lines */
913     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
914     {
915         uint8_t *p_in, *p_out_end, *p_out;
916
917         p_in = p_pic->p[i_plane].p_pixels;
918
919         p_out = p_outpic->p[i_plane].p_pixels;
920         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
921                              * p_outpic->p[i_plane].i_visible_lines;
922
923         switch( p_vout->render.i_chroma )
924         {
925             case VLC_CODEC_I420:
926             case VLC_CODEC_J420:
927             case VLC_CODEC_YV12:
928                 /* First line: simple copy */
929                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
930                 p_out += p_outpic->p[i_plane].i_pitch;
931
932                 /* Remaining lines: mean value */
933                 for( ; p_out < p_out_end ; )
934                 {
935                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
936                            p_pic->p[i_plane].i_pitch );
937
938                     p_out += p_outpic->p[i_plane].i_pitch;
939                     p_in += p_pic->p[i_plane].i_pitch;
940                 }
941                 break;
942
943             case VLC_CODEC_I422:
944             case VLC_CODEC_J422:
945                 /* First line: simple copy */
946                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
947                 p_out += p_outpic->p[i_plane].i_pitch;
948
949                 /* Remaining lines: mean value */
950                 if( i_plane == Y_PLANE )
951                 {
952                     for( ; p_out < p_out_end ; )
953                     {
954                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
955                                p_pic->p[i_plane].i_pitch );
956
957                         p_out += p_outpic->p[i_plane].i_pitch;
958                         p_in += p_pic->p[i_plane].i_pitch;
959                     }
960                 }
961
962                 else
963                 {
964                     for( ; p_out < p_out_end ; )
965                     {
966                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
967                                p_pic->p[i_plane].i_pitch );
968
969                         p_out += p_outpic->p[i_plane].i_pitch;
970                         p_in += 2*p_pic->p[i_plane].i_pitch;
971                     }
972                 }
973                 break;
974         }
975     }
976     EndMerge();
977 }
978
979 #undef Merge
980
/* MergeGeneric: portable C merge.
 * Writes i_bytes bytes to _p_dest, each one being the truncated average
 * (a + b) >> 1 of the corresponding bytes of _p_s1 and _p_s2. */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_out = _p_dest;
    const uint8_t *p_a = _p_s1;
    const uint8_t *p_b = _p_s2;

    for( size_t i = 0; i < i_bytes; i++ )
        p_out[i] = (uint8_t)( ( (unsigned)p_a[i] + (unsigned)p_b[i] ) >> 1 );
}
1008
1009 #if defined(CAN_COMPILE_MMXEXT)
/* MergeMMXEXT: average two byte rows using the MMXEXT pavgb instruction.
 *
 * Writes i_bytes bytes to _p_dest. Full groups of 8 bytes go through pavgb
 * (rounded average); the remaining 0..7 bytes use the C expression
 * (a + b) >> 1 (truncated average).
 *
 * The MMX state is left dirty; the caller is expected to issue emms
 * afterwards. */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* ">= 8" so that the last complete group is also vectorised; counting
     * the remaining bytes avoids forming a pointer before the buffer when
     * i_bytes < 8. */
    for( ; i_bytes >= 8; i_bytes -= 8 )
    {
        /* The operands only describe one byte each while the instructions
         * touch eight, and mm1 is overwritten: declare both to the
         * compiler through the clobber list. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1036 #endif
1037
1038 #if defined(CAN_COMPILE_3DNOW)
/* Merge3DNow: average two byte rows using the 3DNow! pavgusb instruction.
 *
 * Writes i_bytes bytes to _p_dest. Full groups of 8 bytes go through
 * pavgusb; the remaining 0..7 bytes use the C expression (a + b) >> 1.
 *
 * The MMX/3DNow! state is left dirty; the caller is expected to issue
 * femms/emms afterwards. */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* ">= 8" so that the last complete group is also vectorised; counting
     * the remaining bytes avoids forming a pointer before the buffer when
     * i_bytes < 8. */
    for( ; i_bytes >= 8; i_bytes -= 8 )
    {
        /* The operands only describe one byte each while the instructions
         * touch eight, and mm1 is overwritten: declare both to the
         * compiler through the clobber list. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1065 #endif
1066
1067 #if defined(CAN_COMPILE_SSE)
/* MergeSSE2: average two byte rows using the SSE2 pavgb instruction.
 *
 * Writes exactly i_bytes bytes to _p_dest:
 *  - a scalar head, until _p_s1 reaches a 16-byte boundary (or the data
 *    runs out),
 *  - full groups of 16 bytes through pavgb (rounded average; unaligned
 *    loads and stores are used, so no pointer needs to be aligned),
 *  - a scalar tail for the remaining 0..15 bytes.
 * The scalar parts compute the truncated average (a + b) >> 1. */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Head: every byte handled here is deducted from i_bytes, so the
     * following loops cannot run past the end of the buffers. */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    for( ; i_bytes >= 16; i_bytes -= 16 )
    {
        /* The operands only describe one byte each while the instructions
         * touch sixteen, and xmm1 is overwritten: declare both to the
         * compiler through the clobber list. */
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1099 #endif
1100
1101 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: issue the x86 "emms" instruction (empty MMX state). Takes no
 * argument and returns nothing.
 * Presumably meant to be called once after a series of MMX based merges so
 * that x87 floating point code can run again - confirm against the code
 * that installs pf_end_merge. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
1106 #endif
1107
1108 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: issue the "femms" instruction, the 3DNow! counterpart of emms.
 * Takes no argument and returns nothing.
 * Presumably meant to be called once after a series of 3DNow! based merges
 * - confirm against the code that installs pf_end_merge. */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
1113 #endif
1114
1115 #ifdef CAN_COMPILE_C_ALTIVEC
1116 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1117                           const void *_p_s2, size_t i_bytes )
1118 {
1119     uint8_t *p_dest = (uint8_t *)_p_dest;
1120     uint8_t *p_s1   = (uint8_t *)_p_s1;
1121     uint8_t *p_s2   = (uint8_t *)_p_s2;
1122     uint8_t *p_end  = p_dest + i_bytes - 15;
1123
1124     /* Use C until the first 16-bytes aligned destination pixel */
1125     while( (uintptr_t)p_dest & 0xF )
1126     {
1127         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1128     }
1129
1130     if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1131     {
1132         /* Unaligned source */
1133         vector unsigned char s1v, s2v, destv;
1134         vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1135         vector unsigned char perm1v, perm2v;
1136
1137         perm1v = vec_lvsl( 0, p_s1 );
1138         perm2v = vec_lvsl( 0, p_s2 );
1139         s1oldv = vec_ld( 0, p_s1 );
1140         s2oldv = vec_ld( 0, p_s2 );
1141
1142         while( p_dest < p_end )
1143         {
1144             s1newv = vec_ld( 16, p_s1 );
1145             s2newv = vec_ld( 16, p_s2 );
1146             s1v    = vec_perm( s1oldv, s1newv, perm1v );
1147             s2v    = vec_perm( s2oldv, s2newv, perm2v );
1148             s1oldv = s1newv;
1149             s2oldv = s2newv;
1150             destv  = vec_avg( s1v, s2v );
1151             vec_st( destv, 0, p_dest );
1152
1153             p_s1   += 16;
1154             p_s2   += 16;
1155             p_dest += 16;
1156         }
1157     }
1158     else
1159     {
1160         /* Aligned source */
1161         vector unsigned char s1v, s2v, destv;
1162
1163         while( p_dest < p_end )
1164         {
1165             s1v   = vec_ld( 0, p_s1 );
1166             s2v   = vec_ld( 0, p_s2 );
1167             destv = vec_avg( s1v, s2v );
1168             vec_st( destv, 0, p_dest );
1169
1170             p_s1   += 16;
1171             p_s2   += 16;
1172             p_dest += 16;
1173         }
1174     }
1175
1176     p_end += 15;
1177
1178     while( p_dest < p_end )
1179     {
1180         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1181     }
1182 }
1183 #endif
1184
1185 #ifdef __ARM_NEON__
1186 static void MergeNEON (void *restrict out, const void *in1,
1187                        const void *in2, size_t n)
1188 {
1189     uint8_t *outp = out;
1190     const uint8_t *in1p = in1;
1191     const uint8_t *in2p = in2;
1192     size_t mis = ((uintptr_t)outp) & 15;
1193
1194     if (mis)
1195     {
1196         MergeGeneric (outp, in1p, in2p, mis);
1197         outp += mis;
1198         in1p += mis;
1199         in2p += mis;
1200         n -= mis;
1201     }
1202
1203     uint8_t *end = outp + (n & ~15);
1204
1205     if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
1206         while (outp < end)
1207             asm volatile (
1208                 "vld1.u8  {q0-q1}, [%[in1]]!\n"
1209                 "vld1.u8  {q2-q3}, [%[in2]]!\n"
1210                 "vhadd.u8 q4, q0, q2\n"
1211                 "vld1.u8  {q6-q7}, [%[in1]]!\n"
1212                 "vhadd.u8 q5, q1, q3\n"
1213                 "vld1.u8  {q8-q9}, [%[in2]]!\n"
1214                 "vhadd.u8 q10, q6, q8\n"
1215                 "vhadd.u8 q11, q7, q9\n"
1216                 "vst1.u8  {q4-q5}, [%[out],:128]!\n"
1217                 "vst1.u8  {q10-q11}, [%[out],:128]!\n"
1218                 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1219                 :
1220                 : "q0", "q1", "q2", "memory");
1221     else
1222          while (outp < end)
1223             asm volatile (
1224                 "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
1225                 "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
1226                 "vhadd.u8 q4, q0, q2\n"
1227                 "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
1228                 "vhadd.u8 q5, q1, q3\n"
1229                 "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
1230                 "vhadd.u8 q10, q6, q8\n"
1231                 "vhadd.u8 q11, q7, q9\n"
1232                 "vst1.u8  {q4-q5}, [%[out],:128]!\n"
1233                 "vst1.u8  {q10-q11}, [%[out],:128]!\n"
1234                 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1235                 :
1236                 : "q0", "q1", "q2", "memory");
1237     n &= 15;
1238     if (n)
1239         MergeGeneric (outp, in1p, in2p, n);
1240 }
1241 #endif
1242
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis. It copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a motion estimation (ME) between the 2
 *    fields; if there is a good match we use motion compensation (MC) to
 *    recreate the bottom field (with a small blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
1255
/* XDeint8x8DetectC: detect whether an 8x8 block is interlaced.
 * XXX: It needs access to 8x10 pixels.
 * More than 8 lines are used to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection does not work well on smooth/uniform areas with noise,
 * but that is not really a problem because they do not have much detail
 * anyway.
 *
 * Returns 1 when at least one of the four examined line pairs looks
 * interlaced, 0 otherwise.
 */

/* Square of a (one term of a sum of squared differences) */
static inline int ssd( int a )
{
    return a * a;
}

static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    /* Four passes, each starting two lines below the previous one */
    for( int i_pass = 0; i_pass < 4; i_pass++, src += 2*i_src )
    {
        const uint8_t *r0 = src;
        const uint8_t *r1 = r0 + i_src;
        const uint8_t *r2 = r1 + i_src;
        const uint8_t *r3 = r2 + i_src;
        int i_frame = 0; /* differences between adjacent lines */
        int i_field = 0; /* differences between lines of the same field */

        for( int x = 0; x < 8; x++ )
        {
            i_frame += ssd( r0[x] - r1[x] ) + ssd( r1[x] - r2[x] );
            i_field += ssd( r0[x] - r2[x] ) + ssd( r1[x] - r3[x] );
        }

        if( i_frame > 32 && i_field < 6*i_frame/8 )
            i_hits++;
    }

    return i_hits >= 1;
}
1290 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMX variant of the 8x8 interlacing detector, written
 * with the register macros from mmx.h (operands appear to be given in
 * (source, destination) order, as in movd_r2m( mm5, fr ) below).
 *
 * For each examined group of four lines it accumulates, over the 8 columns,
 *   fr = sum of squared differences line0-line1 and line2-line1,
 *   ff = sum of squared differences line0-line2 and line3-line1,
 * and counts the group when ff < 6*fr/8 && fr > 32.
 *
 * Returns the number of counted groups (0 means "not interlaced"), not a
 * 0/1 flag.
 *
 * NOTE(review): the loop below runs for y = 0,2,4,6,8 (five groups, lines
 * 0..11), whereas XDeint8x8DetectC stops at y < 7 (four groups, lines 0..9).
 * Confirm which bound is intended.
 */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    /* mm7 = 0, used to widen bytes to 16-bit words */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        /* mm5 accumulates fr, mm6 accumulates ff (two 32-bit halves each) */
        pxor_r2r( mm5, mm5 );
        pxor_r2r( mm6, mm6 );
        /* 4 pixels per pass */
        for( x = 0; x < 8; x+=4 )
        {
            /* Load 4 bytes from each of the four lines */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            /* Widen to 16-bit words */
            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            /* Keep a copy of line 0 */
            movq_r2r( mm0, mm4 );

            /* mm0 = line0 - line1, mm4 = line0 - line2 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );

            /* mm2 = line2 - line1, mm3 = line3 - line1 */
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square and add adjacent words, then accumulate:
             * fr += mm0^2 + mm2^2, ff += mm4^2 + mm3^2 */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Fold the two 32-bit halves of mm5 into fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        /* Fold the two 32-bit halves of mm6 into ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
1353 #endif
1354
/* XDeint8x8MergeC: fill an 8x8 block treated as progressive.
 * Four times: one line copied from src1, followed by one line computed as
 * the (1,6,1)/8 blend of src1, src2 and the next src1 line (with rounding).
 * src1/src2 advance by i_src1/i_src2 after each pair of output lines.
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_cur  = src1 + i_pair * i_src1;
        const uint8_t *p_mid  = src2 + i_pair * i_src2;
        const uint8_t *p_next = p_cur + i_src1;
        uint8_t *p_out = dst + 2 * i_pair * i_dst;

        memcpy( p_out, p_cur, 8 );
        p_out += i_dst;

        for( int x = 0; x < 8; x++ )
            p_out[x] = ( p_cur[x] + 6*p_mid[x] + p_next[x] + 4 ) >> 3;
    }
}
1375
1376 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMX variant of the progressive 8x8 fill, written with
 * the register macros from mmx.h.
 * Four times: one line copied from src1 to dst, followed by one line computed
 * as (src1 + 6*src2 + next src1 line + 4) >> 3, 4 pixels per inner pass.
 * src1/src2 advance by i_src1/i_src2 after each pair of output lines.
 * MMX registers are left in use on return.
 */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term: the value 4 in each of the four 16-bit lanes */
    static const uint64_t m_4 = INT64_C(0x0004000400040004);
    int y, x;

    /* Progressive */
    /* mm7 = 0, used to widen bytes to words and to pack back */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Copy 4 pixels of the src1 line straight to the output */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*src2, mm3 = 4*src2, mm1+mm3 = 6*src2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            /* mm0 = src1 + next src1 line */
            paddw_r2r( mm2, mm0 );
            paddw_r2r( mm3, mm1 );
            paddw_m2r( m_4, mm1 );
            /* mm0 = (src1 + 6*src2 + next + 4) >> 3 */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1415
1416 #endif
1417
/* For debug: fill an 8x8 block of dst (line pitch i_dst) with the value v */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int i_left = 8;

    while( i_left-- > 0 )
    {
        memset( dst, v, 8 );
        dst += i_dst;
    }
}
1425
/* XDeint8x8FieldEC: simple (1,0,1) deinterlacing for blocks that lack a
 * horizontal neighbour.
 * (Uses 8x9 pixels)
 * Four times: one source line copied, followed by the truncated average of
 * that line and the source line two rows below.
 * TODO: a better one for the inner part.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_top = src + 2 * i_pair * i_src;
        const uint8_t *p_bot = p_top + 2 * i_src;
        uint8_t *p_out = dst + 2 * i_pair * i_dst;

        memcpy( p_out, p_top, 8 );
        p_out += i_dst;

        for( int x = 0; x < 8; x++ )
            p_out[x] = ( p_top[x] + p_bot[x] ) >> 1;
    }
}
1448 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMX variant of the simple (1,0,1) 8x8 field
 * deinterlacer, written with the register macros from mmx.h.
 * Four times: one 8-pixel source line copied, followed by the byte average
 * of that line and the source line two rows below.
 * NOTE(review): the average is produced by pavgb_r2r; if that maps to the
 * pavgb instruction, results round up while the C version truncates -
 * confirm against mmx.h.
 */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Copy the kept line (8 bytes at once) */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Average it with the line two rows below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1470 #endif
1471
/* XDeint8x8FieldC: edge oriented interpolation of an 8x8 block
 * (needs -4 and +5 pixels horizontally, +1 line).
 * Four times: one source line copied, followed by one interpolated line.
 * For each interpolated pixel, three 8-pixel sums of absolute differences
 * between the line above and the line below are compared to choose between
 * the two diagonal and the vertical average.
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_top = src;
        const uint8_t *p_bot = src + 2*i_src;

        memcpy( dst, p_top, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* 8 pixels are used just to match the MMX version, but it is
             * overkill: 5 would be enough (less is not good) */
            int c0 = 0; /* top[x-4..x+3] against bottom[x-2..x+5] */
            int c1 = 0; /* top[x-3..x+4] against bottom[x-3..x+4] */
            int c2 = 0; /* top[x-2..x+5] against bottom[x-4..x+3] */

            for( int k = -4; k < 4; k++ )
            {
                c0 += abs( p_top[x+k]   - p_bot[x+k+2] );
                c1 += abs( p_top[x+k+1] - p_bot[x+k+1] );
                c2 += abs( p_top[x+k+2] - p_bot[x+k]   );
            }

            int i_sum;
            if( c0 < c1 && c1 <= c2 )
                i_sum = p_top[x-1] + p_bot[x+1];
            else if( c2 < c1 && c1 <= c0 )
                i_sum = p_top[x+1] + p_bot[x-1];
            else
                i_sum = p_top[x] + p_bot[x];

            dst[x] = i_sum >> 1;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1518 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: edge oriented interpolation of an 8x8 block, with the
 * three direction costs computed by psadbw (mmx.h register macros).
 * Four times: one source line copied, followed by one interpolated line.
 * The 8-byte loads start at x-4 and end at x+5 relative to each pixel, so
 * the block needs -4/+5 pixels horizontally around it.
 * MMX registers are left in use on return.
 */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            /* Line two rows below the current one */
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            /* 8 bytes of the upper line at three horizontal offsets */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* Sum of absolute differences against the lower line, shifted
             * the opposite way */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            /* Pick the diagonal whose cost is strictly the lowest, else
             * fall back to the vertical average */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1559 #endif
1560
/* NxN arbitrary size (and then only uses pixels inside the NxN block)
 */

/* XDeintNxNDetect: tell whether an i_width x i_height block looks interlaced.
 * Note the parameter order: height comes before width here, unlike the
 * other XDeintNxN* helpers.
 * Squared differences between adjacent lines (fr) and between lines two
 * rows apart (ff) are accumulated over the whole block; a line pair is
 * counted when ff < fr && fr > i_width / 2.
 * Returns true when at least two line pairs were counted.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    int y, x;
    int ff, fr;
    int fc;

    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    ff = fr = 0;
    fc = 0;
    for( y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *s = &src[y*i_src];
        for( x = 0; x < i_width; x++ )
        {
            const int d_frame = s[x] - s[1*i_src+x];
            const int d_field = s[x] - s[2*i_src+x];

            fr += d_frame * d_frame;
            ff += d_field * d_field;
        }
        if( ff < fr && fr > i_width / 2 )
            fc++;
    }

    return fc >= 2;
}

/* XDeintNxNFrame: progressive fill of an i_width x i_height block.
 * Each pair of output lines is one copied source line followed by the
 * (1,2,1)/4 blend of that line and the two below it; the last pair uses
 * the plain average of the last two source lines instead.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}

/* XDeintNxNField: interlaced fill of an i_width x i_height block.
 * Each pair of output lines is one copied source line followed by the
 * average of that line and the line two rows below; the last pair uses the
 * average of the last two source lines instead.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}

/* XDeintNxN: deinterlace an i_width x i_height block, using the field
 * variant when the block is detected as interlaced and the frame variant
 * otherwise.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect expects (height, width): passing them the other way
     * round makes it scan the transposed rectangle, i.e. pixels that are
     * not part of this block. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1654
1655
/* median: return the middle value of a, b and c */
static inline int median( int a, int b, int c )
{
    const int lo = ( b < a ) ? b : a;
    const int hi = ( b < a ) ? a : b;

    if( c < lo )
        return lo;
    if( c > hi )
        return hi;
    return c;
}
1671
1672
/* XDeintBand8x8C: deinterlace one band of 8 lines with the C helpers.
 * The band is made of i_mbx blocks of 8x8 pixels followed, when i_modx is
 * not zero, by one narrower block of i_modx x 8 pixels.
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive block: even lines as src1, odd lines as src2 */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block: the first and last blocks of the band lack a
         * horizontal neighbour and use the simpler filter */
        if( i_mb == 0 || i_mb == i_mbx - 1 )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1705 #ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT: deinterlace one band of 8 lines with the MMXEXT
 * helpers. The band is made of i_mbx blocks of 8x8 pixels followed, when
 * i_modx is not zero, by one narrower block of i_modx x 8 pixels handled by
 * the C code.
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Progressive block: even lines as src1, odd lines as src2 */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block: the first and last blocks of the band lack a
         * horizontal neighbour and use the simpler filter */
        if( i_mb == 0 || i_mb == i_mbx - 1 )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1737 #endif
1738
1739 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1740 {
1741     int i_plane;
1742
1743     /* Copy image and skip lines */
1744     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1745     {
1746         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1747         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1748
1749         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1750         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1751
1752         const int i_dst = p_outpic->p[i_plane].i_pitch;
1753         const int i_src = p_pic->p[i_plane].i_pitch;
1754
1755         int y, x;
1756
1757         for( y = 0; y < i_mby; y++ )
1758         {
1759             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1760             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1761
1762 #ifdef CAN_COMPILE_MMXEXT
1763             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1764                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1765             else
1766 #endif
1767                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1768         }
1769
1770         /* Last line (C only)*/
1771         if( i_mody )
1772         {
1773             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1774             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1775
1776             for( x = 0; x < i_mbx; x++ )
1777             {
1778                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1779
1780                 dst += 8;
1781                 src += 8;
1782             }
1783
1784             if( i_modx )
1785                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1786         }
1787     }
1788
1789 #ifdef CAN_COMPILE_MMXEXT
1790     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1791         emms();
1792 #endif
1793 }
1794
/*****************************************************************************
 * Yadif (Yet Another DeInterlacing Filter).
 *****************************************************************************/

/**
 * Configuration handed to the yadif line filters.
 */
struct vf_priv_s {
    /*
     * Yadif operating mode:
     *  0: one output frame per input frame,
     *  1: one output frame per input field,
     *  2: same as 0, without the spatial interlacing check,
     *  3: same as 1, without the spatial interlacing check.
     *
     * Only bit 0x02 is meaningful in vlc, because the behaviour selected by
     * bit 0x01 is implemented by the deinterlace module itself.
     */
    int mode;
};
1810
1811 /* I am unsure it is the right one */
1812 typedef intptr_t x86_reg;
1813
1814 #define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
1815 #define FFMAX(a,b)      __MAX(a,b)
1816 #define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
1817 #define FFMIN(a,b)      __MIN(a,b)
1818 #define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)
1819
1820 /* yadif.h comes from vf_yadif.c of mplayer project */
1821 #include "yadif.h"
1822
1823 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1824 {
1825     vout_sys_t *p_sys = p_vout->p_sys;
1826
1827     /* */
1828     assert( i_order == 0 || i_order == 1 );
1829     assert( i_field == 0 || i_field == 1 );
1830
1831     if( i_order == 0 )
1832     {
1833         /* Duplicate the picture
1834          * TODO when the vout rework is finished, picture_Hold() might be enough
1835          * but becarefull, the pitches must match */
1836         picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1837         if( p_dup )
1838             picture_Copy( p_dup, p_src );
1839
1840         /* Slide the history */
1841         if( p_sys->pp_history[0] )
1842             picture_Release( p_sys->pp_history[0]  );
1843         for( int i = 1; i < HISTORY_SIZE; i++ )
1844             p_sys->pp_history[i-1] = p_sys->pp_history[i];
1845         p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1846     }
1847
1848     /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1849     picture_t *p_prev = p_sys->pp_history[0];
1850     picture_t *p_cur  = p_sys->pp_history[1];
1851     picture_t *p_next = p_sys->pp_history[2];
1852
1853     /* Filter if we have all the pictures we need */
1854     if( p_prev && p_cur && p_next )
1855     {
1856         /* */
1857         void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1858 #if defined(HAVE_YADIF_SSE2)
1859         if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1860             filter = yadif_filter_line_mmx2;
1861         else
1862 #endif
1863             filter = yadif_filter_line_c;
1864
1865         for( int n = 0; n < p_dst->i_planes; n++ )
1866         {
1867             const plane_t *prevp = &p_prev->p[n];
1868             const plane_t *curp  = &p_cur->p[n];
1869             const plane_t *nextp = &p_next->p[n];
1870             plane_t *dstp        = &p_dst->p[n];
1871
1872             for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
1873             {
1874                 if( (y % 2) == i_field )
1875                 {
1876                     vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1877                                 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1878                 }
1879                 else
1880                 {
1881                     struct vf_priv_s cfg;
1882                     /* Spatial checks only when enough data */
1883                     cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1884
1885                     assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1886                     filter( &cfg,
1887                             &dstp->p_pixels[y * dstp->i_pitch],
1888                             &prevp->p_pixels[y * prevp->i_pitch],
1889                             &curp->p_pixels[y * curp->i_pitch],
1890                             &nextp->p_pixels[y * nextp->i_pitch],
1891                             dstp->i_visible_pitch,
1892                             curp->i_pitch,
1893                             (i_field ^ (i_order == i_field)) & 1 );
1894                 }
1895
1896                 /* We duplicate the first and last lines */
1897                 if( y == 1 )
1898                     vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1899                 else if( y == dstp->i_visible_lines - 2 )
1900                     vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1901             }
1902         }
1903
1904         /* */
1905         p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1906     }
1907     else
1908     {
1909         /* Fallback to something simple
1910          * XXX it is wrong when we have 2 pictures, we should not output a picture */
1911         RenderX( p_dst, p_src );
1912     }
1913 }
1914
1915 /*****************************************************************************
1916  * FilterCallback: called when changing the deinterlace method on the fly.
1917  *****************************************************************************/
1918 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1919                            vlc_value_t oldval, vlc_value_t newval,
1920                            void *p_data )
1921 {
1922     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1923     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1924     vout_sys_t *p_sys = p_vout->p_sys;
1925
1926     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1927
1928     vlc_mutex_lock( &p_sys->filter_lock );
1929     const bool b_old_half_height = p_sys->b_half_height;
1930
1931     SetFilterMethod( p_vout, newval.psz_string );
1932
1933     if( !b_old_half_height == !p_sys->b_half_height )
1934     {
1935         vlc_mutex_unlock( &p_sys->filter_lock );
1936         return VLC_SUCCESS;
1937     }
1938
1939     /* We need to kill the old vout */
1940     if( p_sys->p_vout )
1941     {
1942         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1943         vout_CloseAndRelease( p_sys->p_vout );
1944     }
1945
1946     /* Try to open a new video output */
1947     p_sys->p_vout = SpawnRealVout( p_vout );
1948
1949     if( p_sys->p_vout == NULL )
1950     {
1951         /* Everything failed */
1952         msg_Err( p_vout, "cannot open vout, aborting" );
1953
1954         vlc_mutex_unlock( &p_sys->filter_lock );
1955         return VLC_EGENERIC;
1956     }
1957
1958     vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1959
1960     vlc_mutex_unlock( &p_sys->filter_lock );
1961     return VLC_SUCCESS;
1962 }
1963
1964 /*****************************************************************************
1965  * video filter2 functions
1966  *****************************************************************************/
1967 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1968 {
1969     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1970     picture_t *p_pic_dst;
1971
1972     /* Request output picture */
1973     p_pic_dst = filter_NewPicture( p_filter );
1974     if( p_pic_dst == NULL )
1975     {
1976         picture_Release( p_pic );
1977         return NULL;
1978     }
1979
1980     switch( p_vout->p_sys->i_mode )
1981     {
1982         case DEINTERLACE_DISCARD:
1983             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1984             break;
1985
1986         case DEINTERLACE_BOB:
1987 #if 0
1988             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1989             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1990             break;
1991 #endif
1992
1993         case DEINTERLACE_LINEAR:
1994 #if 0
1995             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1996             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
1997 #endif
1998             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
1999             picture_Release( p_pic_dst );
2000             picture_Release( p_pic );
2001             return NULL;
2002
2003         case DEINTERLACE_MEAN:
2004             RenderMean( p_vout, p_pic_dst, p_pic );
2005             break;
2006
2007         case DEINTERLACE_BLEND:
2008             RenderBlend( p_vout, p_pic_dst, p_pic );
2009             break;
2010
2011         case DEINTERLACE_X:
2012             RenderX( p_pic_dst, p_pic );
2013             break;
2014
2015         case DEINTERLACE_YADIF:
2016             msg_Err( p_vout, "delaying frames is not supported yet" );
2017             picture_Release( p_pic_dst );
2018             picture_Release( p_pic );
2019             return NULL;
2020
2021         case DEINTERLACE_YADIF2X:
2022             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2023             picture_Release( p_pic_dst );
2024             picture_Release( p_pic );
2025             return NULL;
2026     }
2027
2028     picture_CopyProperties( p_pic_dst, p_pic );
2029     p_pic_dst->b_progressive = true;
2030
2031     picture_Release( p_pic );
2032     return p_pic_dst;
2033 }
2034
2035 /*****************************************************************************
2036  * OpenFilter:
2037  *****************************************************************************/
2038 static int OpenFilter( vlc_object_t *p_this )
2039 {
2040     filter_t *p_filter = (filter_t*)p_this;
2041     vout_thread_t *p_vout;
2042     vlc_value_t val;
2043
2044     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2045         return VLC_EGENERIC;
2046
2047     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2048      * by spu filters */
2049     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2050     vlc_object_attach( p_vout, p_filter );
2051     p_filter->p_sys = (filter_sys_t *)p_vout;
2052     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2053
2054     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2055                    p_filter->p_cfg );
2056     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2057
2058     var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2059     var_Set( p_filter, "filter-deinterlace-mode", val );
2060     free( val.psz_string );
2061
2062     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2063     {
2064         vlc_object_detach( p_vout );
2065         vlc_object_release( p_vout );
2066         return VLC_EGENERIC;
2067     }
2068
2069     video_format_t fmt;
2070     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2071     if( !p_filter->b_allow_fmt_out_change &&
2072         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2073           fmt.i_height != p_filter->fmt_in.video.i_height ) )
2074     {
2075         CloseFilter( VLC_OBJECT(p_filter) );
2076         return VLC_EGENERIC;
2077     }
2078     p_filter->fmt_out.video = fmt;
2079     p_filter->fmt_out.i_codec = fmt.i_chroma;
2080     p_filter->pf_video_filter = Deinterlace;
2081
2082     msg_Dbg( p_filter, "deinterlacing" );
2083
2084     return VLC_SUCCESS;
2085 }
2086
2087 /*****************************************************************************
2088  * CloseFilter: clean up the filter
2089  *****************************************************************************/
2090 static void CloseFilter( vlc_object_t *p_this )
2091 {
2092     filter_t *p_filter = (filter_t*)p_this;
2093     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2094
2095     Destroy( VLC_OBJECT(p_vout) );
2096     vlc_object_detach( p_vout );
2097     vlc_object_release( p_vout );
2098 }
2099