]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
Make mouse-moved and mouse-clicked coordinates, remove mouse-x and -y
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000-2009 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <assert.h>
33
34 #ifdef HAVE_ALTIVEC_H
35 #   include <altivec.h>
36 #endif
37
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_vout.h>
41 #include <vlc_filter.h>
42 #include <vlc_cpu.h>
43
44 #ifdef CAN_COMPILE_MMXEXT
45 #   include "mmx.h"
46 #endif
47
48 #include "filter_common.h"
49
/* Deinterlace algorithm identifiers, stored in vout_sys_t.i_mode.
 * SetFilterMethod() maps the user-supplied mode string to one of them and
 * Render() dispatches on the value. */
#define DEINTERLACE_DISCARD 1 /* "discard"; also the fallback mode */
#define DEINTERLACE_MEAN    2 /* "mean" */
#define DEINTERLACE_BLEND   3 /* "blend", "average", "combine-fields" */
#define DEINTERLACE_BOB     4 /* "bob", "progressive-scan" */
#define DEINTERLACE_LINEAR  5 /* "linear" */
#define DEINTERLACE_X       6 /* "x" */
#define DEINTERLACE_YADIF   7 /* "yadif" */
#define DEINTERLACE_YADIF2X 8 /* "yadif2x" */
58
59 /*****************************************************************************
60  * Local prototypes
61  *****************************************************************************/
/* Module entry points for the "video filter" capability */
static int  Create    ( vlc_object_t * );
static void Destroy   ( vlc_object_t * );

/* vout_thread_t callbacks installed by Create() */
static int  Init      ( vout_thread_t * );
static void End       ( vout_thread_t * );
static void Render    ( vout_thread_t *, picture_t * );

/* Variable callback registered on the child vout (see Init/End) */
static int  MouseEvent( vlc_object_t *p_this, char const *psz_var,
                        vlc_value_t oldval, vlc_value_t newval, void *p_data );

/* One renderer per deinterlace mode; Render() picks the one to call */
static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderX      ( picture_t *, picture_t * );
static void RenderYadif  ( vout_thread_t *, picture_t *, picture_t *, int, int );

/* Line merge routines; Create() selects one according to vlc_CPU() */
static void MergeGeneric ( void *, const void *, const void *, size_t );
#if defined(CAN_COMPILE_C_ALTIVEC)
static void MergeAltivec ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_MMXEXT)
static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void Merge3DNow   ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_SSE)
static void MergeSSE2    ( void *, const void *, const void *, size_t );
#endif
/* Epilogues paired with the merge routines above (stored in pf_end_merge) */
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
static void EndMMX       ( void );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void End3DNow     ( void );
#endif
#if defined __ARM_NEON__
static void MergeNEON (void *, const void *, const void *, size_t);
#endif

static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );

/* Entry points of the "video filter2" submodule */
static int OpenFilter( vlc_object_t *p_this );
static void CloseFilter( vlc_object_t *p_this );

/*****************************************************************************
 * Callback prototypes
 *****************************************************************************/
/* Registered on "filter-deinterlace-mode" in Init(), removed in End() */
static int FilterCallback( vlc_object_t *, char const *,
                           vlc_value_t, vlc_value_t, void * );
114
/*****************************************************************************
 * Module descriptor
 *****************************************************************************
 * Declares two modules sharing the "deinterlace" shortcut: a "video filter"
 * (Create/Destroy) configured through "filter-deinterlace-mode", and a
 * "video filter2" submodule (OpenFilter/CloseFilter) configured through
 * FILTER_CFG_PREFIX "mode".
 *****************************************************************************/
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")

#define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
#define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")

#define FILTER_CFG_PREFIX "sout-deinterlace-"

/* Accepted mode values and their display names; both arrays are passed
 * together to change_string_list() and must stay index-aligned. */
static const char *const mode_list[] = {
    "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
static const char *const mode_list_text[] = {
    N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };

vlc_module_begin ()
    set_description( N_("Deinterlacing video filter") )
    set_shortname( N_("Deinterlace" ))
    set_capability( "video filter", 0 )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )

    /* Display variant: defaults to "discard" */
    set_section( N_("Display"),NULL)
    add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
                MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
        change_safe ()

    add_shortcut( "deinterlace" )
    set_callbacks( Create, Destroy )

    /* Streaming variant: defaults to "blend" */
    add_submodule ()
    set_capability( "video filter2", 0 )
    set_section( N_("Streaming"),NULL)
    add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
                SOUT_MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
    add_shortcut( "deinterlace" )
    set_callbacks( OpenFilter, CloseFilter )
vlc_module_end ()

/* NULL-terminated list of option names (without FILTER_CFG_PREFIX);
 * presumably consumed by the filter2 submodule's option parsing, which is
 * not visible in this part of the file. */
static const char *const ppsz_filter_options[] = {
    "mode", NULL
};
160
/*****************************************************************************
 * vout_sys_t: Deinterlace video output method descriptor
 *****************************************************************************
 * This structure is part of the video output thread descriptor.
 * It describes the Deinterlace specific properties of an output thread.
 *****************************************************************************/
#define HISTORY_SIZE (3) /* number of slots in pp_history[] */
struct vout_sys_t
{
    int        i_mode;        /* Deinterlace mode (one of DEINTERLACE_*) */
    bool b_double_rate; /* Shall we double the framerate? */
    bool b_half_height; /* Shall we divide the height by 2? */

    /* Date of the previous input picture; Render() uses it to date the
     * second output picture in double-rate modes. */
    mtime_t    last_date;
    /* NOTE(review): not referenced in the visible part of this file */
    mtime_t    next_date;

    /* Child ("real") video output, spawned in Init() */
    vout_thread_t *p_vout;

    /* Held by Render() while output pictures are allocated and filled */
    vlc_mutex_t filter_lock;

    /* Line merge routine and its optional epilogue (may be NULL), chosen
     * in Create() from the CPU capabilities. */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    void (*pf_end_merge) ( void );

    /* Yadif: picture history, reset to NULL in Init() and released in End() */
    picture_t *pp_history[HISTORY_SIZE];
};
187
188 /*****************************************************************************
189  * Control: control facility for the vout (forwards to child vout)
190  *****************************************************************************/
191 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
192 {
193     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
194 }
195
196 /*****************************************************************************
197  * Create: allocates Deinterlace video thread output method
198  *****************************************************************************
199  * This function allocates and initializes a Deinterlace vout method.
200  *****************************************************************************/
201 static int Create( vlc_object_t *p_this )
202 {
203     vout_thread_t *p_vout = (vout_thread_t *)p_this;
204     vout_sys_t *p_sys;
205     char *psz_mode;
206
207     /* Allocate structure */
208     p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
209     if( p_vout->p_sys == NULL )
210         return VLC_ENOMEM;
211
212     p_vout->pf_init = Init;
213     p_vout->pf_end = End;
214     p_vout->pf_manage = NULL;
215     p_vout->pf_render = Render;
216     p_vout->pf_display = NULL;
217     p_vout->pf_control = Control;
218
219     p_sys->i_mode = DEINTERLACE_DISCARD;
220     p_sys->b_double_rate = false;
221     p_sys->b_half_height = true;
222     p_sys->last_date = 0;
223     p_sys->p_vout = 0;
224     vlc_mutex_init( &p_sys->filter_lock );
225
226 #if defined(CAN_COMPILE_C_ALTIVEC)
227     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
228     {
229         p_sys->pf_merge = MergeAltivec;
230         p_sys->pf_end_merge = NULL;
231     }
232     else
233 #endif
234 #if defined(CAN_COMPILE_SSE)
235     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
236     {
237         p_sys->pf_merge = MergeSSE2;
238         p_sys->pf_end_merge = EndMMX;
239     }
240     else
241 #endif
242 #if defined(CAN_COMPILE_MMXEXT)
243     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
244     {
245         p_sys->pf_merge = MergeMMXEXT;
246         p_sys->pf_end_merge = EndMMX;
247     }
248     else
249 #endif
250 #if defined(CAN_COMPILE_3DNOW)
251     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
252     {
253         p_sys->pf_merge = Merge3DNow;
254         p_sys->pf_end_merge = End3DNow;
255     }
256     else
257 #endif
258 #if defined __ARM_NEON__
259     if( vlc_CPU() & CPU_CAPABILITY_NEON )
260     {
261         p_sys->pf_merge = MergeNEON;
262         p_sys->pf_end_merge = NULL;
263     }
264     else
265 #endif
266     {
267         p_sys->pf_merge = MergeGeneric;
268         p_sys->pf_end_merge = NULL;
269     }
270
271     /* Look what method was requested */
272     psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
273
274     if( !psz_mode )
275     {
276         msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
277         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
278
279         psz_mode = strdup( "discard" );
280     }
281
282     SetFilterMethod( p_vout, psz_mode );
283
284     free( psz_mode );
285
286     return VLC_SUCCESS;
287 }
288
289 /*****************************************************************************
290  * SetFilterMethod: setup the deinterlace method to use.
291  *****************************************************************************/
292 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
293 {
294     vout_sys_t *p_sys = p_vout->p_sys;
295     if( !strcmp( psz_method, "mean" ) )
296     {
297         p_sys->i_mode = DEINTERLACE_MEAN;
298         p_sys->b_double_rate = false;
299         p_sys->b_half_height = true;
300     }
301     else if( !strcmp( psz_method, "blend" )
302              || !strcmp( psz_method, "average" )
303              || !strcmp( psz_method, "combine-fields" ) )
304     {
305         p_sys->i_mode = DEINTERLACE_BLEND;
306         p_sys->b_double_rate = false;
307         p_sys->b_half_height = false;
308     }
309     else if( !strcmp( psz_method, "bob" )
310              || !strcmp( psz_method, "progressive-scan" ) )
311     {
312         p_sys->i_mode = DEINTERLACE_BOB;
313         p_sys->b_double_rate = true;
314         p_sys->b_half_height = false;
315     }
316     else if( !strcmp( psz_method, "linear" ) )
317     {
318         p_sys->i_mode = DEINTERLACE_LINEAR;
319         p_sys->b_double_rate = true;
320         p_sys->b_half_height = false;
321     }
322     else if( !strcmp( psz_method, "x" ) )
323     {
324         p_sys->i_mode = DEINTERLACE_X;
325         p_sys->b_double_rate = false;
326         p_sys->b_half_height = false;
327     }
328     else if( !strcmp( psz_method, "yadif" ) )
329     {
330         p_sys->i_mode = DEINTERLACE_YADIF;
331         p_sys->b_double_rate = false;
332         p_sys->b_half_height = false;
333     }
334     else if( !strcmp( psz_method, "yadif2x" ) )
335     {
336         p_sys->i_mode = DEINTERLACE_YADIF2X;
337         p_sys->b_double_rate = true;
338         p_sys->b_half_height = false;
339     }
340     else
341     {
342         const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
343                             p_vout->render.i_chroma == VLC_CODEC_J422;
344         if( strcmp( psz_method, "discard" ) )
345             msg_Err( p_vout, "no valid deinterlace mode provided, "
346                      "using \"discard\"" );
347
348         p_sys->i_mode = DEINTERLACE_DISCARD;
349         p_sys->b_double_rate = false;
350         p_sys->b_half_height = !b_i422;
351     }
352
353     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
354 }
355
356 static void GetOutputFormat( vout_thread_t *p_vout,
357                              video_format_t *p_dst, const video_format_t *p_src )
358 {
359     *p_dst = *p_src;
360
361     if( p_vout->p_sys->b_half_height )
362     {
363         p_dst->i_height /= 2;
364         p_dst->i_visible_height /= 2;
365         p_dst->i_y_offset /= 2;
366         p_dst->i_sar_den *= 2;
367     }
368
369     if( p_src->i_chroma == VLC_CODEC_I422 ||
370         p_src->i_chroma == VLC_CODEC_J422 )
371     {
372         switch( p_vout->p_sys->i_mode )
373         {
374         case DEINTERLACE_MEAN:
375         case DEINTERLACE_LINEAR:
376         case DEINTERLACE_X:
377         case DEINTERLACE_YADIF:
378         case DEINTERLACE_YADIF2X:
379             p_dst->i_chroma = p_src->i_chroma;
380             break;
381         default:
382             p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
383                                                                   VLC_CODEC_J420;
384             break;
385         }
386     }
387 }
388
389 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
390 {
391     return i_chroma == VLC_CODEC_I420 ||
392            i_chroma == VLC_CODEC_J420 ||
393            i_chroma == VLC_CODEC_YV12 ||
394            i_chroma == VLC_CODEC_I422 ||
395            i_chroma == VLC_CODEC_J422;
396 }
397
398 /*****************************************************************************
399  * Init: initialize Deinterlace video thread output method
400  *****************************************************************************/
401 static int Init( vout_thread_t *p_vout )
402 {
403     I_OUTPUTPICTURES = 0;
404
405     if( !IsChromaSupported( p_vout->render.i_chroma ) )
406         return VLC_EGENERIC; /* unknown chroma */
407
408     /* Initialize the output structure, full of directbuffers since we want
409      * the decoder to output directly to our structures. */
410     p_vout->output.i_chroma = p_vout->render.i_chroma;
411     p_vout->output.i_width  = p_vout->render.i_width;
412     p_vout->output.i_height = p_vout->render.i_height;
413     p_vout->output.i_aspect = p_vout->render.i_aspect;
414     p_vout->fmt_out = p_vout->fmt_in;
415
416     /* Try to open the real video output */
417     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
418
419     if( p_vout->p_sys->p_vout == NULL )
420     {
421         /* Everything failed */
422         msg_Err( p_vout, "cannot open vout, aborting" );
423
424         return VLC_EGENERIC;
425     }
426
427     for( int i = 0; i < HISTORY_SIZE; i++ )
428         p_vout->p_sys->pp_history[i] = NULL;
429
430     vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
431
432     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
433
434     var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
435
436     return VLC_SUCCESS;
437 }
438
439 /*****************************************************************************
440  * SpawnRealVout: spawn the real video output.
441  *****************************************************************************/
442 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
443 {
444     msg_Dbg( p_vout, "spawning the real video output" );
445
446     video_format_t fmt;
447     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
448
449     return vout_Create( p_vout, &fmt );
450 }
451
452 /*****************************************************************************
453  * End: terminate Deinterlace video thread output method
454  *****************************************************************************/
455 static void End( vout_thread_t *p_vout )
456 {
457     vout_sys_t *p_sys = p_vout->p_sys;
458
459     var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
460
461     for( int i = 0; i < HISTORY_SIZE; i++ )
462     {
463         if( p_sys->pp_history[i] )
464             picture_Release( p_sys->pp_history[i] );
465     }
466
467     if( p_sys->p_vout )
468     {
469         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
470         vout_CloseAndRelease( p_sys->p_vout );
471     }
472
473     vout_filter_ReleaseDirectBuffers( p_vout );
474 }
475
476 /*****************************************************************************
477  * Destroy: destroy Deinterlace video thread output method
478  *****************************************************************************
479  * Terminate an output method created by DeinterlaceCreateOutputMethod
480  *****************************************************************************/
481 static void Destroy( vlc_object_t *p_this )
482 {
483     vout_thread_t *p_vout = (vout_thread_t *)p_this;
484     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
485     free( p_vout->p_sys );
486 }
487
488 /**
489  * Forward mouse event with proper conversion.
490  */
491 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
492                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
493 {
494     vout_thread_t *p_vout = p_data;
495     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
496
497     if( !strcmp( psz_var, "mouse-button-down" ) )
498         return var_SetChecked( p_vout, psz_var, VLC_VAR_INTEGER, newval );
499
500     if( p_vout->p_sys->b_half_height )
501         newval.coords.y *= 2;
502     return var_SetChecked( p_vout, psz_var, VLC_VAR_COORDS, newval );
503 }
504
505 /*****************************************************************************
506  * Render: displays previously rendered output
507  *****************************************************************************
508  * This function send the currently rendered image to Deinterlace image,
509  * waits until it is displayed and switch the two rendering buffers, preparing
510  * next frame.
511  *****************************************************************************/
512 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
513 {
514     vout_sys_t *p_sys = p_vout->p_sys;
515     picture_t *pp_outpic[2];
516
517     /* FIXME are they needed ? */
518     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
519     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
520     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
521     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
522
523     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
524     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
525     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
526     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
527     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
528     if( p_vout->p_sys->b_half_height )
529     {
530         p_sys->p_vout->fmt_in.i_y_offset /= 2;
531         p_sys->p_vout->fmt_in.i_visible_height /= 2;
532     }
533
534     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
535     {
536         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
537
538         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
539         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
540
541         video_format_t fmt = p_vout->fmt_out;
542         if( p_vout->p_sys->b_half_height )
543         {
544             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
545             fmt.i_sar_den *= 2;
546         }
547
548         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
549     }
550     if( !p_sys->p_vout )
551         return;
552
553     pp_outpic[0] = pp_outpic[1] = NULL;
554
555     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
556
557     /* Get a new picture */
558     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
559                                                 0, 0, 0 ) )
560               == NULL )
561     {
562         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
563         {
564             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
565             return;
566         }
567         msleep( VOUT_OUTMEM_SLEEP );
568     }
569
570     pp_outpic[0]->date = p_pic->date;
571
572     /* If we are using double rate, get an additional new picture */
573     if( p_vout->p_sys->b_double_rate )
574     {
575         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
576                                                  0, 0, 0 ) )
577                   == NULL )
578         {
579             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
580             {
581                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
582                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
583                 return;
584             }
585             msleep( VOUT_OUTMEM_SLEEP );
586         }
587
588         /* 20ms is a bit arbitrary, but it's only for the first image we get */
589         if( !p_vout->p_sys->last_date )
590             pp_outpic[1]->date = p_pic->date + 20000;
591         else
592             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
593         p_vout->p_sys->last_date = p_pic->date;
594     }
595
596     switch( p_vout->p_sys->i_mode )
597     {
598         case DEINTERLACE_DISCARD:
599             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
600             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
601             break;
602
603         case DEINTERLACE_BOB:
604             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
605             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
606             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
607             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
608             break;
609
610         case DEINTERLACE_LINEAR:
611             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
612             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
613             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
614             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
615             break;
616
617         case DEINTERLACE_MEAN:
618             RenderMean( p_vout, pp_outpic[0], p_pic );
619             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
620             break;
621
622         case DEINTERLACE_BLEND:
623             RenderBlend( p_vout, pp_outpic[0], p_pic );
624             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
625             break;
626
627         case DEINTERLACE_X:
628             RenderX( pp_outpic[0], p_pic );
629             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
630             break;
631
632         case DEINTERLACE_YADIF:
633             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
634             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
635             break;
636
637         case DEINTERLACE_YADIF2X:
638             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
639             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
640             RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
641             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
642             break;
643     }
644     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
645 }
646
647 /*****************************************************************************
648  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
649  *****************************************************************************/
650 static void RenderDiscard( vout_thread_t *p_vout,
651                            picture_t *p_outpic, picture_t *p_pic, int i_field )
652 {
653     int i_plane;
654
655     /* Copy image and skip lines */
656     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
657     {
658         uint8_t *p_in, *p_out_end, *p_out;
659         int i_increment;
660
661         p_in = p_pic->p[i_plane].p_pixels
662                    + i_field * p_pic->p[i_plane].i_pitch;
663
664         p_out = p_outpic->p[i_plane].p_pixels;
665         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
666                              * p_outpic->p[i_plane].i_visible_lines;
667
668         switch( p_vout->render.i_chroma )
669         {
670         case VLC_CODEC_I420:
671         case VLC_CODEC_J420:
672         case VLC_CODEC_YV12:
673
674             for( ; p_out < p_out_end ; )
675             {
676                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
677
678                 p_out += p_outpic->p[i_plane].i_pitch;
679                 p_in += 2 * p_pic->p[i_plane].i_pitch;
680             }
681             break;
682
683         case VLC_CODEC_I422:
684         case VLC_CODEC_J422:
685
686             i_increment = 2 * p_pic->p[i_plane].i_pitch;
687
688             if( i_plane == Y_PLANE )
689             {
690                 for( ; p_out < p_out_end ; )
691                 {
692                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
693                     p_out += p_outpic->p[i_plane].i_pitch;
694                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
695                     p_out += p_outpic->p[i_plane].i_pitch;
696                     p_in += i_increment;
697                 }
698             }
699             else
700             {
701                 for( ; p_out < p_out_end ; )
702                 {
703                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
704                     p_out += p_outpic->p[i_plane].i_pitch;
705                     p_in += i_increment;
706                 }
707             }
708             break;
709
710         default:
711             break;
712         }
713     }
714 }
715
716 /*****************************************************************************
717  * RenderBob: renders a BOB picture - simple copy
718  *****************************************************************************/
719 static void RenderBob( vout_thread_t *p_vout,
720                        picture_t *p_outpic, picture_t *p_pic, int i_field )
721 {
722     int i_plane;
723
724     /* Copy image and skip lines */
725     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
726     {
727         uint8_t *p_in, *p_out_end, *p_out;
728
729         p_in = p_pic->p[i_plane].p_pixels;
730         p_out = p_outpic->p[i_plane].p_pixels;
731         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
732                              * p_outpic->p[i_plane].i_visible_lines;
733
734         switch( p_vout->render.i_chroma )
735         {
736             case VLC_CODEC_I420:
737             case VLC_CODEC_J420:
738             case VLC_CODEC_YV12:
739                 /* For BOTTOM field we need to add the first line */
740                 if( i_field == 1 )
741                 {
742                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
743                     p_in += p_pic->p[i_plane].i_pitch;
744                     p_out += p_outpic->p[i_plane].i_pitch;
745                 }
746
747                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
748
749                 for( ; p_out < p_out_end ; )
750                 {
751                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
752
753                     p_out += p_outpic->p[i_plane].i_pitch;
754
755                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
756
757                     p_in += 2 * p_pic->p[i_plane].i_pitch;
758                     p_out += p_outpic->p[i_plane].i_pitch;
759                 }
760
761                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
762
763                 /* For TOP field we need to add the last line */
764                 if( i_field == 0 )
765                 {
766                     p_in += p_pic->p[i_plane].i_pitch;
767                     p_out += p_outpic->p[i_plane].i_pitch;
768                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
769                 }
770                 break;
771
772             case VLC_CODEC_I422:
773             case VLC_CODEC_J422:
774                 /* For BOTTOM field we need to add the first line */
775                 if( i_field == 1 )
776                 {
777                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
778                     p_in += p_pic->p[i_plane].i_pitch;
779                     p_out += p_outpic->p[i_plane].i_pitch;
780                 }
781
782                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
783
784                 if( i_plane == Y_PLANE )
785                 {
786                     for( ; p_out < p_out_end ; )
787                     {
788                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
789
790                         p_out += p_outpic->p[i_plane].i_pitch;
791
792                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
793
794                         p_in += 2 * p_pic->p[i_plane].i_pitch;
795                         p_out += p_outpic->p[i_plane].i_pitch;
796                     }
797                 }
798                 else
799                 {
800                     for( ; p_out < p_out_end ; )
801                     {
802                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
803
804                         p_out += p_outpic->p[i_plane].i_pitch;
805                         p_in += 2 * p_pic->p[i_plane].i_pitch;
806                     }
807                 }
808
809                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
810
811                 /* For TOP field we need to add the last line */
812                 if( i_field == 0 )
813                 {
814                     p_in += p_pic->p[i_plane].i_pitch;
815                     p_out += p_outpic->p[i_plane].i_pitch;
816                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
817                 }
818                 break;
819         }
820     }
821 }
822
/* Shorthands for the merge routine and its optional epilogue stored in the
 * private descriptor. Both expand to expressions using a variable named
 * p_vout, so they can only be used where such a variable is in scope.
 * EndMerge expands to a brace-less `if` that calls pf_end_merge only when it
 * is not NULL; be careful when placing it directly before an `else`. */
#define Merge p_vout->p_sys->pf_merge
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
825
826 /*****************************************************************************
827  * RenderLinear: BOB with linear interpolation
828  *****************************************************************************/
829 static void RenderLinear( vout_thread_t *p_vout,
830                           picture_t *p_outpic, picture_t *p_pic, int i_field )
831 {
832     int i_plane;
833
834     /* Copy image and skip lines */
835     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
836     {
837         uint8_t *p_in, *p_out_end, *p_out;
838
839         p_in = p_pic->p[i_plane].p_pixels;
840         p_out = p_outpic->p[i_plane].p_pixels;
841         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
842                              * p_outpic->p[i_plane].i_visible_lines;
843
844         /* For BOTTOM field we need to add the first line */
845         if( i_field == 1 )
846         {
847             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
848             p_in += p_pic->p[i_plane].i_pitch;
849             p_out += p_outpic->p[i_plane].i_pitch;
850         }
851
852         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
853
854         for( ; p_out < p_out_end ; )
855         {
856             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
857
858             p_out += p_outpic->p[i_plane].i_pitch;
859
860             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
861                    p_pic->p[i_plane].i_pitch );
862
863             p_in += 2 * p_pic->p[i_plane].i_pitch;
864             p_out += p_outpic->p[i_plane].i_pitch;
865         }
866
867         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
868
869         /* For TOP field we need to add the last line */
870         if( i_field == 0 )
871         {
872             p_in += p_pic->p[i_plane].i_pitch;
873             p_out += p_outpic->p[i_plane].i_pitch;
874             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
875         }
876     }
877     EndMerge();
878 }
879
880 static void RenderMean( vout_thread_t *p_vout,
881                         picture_t *p_outpic, picture_t *p_pic )
882 {
883     int i_plane;
884
885     /* Copy image and skip lines */
886     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
887     {
888         uint8_t *p_in, *p_out_end, *p_out;
889
890         p_in = p_pic->p[i_plane].p_pixels;
891
892         p_out = p_outpic->p[i_plane].p_pixels;
893         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
894                              * p_outpic->p[i_plane].i_visible_lines;
895
896         /* All lines: mean value */
897         for( ; p_out < p_out_end ; )
898         {
899             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
900                    p_pic->p[i_plane].i_pitch );
901
902             p_out += p_outpic->p[i_plane].i_pitch;
903             p_in += 2 * p_pic->p[i_plane].i_pitch;
904         }
905     }
906     EndMerge();
907 }
908
909 static void RenderBlend( vout_thread_t *p_vout,
910                          picture_t *p_outpic, picture_t *p_pic )
911 {
912     int i_plane;
913
914     /* Copy image and skip lines */
915     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
916     {
917         uint8_t *p_in, *p_out_end, *p_out;
918
919         p_in = p_pic->p[i_plane].p_pixels;
920
921         p_out = p_outpic->p[i_plane].p_pixels;
922         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
923                              * p_outpic->p[i_plane].i_visible_lines;
924
925         switch( p_vout->render.i_chroma )
926         {
927             case VLC_CODEC_I420:
928             case VLC_CODEC_J420:
929             case VLC_CODEC_YV12:
930                 /* First line: simple copy */
931                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
932                 p_out += p_outpic->p[i_plane].i_pitch;
933
934                 /* Remaining lines: mean value */
935                 for( ; p_out < p_out_end ; )
936                 {
937                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
938                            p_pic->p[i_plane].i_pitch );
939
940                     p_out += p_outpic->p[i_plane].i_pitch;
941                     p_in += p_pic->p[i_plane].i_pitch;
942                 }
943                 break;
944
945             case VLC_CODEC_I422:
946             case VLC_CODEC_J422:
947                 /* First line: simple copy */
948                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
949                 p_out += p_outpic->p[i_plane].i_pitch;
950
951                 /* Remaining lines: mean value */
952                 if( i_plane == Y_PLANE )
953                 {
954                     for( ; p_out < p_out_end ; )
955                     {
956                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
957                                p_pic->p[i_plane].i_pitch );
958
959                         p_out += p_outpic->p[i_plane].i_pitch;
960                         p_in += p_pic->p[i_plane].i_pitch;
961                     }
962                 }
963
964                 else
965                 {
966                     for( ; p_out < p_out_end ; )
967                     {
968                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
969                                p_pic->p[i_plane].i_pitch );
970
971                         p_out += p_outpic->p[i_plane].i_pitch;
972                         p_in += 2*p_pic->p[i_plane].i_pitch;
973                     }
974                 }
975                 break;
976         }
977     }
978     EndMerge();
979 }
980
981 #undef Merge
982
/**
 * MergeGeneric: portable C merge routine.
 *
 * Writes to each of the i_bytes destination bytes the truncated average
 * (a + b) >> 1 of the corresponding bytes of the two sources.
 *
 * \param _p_dest  destination buffer, at least i_bytes long
 * \param _p_s1    first source buffer, at least i_bytes long
 * \param _p_s2    second source buffer, at least i_bytes long
 * \param i_bytes  number of bytes to process; 0 is allowed and does nothing
 *
 * The loop is driven by the byte count rather than by an end pointer computed
 * as "dest + i_bytes - 8": that pointer lies before the buffer whenever
 * i_bytes < 8, which is undefined behaviour in C.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
1010
#if defined(CAN_COMPILE_MMXEXT)
/**
 * MergeMMXEXT: merge routine using the MMXEXT `pavgb` instruction.
 *
 * Averages i_bytes bytes of _p_s1 and _p_s2 into _p_dest. Data is processed
 * 8 bytes at a time with `pavgb` while more than 8 bytes remain; the last
 * 1..8 bytes are averaged in C with (a + b) >> 1.
 *
 * The asm statement declares mm1 (used as scratch) and memory as clobbered:
 * the "m" operands have type uint8_t and so describe only one byte of the
 * 8 bytes that are really read and written.
 *
 * The loops are driven by the remaining byte count so that no pointer before
 * the start of the destination is formed when i_bytes < 8.
 */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
#endif
1039
#if defined(CAN_COMPILE_3DNOW)
/**
 * Merge3DNow: merge routine using the 3DNow! `pavgusb` instruction.
 *
 * Averages i_bytes bytes of _p_s1 and _p_s2 into _p_dest. Data is processed
 * 8 bytes at a time with `pavgusb` while more than 8 bytes remain; the last
 * 1..8 bytes are averaged in C with (a + b) >> 1.
 *
 * The asm statement declares mm1 (used as scratch) and memory as clobbered:
 * the "m" operands have type uint8_t and so describe only one byte of the
 * 8 bytes that are really read and written.
 *
 * The loops are driven by the remaining byte count so that no pointer before
 * the start of the destination is formed when i_bytes < 8.
 */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
#endif
1068
#if defined(CAN_COMPILE_SSE)
/**
 * MergeSSE2: merge routine using `pavgb` on 128-bit registers.
 *
 * Averages i_bytes bytes of _p_s1 and _p_s2 into _p_dest in three phases:
 *  1. bytes are averaged in C until _p_s1 reaches a 16-byte boundary;
 *  2. 16 bytes at a time are averaged with `pavgb` while more than 16 bytes
 *     remain;
 *  3. the last 1..16 bytes are averaged in C.
 *
 * Every phase decrements the remaining byte count. The alignment phase in
 * particular must do so and must stop when the count reaches zero: if it
 * does not, the function processes up to 15 bytes more than requested and
 * writes past the end of the destination whenever _p_s1 is not 16-byte
 * aligned.
 *
 * The asm statement declares xmm1 (used as scratch) and memory as clobbered:
 * the "m" operands have type uint8_t and so describe only one byte of the
 * 16 bytes that are really read and written.
 */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Phase 1: reach a 16-byte aligned first source (or run out of data) */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;

    /* Phase 2: 16 bytes per iteration */
    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Phase 3: remaining bytes */
    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
#endif
1102
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: executes the x86 `emms` instruction (no operands, no clobbers
 * declared). Compiled when either the MMXEXT or the SSE code path is
 * available. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
#endif
1109
#if defined(CAN_COMPILE_3DNOW)
/* End3DNow: executes the 3DNow! `femms` instruction (no operands, no
 * clobbers declared). */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
#endif
1116
#ifdef CAN_COMPILE_C_ALTIVEC
/**
 * MergeAltivec: merge routine using AltiVec vec_avg().
 *
 * Averages i_bytes bytes of _p_s1 and _p_s2 into _p_dest:
 *  1. bytes are averaged in C until the destination is 16-byte aligned;
 *  2. whole 16-byte vectors are averaged with vec_avg(), using vec_perm()
 *     to realign the sources when at least one of them is not 16-byte
 *     aligned;
 *  3. the remaining bytes are averaged in C.
 *
 * p_end is the one-past-the-end destination pointer and every loop is bounded
 * by it. This keeps the alignment loop from running past a short buffer and
 * avoids forming "dest + i_bytes - 15", which is out of range for
 * i_bytes < 15. Source alignment is tested through uintptr_t; casting a
 * pointer to int may truncate it.
 */
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    uint8_t *p_end  = p_dest + i_bytes;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( p_dest < p_end && ( (uintptr_t)p_dest & 0xF ) )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    if( ( (uintptr_t)p_s1 | (uintptr_t)p_s2 ) & 0xF )
    {
        /* Unaligned source */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        /* Only while a full 16-byte vector still fits in the destination */
        while( p_end - p_dest >= 16 )
        {
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( p_end - p_dest >= 16 )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Remaining bytes (fewer than 16) */
    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
1186
#ifdef __ARM_NEON__
/**
 * MergeNEON: merge routine using ARM NEON `vhadd.u8`.
 *
 * Averages n bytes of in1 and in2 into out:
 *  1. MergeGeneric() handles the leading bytes needed to bring the
 *     destination to a 16-byte boundary (the stores below use the :128
 *     alignment hint);
 *  2. the NEON loop handles the largest multiple of 64 bytes that fits,
 *     because each iteration loads 2 x 32 bytes from every source and
 *     stores 64 bytes;
 *  3. MergeGeneric() handles whatever is left (fewer than 64 bytes).
 *
 * Fixes relative to the previous revision:
 *  - the head length is the distance to the next 16-byte boundary,
 *    not (address & 15), and never exceeds n;
 *  - the vector loop is bounded at 64-byte granularity to match what one
 *    iteration really stores; a 16-byte bound let it write up to 48 bytes
 *    past the end of the destination;
 *  - the clobber list names every register the asm uses (q0-q11).
 */
static void MergeNEON (void *restrict out, const void *in1,
                       const void *in2, size_t n)
{
    uint8_t *outp = out;
    const uint8_t *in1p = in1;
    const uint8_t *in2p = in2;

    /* Bytes needed to reach the next 16-byte boundary of the destination */
    size_t mis = (size_t)(-(uintptr_t)outp) & 15;
    if (mis > n)
        mis = n;

    if (mis)
    {
        MergeGeneric (outp, in1p, in2p, mis);
        outp += mis;
        in1p += mis;
        in2p += mis;
        n -= mis;
    }

    /* One asm iteration produces 64 bytes */
    uint8_t *end = outp + (n & ~(size_t)63);

    if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
        while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1]]!\n"
                "vld1.u8  {q2-q3}, [%[in2]]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1]]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2]]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");
    else
         while (outp < end)
            asm volatile (
                "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
                "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
                "vhadd.u8 q4, q0, q2\n"
                "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
                "vhadd.u8 q5, q1, q3\n"
                "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
                "vhadd.u8 q10, q6, q8\n"
                "vhadd.u8 q11, q7, q9\n"
                "vst1.u8  {q4-q5}, [%[out],:128]!\n"
                "vst1.u8  {q10-q11}, [%[out],:128]!\n"
                : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
                :
                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
                  "q8", "q9", "q10", "q11", "memory");

    /* Tail: what the 64-byte loop could not take */
    n &= 63;
    if (n)
        MergeGeneric (outp, in1p, in2p, n);
}
#endif
1244
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis. It copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields; if there is a
 *    good match we use MC to recreate the bottom field (with a small
 *    blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge-oriented
 *    interpolation.
 *****************************************************************************/
1257
/* XDeint8x8Detect: tells whether an 8x8 block looks interlaced.
 * XXX: it reads an 8x10 area (8 columns, 10 lines).
 * More than 8 lines are used to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection in smooth/uniform areas with noise does not work well,
 * but that is not really a problem because they hold little detail anyway.
 */

/* Square of a difference */
static inline int ssd( int a )
{
    return a * a;
}

/* Returns 1 when at least one of the four line pairs of the block has a
 * field difference (lines two apart) clearly below its frame difference
 * (adjacent lines), 0 otherwise. */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    for( int i_pair = 0; i_pair < 4; i_pair++, src += 2*i_src )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = p_l0 + i_src;
        const uint8_t *p_l2 = p_l1 + i_src;
        const uint8_t *p_l3 = p_l2 + i_src;
        int i_frame = 0;    /* SSD between adjacent lines */
        int i_field = 0;    /* SSD between lines of the same field */

        for( int i_col = 0; i_col < 8; i_col++ )
        {
            i_frame += ssd( p_l0[i_col] - p_l1[i_col] )
                     + ssd( p_l1[i_col] - p_l2[i_col] );
            i_field += ssd( p_l0[i_col] - p_l2[i_col] )
                     + ssd( p_l1[i_col] - p_l3[i_col] );
        }

        if( i_field < 6*i_frame/8 && i_frame > 32 )
            i_hits++;
    }

    return i_hits >= 1;
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX implementation of the 8x8 interlacing detection.
 * Returns the number of line pairs classified as interlaced (0 means
 * progressive), using the same per-pair test as the C version.
 * Note that this loop visits five line pairs (rows 0, 2, 4, 6 and 8), so
 * lines 0..11 of the source are read. */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{
    int i_hits = 0;

    /* mm7 = 0, used to widen bytes to 16-bit words */
    pxor_r2r( mm7, mm7 );

    for( int i_row = 0; i_row < 9; i_row += 2, src += 2*i_src )
    {
        int32_t i_field = 0;
        int32_t i_frame = 0;

        /* mm5 accumulates the frame SSD, mm6 the field SSD */
        pxor_r2r( mm5, mm5 );
        pxor_r2r( mm6, mm6 );

        for( int i_col = 0; i_col < 8; i_col += 4 )
        {
            /* Four pixels from each of four consecutive lines */
            movd_m2r( src[        i_col], mm0 );
            movd_m2r( src[1*i_src+i_col], mm1 );
            movd_m2r( src[2*i_src+i_col], mm2 );
            movd_m2r( src[3*i_src+i_col], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            /* mm0 = l0 - l1, mm4 = l0 - l2 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );

            /* mm2 = l2 - l1, mm3 = l3 - l1 */
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square and pairwise add */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Horizontal sum of the two dwords of mm5 -> frame SSD */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, i_frame );

        /* Horizontal sum of the two dwords of mm6 -> field SSD */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, i_field );

        if( i_field < 6*i_frame/8 && i_frame > 32 )
            i_hits++;
    }
    return i_hits;
}
#endif
1356
/* XDeint8x8MergeC: fills an 8x8 destination block for a progressive area.
 * Each pass writes two destination lines: a copy of the current src1 line,
 * then a (1,6,1) blend of the src1 line, the src2 line and the next src1
 * line (which is why one src1 line beyond the four consumed ones is read). */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int i_pass = 0; i_pass < 4; i_pass++ )
    {
        const uint8_t *p_above = src1;
        const uint8_t *p_below = src1 + i_src1;
        uint8_t *p_blend = dst + i_dst;

        memcpy( dst, p_above, 8 );

        for( int i_col = 0; i_col < 8; i_col++ )
        {
            const int i_sum = p_above[i_col] + 6*src2[i_col]
                            + p_below[i_col] + 4;
            p_blend[i_col] = i_sum >> 3;
        }

        dst   = p_blend + i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1377
#ifdef CAN_COMPILE_MMXEXT
/* MMX implementation of the progressive 8x8 merge: every pass copies one
 * src1 line and writes below it the (1,6,1) blend of that line, the src2
 * line and the next src1 line, four pixels at a time. */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term (4) replicated in the four 16-bit lanes */
    static const uint64_t m_4 = INT64_C(0x0004000400040004);

    /* Progressive */
    pxor_r2r( mm7, mm7 );   /* mm7 = 0, used to widen bytes to words */

    for( int i_row = 0; i_row < 8; i_row += 2 )
    {
        for( int i_col = 0; i_col < 8; i_col += 4 )
        {
            /* Plain copy of the src1 line */
            movd_m2r( src1[i_col], mm0 );
            movd_r2m( mm0, dst[i_col] );

            movd_m2r( src2[i_col], mm1 );
            movd_m2r( src1[i_src1+i_col], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*s2, mm3 = 4*s2, mm1 = 6*s2 + 4 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            paddw_r2r( mm2, mm0 );
            paddw_r2r( mm3, mm1 );
            paddw_m2r( m_4, mm1 );
            /* mm0 = (s1 + s1_next + 6*s2 + 4) >> 3 */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+i_col] );
        }
        dst  += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}

#endif
1419
/* For debug: paints an 8x8 block of dst (line stride i_dst) with value v */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int i_line = 0; i_line < 8; i_line++ )
    {
        uint8_t *p_line = dst + i_line * i_dst;
        memset( p_line, v, 8 );
    }
}
1427
/* XDeint8x8FieldE: simple (1,0,1) deinterlacing for blocks that miss a
 * horizontal neighbour.
 * (Uses 8x9 pixels: source lines 0, 2, 4, 6 and 8.)
 * TODO: a better one for the inner part.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pass = 0; i_pass < 4; i_pass++ )
    {
        const uint8_t *p_cur  = src;
        const uint8_t *p_next = src + 2*i_src;
        uint8_t *p_mid = dst + i_dst;

        /* Kept field line */
        memcpy( dst, p_cur, 8 );

        /* Missing line: average of the field lines around it */
        for( int i_col = 0; i_col < 8; i_col++ )
            p_mid[i_col] = ( p_cur[i_col] + p_next[i_col] ) >> 1;

        dst  = p_mid + i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX implementation of XDeint8x8FieldE: each pass copies one 8-pixel field
 * line and writes below it the `pavgb` average of that line and the field
 * line two source lines further down. */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pass = 0; i_pass < 4; i_pass++ )
    {
        /* Kept field line */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Missing line: average with the next line of the same field */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += i_dst;
        src += 2*i_src;
    }
}
#endif
1473
/* XDeint8x8Field: edge oriented interpolation
 * (needs 4 pixels to the left and 5 to the right of the block, and one
 * extra field line below it)
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pass = 0; i_pass < 4; i_pass++ )
    {
        const uint8_t *p_up   = src;              /* field line above */
        const uint8_t *p_down = src + 2*i_src;    /* field line below */
        uint8_t *p_mid = dst + i_dst;             /* line to rebuild  */

        memcpy( dst, p_up, 8 );

        for( int x = 0; x < 8; x++ )
        {
            /* 8 taps are used just to match the MMX version, but it is
             * overkill: 5 would be enough (fewer is not good).
             *  i_diag_a: upper line shifted 2 pixels left of the lower one
             *  i_vert  : both lines aligned
             *  i_diag_b: upper line shifted 2 pixels right of the lower one */
            int i_diag_a = 0, i_vert = 0, i_diag_b = 0;

            for( int t = 0; t < 8; t++ )
            {
                i_diag_a += abs( p_up[x-4+t] - p_down[x-2+t] );
                i_vert   += abs( p_up[x-3+t] - p_down[x-3+t] );
                i_diag_b += abs( p_up[x-2+t] - p_down[x-4+t] );
            }

            int i_top, i_bottom;
            if( i_diag_a < i_vert && i_vert <= i_diag_b )
            {
                i_top    = p_up[x-1];
                i_bottom = p_down[x+1];
            }
            else if( i_diag_b < i_vert && i_vert <= i_diag_a )
            {
                i_top    = p_up[x+1];
                i_bottom = p_down[x-1];
            }
            else
            {
                i_top    = p_up[x];
                i_bottom = p_down[x];
            }
            p_mid[x] = ( i_top + i_bottom ) >> 1;
        }

        dst  = p_mid + i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX implementation of the edge oriented interpolation: the three 8-pixel
 * sums of absolute differences are computed with `psadbw`, then the same
 * direction selection as in the C version is applied. */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pass = 0; i_pass < 4; i_pass++ )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int i_col = 0; i_col < 8; i_col++ )
        {
            uint8_t *src2 = &src[2*i_src];
            int32_t i_sad0, i_sad1, i_sad2;

            movq_m2r( src[i_col-2], mm0 );
            movq_m2r( src[i_col-3], mm1 );
            movq_m2r( src[i_col-4], mm2 );

            psadbw_m2r( src2[i_col-4], mm0 );
            psadbw_m2r( src2[i_col-3], mm1 );
            psadbw_m2r( src2[i_col-2], mm2 );

            movd_r2m( mm0, i_sad2 );
            movd_r2m( mm1, i_sad1 );
            movd_r2m( mm2, i_sad0 );

            if( i_sad0 < i_sad1 && i_sad1 <= i_sad2 )
                dst[i_col] = (src[i_col-1] + src2[i_col+1]) >> 1;
            else if( i_sad2 < i_sad1 && i_sad1 <= i_sad0 )
                dst[i_col] = (src[i_col+1] + src2[i_col-1]) >> 1;
            else
                dst[i_col] = (src[i_col+0] + src2[i_col+0]) >> 1;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
#endif
1562
/* NxN: arbitrary block size (only pixels inside the NxN block are used)
 */

/* Returns 1 when the block is considered interlaced, 0 otherwise.
 * The frame and field sums of squared differences are accumulated over the
 * whole block (they are not reset between line pairs); a line pair counts as
 * a hit when, after adding it, the field sum is below the frame sum and the
 * frame sum exceeds i_width / 2. Two hits are required. */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    int i_frame = 0;
    int i_field = 0;
    int i_hits = 0;

    for( int i_row = 0; i_row + 2 < i_height; i_row += 2 )
    {
        const uint8_t *p_l0 = &src[i_row*i_src];
        const uint8_t *p_l1 = p_l0 + i_src;
        const uint8_t *p_l2 = p_l1 + i_src;

        for( int i_col = 0; i_col < i_width; i_col++ )
        {
            i_frame += ssd( p_l0[i_col] - p_l1[i_col] );
            i_field += ssd( p_l0[i_col] - p_l2[i_col] );
        }

        if( i_field < i_frame && i_frame > i_width / 2 )
            i_hits++;
    }

    return i_hits >= 2;
}
1591
/* XDeintNxNFrame: progressive rendering of an i_width x i_height block.
 *
 * Even lines are copied. An odd line is the (1,2,1) blend of the three
 * source lines centred on it when the line below exists inside the block;
 * otherwise it is the average of the last two source lines.
 *
 * When i_height is odd the last line of the block is an even (copied) line
 * with nothing below it: it is copied and the function stops. Reading source
 * line i_height or writing destination line i_height would be outside the
 * block.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );

        /* Odd height: the last copied line has no line below it */
        if( y + 1 >= i_height )
            break;

        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1619
/* XDeintNxNField: interlaced rendering of an i_width x i_height block.
 *
 * Even lines are copied. An odd line is the average of the even lines above
 * and below it when the lower one exists inside the block; otherwise it is
 * the average of the last two source lines.
 *
 * When i_height is odd the last line of the block is an even (copied) line
 * with nothing below it: it is copied and the function stops. Reading source
 * line i_height or writing destination line i_height would be outside the
 * block.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );

        /* Odd height: the last copied line has no line below it */
        if( y + 1 >= i_height )
            break;

        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1647
/* XDeintNxN: deinterlaces an i_width x i_height block of arbitrary size.
 * The block is rendered with XDeintNxNField() when XDeintNxNDetect() reports
 * it as interlaced and with XDeintNxNFrame() otherwise.
 *
 * XDeintNxNDetect() takes the height BEFORE the width, unlike the other NxN
 * helpers; the arguments are passed in that order so that detection scans
 * the same area that is rendered.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1656
1657
/* Returns the median of three integers: the smallest and the largest of the
 * three are subtracted from their sum. */
static inline int median( int a, int b, int c )
{
    /* Order a and b first ... */
    int i_low  = ( b < a ) ? b : a;
    int i_high = ( b < a ) ? a : b;

    /* ... then widen the range with c if it falls outside of it */
    if( c < i_low )
        i_low = c;
    else if( c > i_high )
        i_high = c;

    return a + b + c - i_low - i_high;
}
1673
1674
/* XDeintBand8x8: deinterlaces one band of 8 lines.
 * The band is split into i_mbx blocks of 8x8 pixels, followed by one block
 * of i_modx columns when i_modx is not zero. Each 8x8 block is classified by
 * XDeint8x8DetectC(): progressive blocks are merged, interlaced ones are
 * interpolated. The first and the last 8x8 block use the FieldE variant
 * instead of the edge oriented Field variant.
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive block */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block */
        const int b_border = ( i_block == 0 ) || ( i_block == i_mbx - 1 );

        if( b_border )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    /* Remaining columns, narrower than 8 pixels */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#ifdef CAN_COMPILE_MMXEXT
/* MMX counterpart of XDeintBand8x8C: same block walk, using the MMXEXT
 * detection, merge and interpolation helpers for the 8x8 blocks. The
 * trailing block of i_modx columns still goes through XDeintNxN(). */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Progressive block */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block */
        const int b_border = ( i_block == 0 ) || ( i_block == i_mbx - 1 );

        if( b_border )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    /* Remaining columns, narrower than 8 pixels */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
1740
1741 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1742 {
1743     int i_plane;
1744
1745     /* Copy image and skip lines */
1746     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1747     {
1748         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1749         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1750
1751         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1752         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1753
1754         const int i_dst = p_outpic->p[i_plane].i_pitch;
1755         const int i_src = p_pic->p[i_plane].i_pitch;
1756
1757         int y, x;
1758
1759         for( y = 0; y < i_mby; y++ )
1760         {
1761             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1762             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1763
1764 #ifdef CAN_COMPILE_MMXEXT
1765             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1766                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1767             else
1768 #endif
1769                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1770         }
1771
1772         /* Last line (C only)*/
1773         if( i_mody )
1774         {
1775             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1776             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1777
1778             for( x = 0; x < i_mbx; x++ )
1779             {
1780                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1781
1782                 dst += 8;
1783                 src += 8;
1784             }
1785
1786             if( i_modx )
1787                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1788         }
1789     }
1790
1791 #ifdef CAN_COMPILE_MMXEXT
1792     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1793         emms();
1794 #endif
1795 }
1796
1797 /*****************************************************************************
1798  * Yadif (Yet Another DeInterlacing Filter).
1799  *****************************************************************************/
1800 /* */
/*
 * Configuration handed to the yadif line filters.
 *
 * mode values:
 *   0: output 1 frame for each frame.
 *   1: output 1 frame for each field.
 *   2: like 0, but skips the spatial interlacing check.
 *   3: like 1, but skips the spatial interlacing check.
 *
 * In vlc only the 0x02 bit is meaningful, as the 0x01 part (one picture per
 * field) is handled by the caller itself.
 */
struct vf_priv_s
{
    int mode;
};
1812
/* Integer type named x86_reg, presumably required by yadif.h (included
 * below).
 * NOTE(review): the original author was unsure that intptr_t is the right
 * underlying type -- confirm against yadif.h. */
typedef intptr_t x86_reg;

/* FFmpeg-style helper names, defined before yadif.h is included.
 * FFMAX/FFMIN simply forward to __MAX/__MIN; FFABS evaluates its argument
 * twice, so do not pass expressions with side effects. */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b)      __MAX(a,b)
#define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b)      __MIN(a,b)
#define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)

/* yadif.h comes from vf_yadif.c of the mplayer project */
#include "yadif.h"
1824
1825 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1826 {
1827     vout_sys_t *p_sys = p_vout->p_sys;
1828
1829     /* */
1830     assert( i_order == 0 || i_order == 1 );
1831     assert( i_field == 0 || i_field == 1 );
1832
1833     if( i_order == 0 )
1834     {
1835         /* Duplicate the picture
1836          * TODO when the vout rework is finished, picture_Hold() might be enough
1837          * but becarefull, the pitches must match */
1838         picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1839         if( p_dup )
1840             picture_Copy( p_dup, p_src );
1841
1842         /* Slide the history */
1843         if( p_sys->pp_history[0] )
1844             picture_Release( p_sys->pp_history[0]  );
1845         for( int i = 1; i < HISTORY_SIZE; i++ )
1846             p_sys->pp_history[i-1] = p_sys->pp_history[i];
1847         p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1848     }
1849
1850     /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1851     picture_t *p_prev = p_sys->pp_history[0];
1852     picture_t *p_cur  = p_sys->pp_history[1];
1853     picture_t *p_next = p_sys->pp_history[2];
1854
1855     /* Filter if we have all the pictures we need */
1856     if( p_prev && p_cur && p_next )
1857     {
1858         /* */
1859         void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1860 #if defined(HAVE_YADIF_SSE2)
1861         if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1862             filter = yadif_filter_line_mmx2;
1863         else
1864 #endif
1865             filter = yadif_filter_line_c;
1866
1867         for( int n = 0; n < p_dst->i_planes; n++ )
1868         {
1869             const plane_t *prevp = &p_prev->p[n];
1870             const plane_t *curp  = &p_cur->p[n];
1871             const plane_t *nextp = &p_next->p[n];
1872             plane_t *dstp        = &p_dst->p[n];
1873
1874             for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
1875             {
1876                 if( (y % 2) == i_field )
1877                 {
1878                     vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1879                                 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1880                 }
1881                 else
1882                 {
1883                     struct vf_priv_s cfg;
1884                     /* Spatial checks only when enough data */
1885                     cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1886
1887                     assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1888                     filter( &cfg,
1889                             &dstp->p_pixels[y * dstp->i_pitch],
1890                             &prevp->p_pixels[y * prevp->i_pitch],
1891                             &curp->p_pixels[y * curp->i_pitch],
1892                             &nextp->p_pixels[y * nextp->i_pitch],
1893                             dstp->i_visible_pitch,
1894                             curp->i_pitch,
1895                             (i_field ^ (i_order == i_field)) & 1 );
1896                 }
1897
1898                 /* We duplicate the first and last lines */
1899                 if( y == 1 )
1900                     vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1901                 else if( y == dstp->i_visible_lines - 2 )
1902                     vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1903             }
1904         }
1905
1906         /* */
1907         p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1908     }
1909     else
1910     {
1911         /* Fallback to something simple
1912          * XXX it is wrong when we have 2 pictures, we should not output a picture */
1913         RenderX( p_dst, p_src );
1914     }
1915 }
1916
1917 /*****************************************************************************
1918  * FilterCallback: called when changing the deinterlace method on the fly.
1919  *****************************************************************************/
1920 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1921                            vlc_value_t oldval, vlc_value_t newval,
1922                            void *p_data )
1923 {
1924     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1925     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1926     vout_sys_t *p_sys = p_vout->p_sys;
1927
1928     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1929
1930     vlc_mutex_lock( &p_sys->filter_lock );
1931     const bool b_old_half_height = p_sys->b_half_height;
1932
1933     SetFilterMethod( p_vout, newval.psz_string );
1934
1935     if( !b_old_half_height == !p_sys->b_half_height )
1936     {
1937         vlc_mutex_unlock( &p_sys->filter_lock );
1938         return VLC_SUCCESS;
1939     }
1940
1941     /* We need to kill the old vout */
1942     if( p_sys->p_vout )
1943     {
1944         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1945         vout_CloseAndRelease( p_sys->p_vout );
1946     }
1947
1948     /* Try to open a new video output */
1949     p_sys->p_vout = SpawnRealVout( p_vout );
1950
1951     if( p_sys->p_vout == NULL )
1952     {
1953         /* Everything failed */
1954         msg_Err( p_vout, "cannot open vout, aborting" );
1955
1956         vlc_mutex_unlock( &p_sys->filter_lock );
1957         return VLC_EGENERIC;
1958     }
1959
1960     vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1961
1962     vlc_mutex_unlock( &p_sys->filter_lock );
1963     return VLC_SUCCESS;
1964 }
1965
1966 /*****************************************************************************
1967  * video filter2 functions
1968  *****************************************************************************/
1969 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1970 {
1971     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1972     picture_t *p_pic_dst;
1973
1974     /* Request output picture */
1975     p_pic_dst = filter_NewPicture( p_filter );
1976     if( p_pic_dst == NULL )
1977     {
1978         picture_Release( p_pic );
1979         return NULL;
1980     }
1981
1982     switch( p_vout->p_sys->i_mode )
1983     {
1984         case DEINTERLACE_DISCARD:
1985             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1986             break;
1987
1988         case DEINTERLACE_BOB:
1989 #if 0
1990             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1991             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1992             break;
1993 #endif
1994
1995         case DEINTERLACE_LINEAR:
1996 #if 0
1997             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1998             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
1999 #endif
2000             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2001             picture_Release( p_pic_dst );
2002             picture_Release( p_pic );
2003             return NULL;
2004
2005         case DEINTERLACE_MEAN:
2006             RenderMean( p_vout, p_pic_dst, p_pic );
2007             break;
2008
2009         case DEINTERLACE_BLEND:
2010             RenderBlend( p_vout, p_pic_dst, p_pic );
2011             break;
2012
2013         case DEINTERLACE_X:
2014             RenderX( p_pic_dst, p_pic );
2015             break;
2016
2017         case DEINTERLACE_YADIF:
2018             msg_Err( p_vout, "delaying frames is not supported yet" );
2019             picture_Release( p_pic_dst );
2020             picture_Release( p_pic );
2021             return NULL;
2022
2023         case DEINTERLACE_YADIF2X:
2024             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2025             picture_Release( p_pic_dst );
2026             picture_Release( p_pic );
2027             return NULL;
2028     }
2029
2030     picture_CopyProperties( p_pic_dst, p_pic );
2031     p_pic_dst->b_progressive = true;
2032
2033     picture_Release( p_pic );
2034     return p_pic_dst;
2035 }
2036
2037 /*****************************************************************************
2038  * OpenFilter:
2039  *****************************************************************************/
2040 static int OpenFilter( vlc_object_t *p_this )
2041 {
2042     filter_t *p_filter = (filter_t*)p_this;
2043     vout_thread_t *p_vout;
2044     vlc_value_t val;
2045
2046     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2047         return VLC_EGENERIC;
2048
2049     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2050      * by spu filters */
2051     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2052     vlc_object_attach( p_vout, p_filter );
2053     p_filter->p_sys = (filter_sys_t *)p_vout;
2054     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2055
2056     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2057                    p_filter->p_cfg );
2058     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2059
2060     var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2061     var_Set( p_filter, "filter-deinterlace-mode", val );
2062     free( val.psz_string );
2063
2064     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2065     {
2066         vlc_object_release( p_vout );
2067         return VLC_EGENERIC;
2068     }
2069
2070     video_format_t fmt;
2071     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2072     if( !p_filter->b_allow_fmt_out_change &&
2073         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2074           fmt.i_height != p_filter->fmt_in.video.i_height ) )
2075     {
2076         CloseFilter( VLC_OBJECT(p_filter) );
2077         return VLC_EGENERIC;
2078     }
2079     p_filter->fmt_out.video = fmt;
2080     p_filter->fmt_out.i_codec = fmt.i_chroma;
2081     p_filter->pf_video_filter = Deinterlace;
2082
2083     msg_Dbg( p_filter, "deinterlacing" );
2084
2085     return VLC_SUCCESS;
2086 }
2087
2088 /*****************************************************************************
2089  * CloseFilter: clean up the filter
2090  *****************************************************************************/
2091 static void CloseFilter( vlc_object_t *p_this )
2092 {
2093     filter_t *p_filter = (filter_t*)p_this;
2094     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2095
2096     Destroy( VLC_OBJECT(p_vout) );
2097     vlc_object_release( p_vout );
2098 }
2099