]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
Merge branch 1.0-bugfix
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001, 2002, 2003 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <errno.h>
33
34 #ifdef HAVE_ALTIVEC_H
35 #   include <altivec.h>
36 #endif
37
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_vout.h>
41 #include <vlc_sout.h>
42 #include "vlc_filter.h"
43
44 #ifdef CAN_COMPILE_MMXEXT
45 #   include "mmx.h"
46 #endif
47
48 #include "filter_common.h"
49
50 #define DEINTERLACE_DISCARD 1
51 #define DEINTERLACE_MEAN    2
52 #define DEINTERLACE_BLEND   3
53 #define DEINTERLACE_BOB     4
54 #define DEINTERLACE_LINEAR  5
55 #define DEINTERLACE_X       6
56
57 /*****************************************************************************
58  * Local prototypes
59  *****************************************************************************/
60 static int  Create    ( vlc_object_t * );
61 static void Destroy   ( vlc_object_t * );
62
63 static int  Init      ( vout_thread_t * );
64 static void End       ( vout_thread_t * );
65 static void Render    ( vout_thread_t *, picture_t * );
66
67 static int  MouseEvent( vlc_object_t *p_this, char const *psz_var,
68                         vlc_value_t oldval, vlc_value_t newval, void *p_data );
69
70 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
71 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
72 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
73 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
74 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
75 static void RenderX      ( picture_t *, picture_t * );
76
77 static void MergeGeneric ( void *, const void *, const void *, size_t );
78 #if defined(CAN_COMPILE_C_ALTIVEC)
79 static void MergeAltivec ( void *, const void *, const void *, size_t );
80 #endif
81 #if defined(CAN_COMPILE_MMXEXT)
82 static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
83 #endif
84 #if defined(CAN_COMPILE_3DNOW)
85 static void Merge3DNow   ( void *, const void *, const void *, size_t );
86 #endif
87 #if defined(CAN_COMPILE_SSE)
88 static void MergeSSE2    ( void *, const void *, const void *, size_t );
89 #endif
90 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
91 static void EndMMX       ( void );
92 #endif
93 #if defined(CAN_COMPILE_3DNOW)
94 static void End3DNow     ( void );
95 #endif
96
97 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
98 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
99
100 static int OpenFilter( vlc_object_t *p_this );
101 static void CloseFilter( vlc_object_t *p_this );
102
103 /*****************************************************************************
104  * Callback prototypes
105  *****************************************************************************/
106 static int FilterCallback( vlc_object_t *, char const *,
107                            vlc_value_t, vlc_value_t, void * );
108
109 /*****************************************************************************
110  * Module descriptor
111  *****************************************************************************/
112 #define MODE_TEXT N_("Deinterlace mode")
113 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
114
115 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
116 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
117
118 #define FILTER_CFG_PREFIX "sout-deinterlace-"
119
120 static const char *const mode_list[] = { /* mode names offered as choices for both mode options below */
121     "discard", "blend", "mean", "bob", "linear", "x" };
122 static const char *const mode_list_text[] = { /* display labels, listed in the same order as mode_list */
123     N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X" };
124
125 vlc_module_begin () /* module descriptor: one main module plus one submodule */
126     set_description( N_("Deinterlacing video filter") )
127     set_shortname( N_("Deinterlace" ))
128     set_capability( "video filter", 0 ) /* main module: entry points are Create/Destroy (see set_callbacks) */
129     set_category( CAT_VIDEO )
130     set_subcategory( SUBCAT_VIDEO_VFILTER )
131
132     set_section( N_("Display"),NULL)
133     add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT, /* read back in Create(); "discard" is presumably the default */
134                 MODE_LONGTEXT, false )
135         change_string_list( mode_list, mode_list_text, 0 )
136         change_safe ()
137
138     add_shortcut( "deinterlace" )
139     set_callbacks( Create, Destroy )
140
141     add_submodule () /* second entry: "video filter2" capability, entry points OpenFilter/CloseFilter */
142     set_capability( "video filter2", 0 )
143     set_section( N_("Streaming"),NULL)
144     add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT, /* i.e. "sout-deinterlace-mode" */
145                 SOUT_MODE_LONGTEXT, false )
146         change_string_list( mode_list, mode_list_text, 0 )
147     add_shortcut( "deinterlace" )
148     set_callbacks( OpenFilter, CloseFilter )
149 vlc_module_end ()
150
151 static const char *const ppsz_filter_options[] = { /* NULL-terminated option names; presumably combined with FILTER_CFG_PREFIX by the filter2 code - TODO confirm */
152     "mode", NULL
153 };
154
155 /*****************************************************************************
156  * vout_sys_t: Deinterlace video output method descriptor
157  *****************************************************************************
158  * This structure is part of the video output thread descriptor.
159  * It describes the Deinterlace specific properties of an output thread.
160  *****************************************************************************/
161 struct vout_sys_t
162 {
163     int        i_mode;        /* Deinterlace mode (one of the DEINTERLACE_* constants) */
164     bool b_double_rate; /* Shall we double the framerate? */
165     bool b_half_height; /* Shall we divide the height by 2? */
166
167     mtime_t    last_date; /* date of the previous input picture; 0 until Render() has seen one in double-rate mode */
168     mtime_t    next_date; /* NOTE(review): not referenced in the reviewed part of the file */
169
170     vout_thread_t *p_vout; /* child ("real") video output created by SpawnRealVout(); may be NULL */
171
172     vlc_mutex_t filter_lock; /* held by Render() while output pictures are produced */
173
174     void (*pf_merge) ( void *, const void *, const void *, size_t ); /* line merge routine picked in Create() from the CPU capabilities */
175     void (*pf_end_merge) ( void ); /* optional clean-up after a series of merges; NULL when not needed */
176 };
177
178 /*****************************************************************************
179  * Control: control facility for the vout (forwards to child vout)
180  *****************************************************************************/
181 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
182 {
183     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
184 }
185
186 /*****************************************************************************
187  * Create: allocates Deinterlace video thread output method
188  *****************************************************************************
189  * This function allocates and initializes a Deinterlace vout method.
190  *****************************************************************************/
191 static int Create( vlc_object_t *p_this )
192 {
193     vout_thread_t *p_vout = (vout_thread_t *)p_this;
194     vlc_value_t val;
195
196     /* Allocate structure */
197     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
198     if( p_vout->p_sys == NULL )
199         return VLC_ENOMEM;
200
201     p_vout->pf_init = Init;
202     p_vout->pf_end = End;
203     p_vout->pf_manage = NULL;
204     p_vout->pf_render = Render;
205     p_vout->pf_display = NULL;
206     p_vout->pf_control = Control;
207
208     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
209     p_vout->p_sys->b_double_rate = false;
210     p_vout->p_sys->b_half_height = true;
211     p_vout->p_sys->last_date = 0;
212     p_vout->p_sys->p_vout = 0;
213     vlc_mutex_init( &p_vout->p_sys->filter_lock );
214
215 #if defined(CAN_COMPILE_C_ALTIVEC)
216     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
217     {
218         p_vout->p_sys->pf_merge = MergeAltivec;
219         p_vout->p_sys->pf_end_merge = NULL;
220     }
221     else
222 #endif
223 #if defined(CAN_COMPILE_SSE)
224     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
225     {
226         p_vout->p_sys->pf_merge = MergeSSE2;
227         p_vout->p_sys->pf_end_merge = EndMMX;
228     }
229     else
230 #endif
231 #if defined(CAN_COMPILE_MMXEXT)
232     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
233     {
234         p_vout->p_sys->pf_merge = MergeMMXEXT;
235         p_vout->p_sys->pf_end_merge = EndMMX;
236     }
237     else
238 #endif
239 #if defined(CAN_COMPILE_3DNOW)
240     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
241     {
242         p_vout->p_sys->pf_merge = Merge3DNow;
243         p_vout->p_sys->pf_end_merge = End3DNow;
244     }
245     else
246 #endif
247     {
248         p_vout->p_sys->pf_merge = MergeGeneric;
249         p_vout->p_sys->pf_end_merge = NULL;
250     }
251
252     /* Look what method was requested */
253     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
254     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
255
256     if( val.psz_string == NULL )
257     {
258         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
259         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
260
261         val.psz_string = strdup( "discard" );
262     }
263
264     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
265
266     SetFilterMethod( p_vout, val.psz_string );
267
268     free( val.psz_string );
269
270     return VLC_SUCCESS;
271 }
272
273 /*****************************************************************************
274  * SetFilterMethod: setup the deinterlace method to use.
275  *****************************************************************************/
276 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
277 {
278     if( !strcmp( psz_method, "mean" ) )
279     {
280         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
281         p_vout->p_sys->b_double_rate = false;
282         p_vout->p_sys->b_half_height = true;
283     }
284     else if( !strcmp( psz_method, "blend" )
285              || !strcmp( psz_method, "average" )
286              || !strcmp( psz_method, "combine-fields" ) )
287     {
288         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
289         p_vout->p_sys->b_double_rate = false;
290         p_vout->p_sys->b_half_height = false;
291     }
292     else if( !strcmp( psz_method, "bob" )
293              || !strcmp( psz_method, "progressive-scan" ) )
294     {
295         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
296         p_vout->p_sys->b_double_rate = true;
297         p_vout->p_sys->b_half_height = false;
298     }
299     else if( !strcmp( psz_method, "linear" ) )
300     {
301         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
302         p_vout->p_sys->b_double_rate = true;
303         p_vout->p_sys->b_half_height = false;
304     }
305     else if( !strcmp( psz_method, "x" ) )
306     {
307         p_vout->p_sys->i_mode = DEINTERLACE_X;
308         p_vout->p_sys->b_double_rate = false;
309         p_vout->p_sys->b_half_height = false;
310     }
311     else
312     {
313         const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422;
314         if( strcmp( psz_method, "discard" ) )
315             msg_Err( p_vout, "no valid deinterlace mode provided, "
316                      "using \"discard\"" );
317
318         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
319         p_vout->p_sys->b_double_rate = false;
320         p_vout->p_sys->b_half_height = !b_i422;
321     }
322
323     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
324 }
325
326 static void GetOutputFormat( vout_thread_t *p_vout,
327                              video_format_t *p_dst, const video_format_t *p_src )
328 {
329     *p_dst = *p_src;
330
331     if( p_vout->p_sys->b_half_height )
332     {
333         p_dst->i_height /= 2;
334         p_dst->i_visible_height /= 2;
335         p_dst->i_y_offset /= 2;
336         p_dst->i_sar_den *= 2;
337     }
338
339     if( p_src->i_chroma == VLC_CODEC_I422 )
340     {
341         switch( p_vout->p_sys->i_mode )
342         {
343         case DEINTERLACE_MEAN:
344         case DEINTERLACE_LINEAR:
345         case DEINTERLACE_X:
346             p_dst->i_chroma = VLC_CODEC_I422;
347             break;
348         default:
349             p_dst->i_chroma = VLC_CODEC_I420;
350             break;
351         }
352     }
353 }
354
355 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
356 {
357     return i_chroma == VLC_CODEC_I420 ||
358            i_chroma == VLC_CODEC_YV12 ||
359            i_chroma == VLC_CODEC_I422;
360 }
361
362 /*****************************************************************************
363  * Init: initialize Deinterlace video thread output method
364  *****************************************************************************/
365 static int Init( vout_thread_t *p_vout )
366 {
367     I_OUTPUTPICTURES = 0;
368
369     if( !IsChromaSupported( p_vout->render.i_chroma ) )
370         return VLC_EGENERIC; /* unknown chroma */
371
372     /* Initialize the output structure, full of directbuffers since we want
373      * the decoder to output directly to our structures. */
374     p_vout->output.i_chroma = p_vout->render.i_chroma;
375     p_vout->output.i_width  = p_vout->render.i_width;
376     p_vout->output.i_height = p_vout->render.i_height;
377     p_vout->output.i_aspect = p_vout->render.i_aspect;
378     p_vout->fmt_out = p_vout->fmt_in;
379
380     /* Try to open the real video output */
381     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
382
383     if( p_vout->p_sys->p_vout == NULL )
384     {
385         /* Everything failed */
386         msg_Err( p_vout, "cannot open vout, aborting" );
387
388         return VLC_EGENERIC;
389     }
390
391     var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );
392
393     vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
394
395     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
396
397     return VLC_SUCCESS;
398 }
399
400 /*****************************************************************************
401  * SpawnRealVout: spawn the real video output.
402  *****************************************************************************/
403 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
404 {
405     msg_Dbg( p_vout, "spawning the real video output" );
406
407     video_format_t fmt;
408     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
409
410     return vout_Create( p_vout, &fmt );
411 }
412
413 /*****************************************************************************
414  * End: terminate Deinterlace video thread output method
415  *****************************************************************************/
416 static void End( vout_thread_t *p_vout )
417 {
418     vout_sys_t *p_sys = p_vout->p_sys;
419
420     if( p_sys->p_vout )
421     {
422         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
423         vout_CloseAndRelease( p_sys->p_vout );
424     }
425
426     vout_filter_ReleaseDirectBuffers( p_vout );
427 }
428
429 /*****************************************************************************
430  * Destroy: destroy Deinterlace video thread output method
431  *****************************************************************************
432  * Terminate an output method created by DeinterlaceCreateOutputMethod
433  *****************************************************************************/
434 static void Destroy( vlc_object_t *p_this )
435 {
436     vout_thread_t *p_vout = (vout_thread_t *)p_this;
437     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
438     free( p_vout->p_sys );
439 }
440
441 /**
442  * Forward mouse event with proper conversion.
443  */
444 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
445                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
446 {
447     vout_thread_t *p_vout = p_data;
448     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
449
450     if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
451         newval.i_int *= 2;
452
453     return var_Set( p_vout, psz_var, newval );
454 }
455
456 /*****************************************************************************
457  * Render: displays previously rendered output
458  *****************************************************************************
459  * This function send the currently rendered image to Deinterlace image,
460  * waits until it is displayed and switch the two rendering buffers, preparing
461  * next frame.
462  *****************************************************************************/
463 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
464 {
465     vout_sys_t *p_sys = p_vout->p_sys;
466     picture_t *pp_outpic[2];
467
468     /* FIXME are they needed ? */
469     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
470     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
471     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
472     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
473
474     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
475     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
476     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
477     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
478     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
479     if( p_vout->p_sys->b_half_height )
480     {
481         p_sys->p_vout->fmt_in.i_y_offset /= 2;
482         p_sys->p_vout->fmt_in.i_visible_height /= 2;
483     }
484
485     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
486     {
487         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
488
489         p_vout->fmt_out.i_aspect = p_vout->fmt_in.i_aspect;
490         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
491         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
492
493         video_format_t fmt = p_vout->fmt_out;
494         if( p_vout->p_sys->b_half_height )
495         {
496             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
497             fmt.i_sar_den *= 2;
498         }
499
500         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
501     }
502     if( !p_sys->p_vout )
503         return;
504
505     pp_outpic[0] = pp_outpic[1] = NULL;
506
507     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
508
509     /* Get a new picture */
510     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
511                                                 0, 0, 0 ) )
512               == NULL )
513     {
514         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
515         {
516             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
517             return;
518         }
519         msleep( VOUT_OUTMEM_SLEEP );
520     }
521
522     pp_outpic[0]->date = p_pic->date;
523
524     /* If we are using double rate, get an additional new picture */
525     if( p_vout->p_sys->b_double_rate )
526     {
527         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
528                                                  0, 0, 0 ) )
529                   == NULL )
530         {
531             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
532             {
533                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
534                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
535                 return;
536             }
537             msleep( VOUT_OUTMEM_SLEEP );
538         }
539
540         /* 20ms is a bit arbitrary, but it's only for the first image we get */
541         if( !p_vout->p_sys->last_date )
542             pp_outpic[1]->date = p_pic->date + 20000;
543         else
544             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
545         p_vout->p_sys->last_date = p_pic->date;
546     }
547
548     switch( p_vout->p_sys->i_mode )
549     {
550         case DEINTERLACE_DISCARD:
551             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
552             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
553             break;
554
555         case DEINTERLACE_BOB:
556             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
557             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
558             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
559             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
560             break;
561
562         case DEINTERLACE_LINEAR:
563             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
564             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
565             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
566             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
567             break;
568
569         case DEINTERLACE_MEAN:
570             RenderMean( p_vout, pp_outpic[0], p_pic );
571             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
572             break;
573
574         case DEINTERLACE_BLEND:
575             RenderBlend( p_vout, pp_outpic[0], p_pic );
576             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
577             break;
578
579         case DEINTERLACE_X:
580             RenderX( pp_outpic[0], p_pic );
581             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
582             break;
583     }
584     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
585 }
586
587 /*****************************************************************************
588  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
589  *****************************************************************************/
590 static void RenderDiscard( vout_thread_t *p_vout,
591                            picture_t *p_outpic, picture_t *p_pic, int i_field )
592 {
593     int i_plane;
594
595     /* Copy image and skip lines */
596     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
597     {
598         uint8_t *p_in, *p_out_end, *p_out;
599         int i_increment;
600
601         p_in = p_pic->p[i_plane].p_pixels
602                    + i_field * p_pic->p[i_plane].i_pitch;
603
604         p_out = p_outpic->p[i_plane].p_pixels;
605         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
606                              * p_outpic->p[i_plane].i_visible_lines;
607
608         switch( p_vout->render.i_chroma )
609         {
610         case VLC_CODEC_I420:
611         case VLC_CODEC_YV12:
612
613             for( ; p_out < p_out_end ; )
614             {
615                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
616
617                 p_out += p_outpic->p[i_plane].i_pitch;
618                 p_in += 2 * p_pic->p[i_plane].i_pitch;
619             }
620             break;
621
622         case VLC_CODEC_I422:
623
624             i_increment = 2 * p_pic->p[i_plane].i_pitch;
625
626             if( i_plane == Y_PLANE )
627             {
628                 for( ; p_out < p_out_end ; )
629                 {
630                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
631                     p_out += p_outpic->p[i_plane].i_pitch;
632                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
633                     p_out += p_outpic->p[i_plane].i_pitch;
634                     p_in += i_increment;
635                 }
636             }
637             else
638             {
639                 for( ; p_out < p_out_end ; )
640                 {
641                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
642                     p_out += p_outpic->p[i_plane].i_pitch;
643                     p_in += i_increment;
644                 }
645             }
646             break;
647
648         default:
649             break;
650         }
651     }
652 }
653
654 /*****************************************************************************
655  * RenderBob: renders a BOB picture - simple copy
656  *****************************************************************************/
657 static void RenderBob( vout_thread_t *p_vout,
658                        picture_t *p_outpic, picture_t *p_pic, int i_field )
659 {
660     int i_plane;
661
662     /* Copy image and skip lines */
663     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
664     {
665         uint8_t *p_in, *p_out_end, *p_out;
666
667         p_in = p_pic->p[i_plane].p_pixels;
668         p_out = p_outpic->p[i_plane].p_pixels;
669         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
670                              * p_outpic->p[i_plane].i_visible_lines;
671
672         switch( p_vout->render.i_chroma )
673         {
674             case VLC_CODEC_I420:
675             case VLC_CODEC_YV12:
676                 /* For BOTTOM field we need to add the first line */
677                 if( i_field == 1 )
678                 {
679                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
680                     p_in += p_pic->p[i_plane].i_pitch;
681                     p_out += p_outpic->p[i_plane].i_pitch;
682                 }
683
684                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
685
686                 for( ; p_out < p_out_end ; )
687                 {
688                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
689
690                     p_out += p_outpic->p[i_plane].i_pitch;
691
692                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
693
694                     p_in += 2 * p_pic->p[i_plane].i_pitch;
695                     p_out += p_outpic->p[i_plane].i_pitch;
696                 }
697
698                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
699
700                 /* For TOP field we need to add the last line */
701                 if( i_field == 0 )
702                 {
703                     p_in += p_pic->p[i_plane].i_pitch;
704                     p_out += p_outpic->p[i_plane].i_pitch;
705                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
706                 }
707                 break;
708
709             case VLC_CODEC_I422:
710                 /* For BOTTOM field we need to add the first line */
711                 if( i_field == 1 )
712                 {
713                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
714                     p_in += p_pic->p[i_plane].i_pitch;
715                     p_out += p_outpic->p[i_plane].i_pitch;
716                 }
717
718                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
719
720                 if( i_plane == Y_PLANE )
721                 {
722                     for( ; p_out < p_out_end ; )
723                     {
724                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
725
726                         p_out += p_outpic->p[i_plane].i_pitch;
727
728                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
729
730                         p_in += 2 * p_pic->p[i_plane].i_pitch;
731                         p_out += p_outpic->p[i_plane].i_pitch;
732                     }
733                 }
734                 else
735                 {
736                     for( ; p_out < p_out_end ; )
737                     {
738                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
739
740                         p_out += p_outpic->p[i_plane].i_pitch;
741                         p_in += 2 * p_pic->p[i_plane].i_pitch;
742                     }
743                 }
744
745                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
746
747                 /* For TOP field we need to add the last line */
748                 if( i_field == 0 )
749                 {
750                     p_in += p_pic->p[i_plane].i_pitch;
751                     p_out += p_outpic->p[i_plane].i_pitch;
752                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
753                 }
754                 break;
755         }
756     }
757 }
758
759 #define Merge p_vout->p_sys->pf_merge /* merge routine selected in Create(); needs a local named p_vout in scope */
760 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge /* calls the end-of-merge hook only when one is set; expands to an unbraced if, so use it as a standalone statement: EndMerge(); */
761
762 /*****************************************************************************
763  * RenderLinear: BOB with linear interpolation
764  *****************************************************************************/
765 static void RenderLinear( vout_thread_t *p_vout,
766                           picture_t *p_outpic, picture_t *p_pic, int i_field )
767 {
768     int i_plane;
769
770     /* Copy image and skip lines */
771     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
772     {
773         uint8_t *p_in, *p_out_end, *p_out;
774
775         p_in = p_pic->p[i_plane].p_pixels;
776         p_out = p_outpic->p[i_plane].p_pixels;
777         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
778                              * p_outpic->p[i_plane].i_visible_lines;
779
780         /* For BOTTOM field we need to add the first line */
781         if( i_field == 1 )
782         {
783             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
784             p_in += p_pic->p[i_plane].i_pitch;
785             p_out += p_outpic->p[i_plane].i_pitch;
786         }
787
788         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
789
790         for( ; p_out < p_out_end ; )
791         {
792             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
793
794             p_out += p_outpic->p[i_plane].i_pitch;
795
796             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
797                    p_pic->p[i_plane].i_pitch );
798
799             p_in += 2 * p_pic->p[i_plane].i_pitch;
800             p_out += p_outpic->p[i_plane].i_pitch;
801         }
802
803         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
804
805         /* For TOP field we need to add the last line */
806         if( i_field == 0 )
807         {
808             p_in += p_pic->p[i_plane].i_pitch;
809             p_out += p_outpic->p[i_plane].i_pitch;
810             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
811         }
812     }
813     EndMerge();
814 }
815
816 static void RenderMean( vout_thread_t *p_vout,
817                         picture_t *p_outpic, picture_t *p_pic )
818 {
819     int i_plane;
820
821     /* Copy image and skip lines */
822     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
823     {
824         uint8_t *p_in, *p_out_end, *p_out;
825
826         p_in = p_pic->p[i_plane].p_pixels;
827
828         p_out = p_outpic->p[i_plane].p_pixels;
829         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
830                              * p_outpic->p[i_plane].i_visible_lines;
831
832         /* All lines: mean value */
833         for( ; p_out < p_out_end ; )
834         {
835             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
836                    p_pic->p[i_plane].i_pitch );
837
838             p_out += p_outpic->p[i_plane].i_pitch;
839             p_in += 2 * p_pic->p[i_plane].i_pitch;
840         }
841     }
842     EndMerge();
843 }
844
845 static void RenderBlend( vout_thread_t *p_vout,
846                          picture_t *p_outpic, picture_t *p_pic )
847 {
848     int i_plane;
849
850     /* Copy image and skip lines */
851     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
852     {
853         uint8_t *p_in, *p_out_end, *p_out;
854
855         p_in = p_pic->p[i_plane].p_pixels;
856
857         p_out = p_outpic->p[i_plane].p_pixels;
858         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
859                              * p_outpic->p[i_plane].i_visible_lines;
860
861         switch( p_vout->render.i_chroma )
862         {
863             case VLC_CODEC_I420:
864             case VLC_CODEC_YV12:
865                 /* First line: simple copy */
866                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
867                 p_out += p_outpic->p[i_plane].i_pitch;
868
869                 /* Remaining lines: mean value */
870                 for( ; p_out < p_out_end ; )
871                 {
872                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
873                            p_pic->p[i_plane].i_pitch );
874
875                     p_out += p_outpic->p[i_plane].i_pitch;
876                     p_in += p_pic->p[i_plane].i_pitch;
877                 }
878                 break;
879
880             case VLC_CODEC_I422:
881                 /* First line: simple copy */
882                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
883                 p_out += p_outpic->p[i_plane].i_pitch;
884
885                 /* Remaining lines: mean value */
886                 if( i_plane == Y_PLANE )
887                 {
888                     for( ; p_out < p_out_end ; )
889                     {
890                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
891                                p_pic->p[i_plane].i_pitch );
892
893                         p_out += p_outpic->p[i_plane].i_pitch;
894                         p_in += p_pic->p[i_plane].i_pitch;
895                     }
896                 }
897
898                 else
899                 {
900                     for( ; p_out < p_out_end ; )
901                     {
902                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
903                                p_pic->p[i_plane].i_pitch );
904
905                         p_out += p_outpic->p[i_plane].i_pitch;
906                         p_in += 2*p_pic->p[i_plane].i_pitch;
907                     }
908                 }
909                 break;
910         }
911     }
912     EndMerge();
913 }
914
915 #undef Merge
916
/*
 * MergeGeneric: portable C implementation of the line merge.
 *
 * Writes i_bytes bytes to _p_dest, each one being the truncated average
 * ( a + b ) >> 1 of the corresponding bytes of _p_s1 and _p_s2.
 *
 * The loop counts bytes down instead of comparing against
 * "p_dest + i_bytes - 8", which pointed before the start of the buffer
 * (undefined behaviour) whenever i_bytes was smaller than 8.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
944
945 #if defined(CAN_COMPILE_MMXEXT)
/*
 * MergeMMXEXT: line merge using the MMXEXT "pavgb" instruction.
 *
 * Writes i_bytes bytes to _p_dest, each one the average of the matching
 * bytes of _p_s1 and _p_s2. Blocks of 8 bytes go through pavgb for as long
 * as strictly more than 8 bytes remain (the same split as before); the
 * remaining 1..8 bytes use the C expression ( a + b ) >> 1.
 *
 * Compared with the previous version:
 *  - the memory operands are typed as 8-byte arrays so the compiler knows
 *    the full extent that the asm reads and writes;
 *  - mm1 is declared as clobbered;
 *  - no pointer before the start of the buffer is computed for
 *    i_bytes < 8.
 */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__ __volatile__( "movq %2,%%mm1;"
                              "pavgb %1, %%mm1;"
                              "movq %%mm1, %0"
                              : "=m" (*(uint8_t (*)[8])p_dest)
                              : "m" (*(const uint8_t (*)[8])p_s1),
                                "m" (*(const uint8_t (*)[8])p_s2)
                              : "mm1" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Tail (at most 8 bytes) in plain C */
    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
972 #endif
973
974 #if defined(CAN_COMPILE_3DNOW)
/*
 * Merge3DNow: line merge using the 3DNow! "pavgusb" instruction.
 *
 * Writes i_bytes bytes to _p_dest, each one the average of the matching
 * bytes of _p_s1 and _p_s2. Blocks of 8 bytes go through pavgusb for as
 * long as strictly more than 8 bytes remain (the same split as before);
 * the remaining 1..8 bytes use the C expression ( a + b ) >> 1.
 *
 * Compared with the previous version:
 *  - the memory operands are typed as 8-byte arrays so the compiler knows
 *    the full extent that the asm reads and writes;
 *  - mm1 is declared as clobbered;
 *  - no pointer before the start of the buffer is computed for
 *    i_bytes < 8.
 */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        __asm__ __volatile__( "movq %2,%%mm1;"
                              "pavgusb %1, %%mm1;"
                              "movq %%mm1, %0"
                              : "=m" (*(uint8_t (*)[8])p_dest)
                              : "m" (*(const uint8_t (*)[8])p_s1),
                                "m" (*(const uint8_t (*)[8])p_s2)
                              : "mm1" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Tail (at most 8 bytes) in plain C */
    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
1001 #endif
1002
1003 #if defined(CAN_COMPILE_SSE)
/*
 * MergeSSE2: line merge using the SSE2 "pavgb" instruction on xmm registers.
 *
 * Writes exactly i_bytes bytes to _p_dest, each one the average of the
 * matching bytes of _p_s1 and _p_s2:
 *  1. bytes are merged in C until _p_s1 is 16-byte aligned (or the input
 *     is exhausted);
 *  2. blocks of 16 bytes go through pavgb for as long as strictly more
 *     than 16 bytes remain (same split as before for aligned sources);
 *  3. the remaining bytes are merged in C.
 *
 * The previous version computed its end pointer after the alignment loop
 * had already advanced p_dest, so for an unaligned _p_s1 it processed
 * i_bytes plus the alignment gap and wrote past the end of the
 * destination. The alignment loop was also not bounded by i_bytes.
 * The remaining byte count is now tracked directly.
 *
 * The asm memory operands are typed as 16-byte arrays and xmm1 is declared
 * as clobbered so the compiler has an accurate picture of the statement.
 */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Head: reach a 16-byte aligned first source */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;

    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        __asm__ __volatile__( "movdqu %2,%%xmm1;"
                              "pavgb %1, %%xmm1;"
                              "movdqu %%xmm1, %0"
                              : "=m" (*(uint8_t (*)[16])p_dest)
                              : "m" (*(const uint8_t (*)[16])p_s1),
                                "m" (*(const uint8_t (*)[16])p_s2)
                              : "xmm1" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Tail (at most 16 bytes) in plain C */
    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
1035 #endif
1036
1037 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: execute the x86 "emms" instruction.
 * Per the instruction set reference, emms marks the MMX registers as empty
 * so that x87 floating-point code can run afterwards.
 * NOTE(review): presumably installed as the "end merge" hook next to the
 * MMXEXT/SSE2 merge functions - the assignment is not in this part of the
 * file, confirm where it is set up. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
1042 #endif
1043
1044 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: execute the "femms" instruction, the 3DNow! counterpart of
 * "emms" (leaves MMX/3DNow! state so x87 code can run afterwards).
 * NOTE(review): presumably installed as the "end merge" hook next to
 * Merge3DNow - the assignment is not in this part of the file, confirm
 * where it is set up. */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
1049 #endif
1050
1051 #ifdef CAN_COMPILE_C_ALTIVEC
/*
 * MergeAltivec: line merge using AltiVec vec_avg().
 *
 * Averages i_bytes bytes of _p_s1 and _p_s2 into _p_dest in three phases:
 * a C head until the destination is 16-byte aligned, a vector body handling
 * 16 bytes per iteration, and a C tail for what is left.
 *
 * NOTE(review): the head loop is not bounded by i_bytes, so a misaligned
 * destination combined with an i_bytes smaller than the alignment gap would
 * write more than requested - confirm callers always pass aligned lines.
 */
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    /* The vector loops run while p_dest < p_end, i.e. while at least
     * 16 bytes are left before the real end of the destination */
    uint8_t *p_end  = p_dest + i_bytes - 15;

    /* Use C until the first 16-bytes aligned destination pixel */
    /* Only the low 4 bits of the address matter for the (int) casts below */
    while( (int)p_dest & 0xF )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
    {
        /* Unaligned source */
        /* Each source vector is assembled with vec_perm() from the previous
         * and the next aligned 16-byte loads, using the permutation returned
         * by vec_lvsl() for that source address.
         * NOTE(review): the vec_ld( 16, ... ) look-ahead fetches the aligned
         * block following the current one, which may lie beyond the last
         * byte the merge actually needs - confirm this is safe for the
         * buffers passed in. */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( p_dest < p_end )
        {
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( p_dest < p_end )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Restore the real end of the destination for the C tail */
    p_end += 15;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1119 #endif
1120
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis: it copies the top
 * field and applies a process to recreate the bottom field.
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a motion estimation (ME) between the 2
 *    fields; if there is a good match we use motion compensation (MC) to
 *    recreate the bottom field (with a small blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
1133
/* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
 * XXX: It needs access to an 8x10 area.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: detection doesn't work well on smooth/uniform areas with noise,
 * but it's not really a problem because they don't have much detail anyway
 */
/* ssd: square of a, the per-sample term of a sum of squared differences */
static inline int ssd( int a )
{
    return a * a;
}
/* XDeint8x8DetectC: C implementation of the 8x8 interlacing detector.
 *
 * src points to the top-left sample of the block, i_src is the line pitch.
 * Four groups of four lines are examined, starting at lines 0, 2, 4 and 6
 * (so lines 0..9 are read). For each group two sums of squared differences
 * over the 8 columns are compared:
 *  - "frame": between vertically adjacent lines (0-1 and 1-2),
 *  - "field": between lines of the same parity (0-2 and 1-3).
 * A group counts as interlaced when field < 6/8 * frame and frame > 32.
 * Returns 1 if at least one group is interlaced, 0 otherwise.
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    for( int i_group = 0; i_group < 4; i_group++, src += 2 * i_src )
    {
        int i_frame = 0;
        int i_field = 0;

        for( int x = 0; x < 8; x++ )
        {
            i_frame += ssd( src[          x] - src[1 * i_src + x] ) +
                       ssd( src[i_src   + x] - src[2 * i_src + x] );
            i_field += ssd( src[          x] - src[2 * i_src + x] ) +
                       ssd( src[i_src   + x] - src[3 * i_src + x] );
        }

        if( i_field < 6 * i_frame / 8 && i_frame > 32 )
            i_hits++;
    }

    return i_hits >= 1;
}
1168 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMX implementation of the 8x8 interlacing detector.
 *
 * Same per-group test as XDeint8x8DetectC (ff < 6*fr/8 && fr > 32), with the
 * squared differences computed four columns at a time. Returns the number
 * of groups classified as interlaced (non-zero means "interlaced").
 *
 * Comments below assume the mmx.h macros take (source, destination)
 * operands - TODO confirm against mmx.h.
 *
 * NOTE(review): this loop runs for y = 0, 2, 4, 6, 8 and therefore reads
 * lines 0..11, whereas the C version stops at y = 6 (lines 0..9) and the
 * comment above the detectors mentions an 8x10 area - confirm which bound
 * is intended.
 */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    /* mm7 stays zero: used to widen bytes to 16-bit words */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        /* mm5 accumulates the "frame" sum (fr), mm6 the "field" sum (ff) */
        pxor_r2r( mm5, mm5 );
        pxor_r2r( mm6, mm6 );
        for( x = 0; x < 8; x+=4 )
        {
            /* Load 4 samples from each of the 4 lines of the group */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            /* mm0 = line0 - line1, mm4 = line0 - line2 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );

            /* mm2 = line2 - line1, mm3 = line3 - line1 */
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square the differences and add them to the running sums */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Fold the two 32-bit halves of mm5 together and store into fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        /* Same for mm6, stored into ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
1231 #endif
1232
/* XDeint8x8MergeC: fill an 8x8 output block for a progressive source block.
 *
 * For each of the 4 line pairs, the first output line is a copy of the
 * current src1 line and the second one is the (1,6,1)/8 blend of the
 * current src1 line, the current src2 line and the next src1 line, rounded
 * to nearest. src1/src2 then advance by i_src1/i_src2, dst by two lines.
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_blend = dst + i_dst;

        /* Kept line */
        memcpy( dst, src1, 8 );

        /* Rebuilt line */
        for( int x = 0; x < 8; x++ )
            p_blend[x] = ( src1[x] + 6 * src2[x] + src1[i_src1 + x] + 4 ) >> 3;

        dst  += 2 * i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1253
1254 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMX version of XDeint8x8MergeC.
 *
 * For each of the 4 line pairs, copies the current src1 line to the first
 * output line and writes ( src1 + 6*src2 + next src1 + 4 ) >> 3 to the
 * second one, four columns at a time.
 *
 * Comments below assume the mmx.h macros take (source, destination)
 * operands - TODO confirm against mmx.h.
 */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term: the value 4 in each of the four 16-bit lanes */
    static const uint64_t m_4 = INT64_C(0x0004000400040004);
    int y, x;

    /* Progressive */
    /* mm7 stays zero: used to widen bytes to words and when packing back */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Kept line: straight copy of 4 samples */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*src2, mm3 = 4*src2, so mm1 + mm3 = 6*src2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            /* mm0 = src1 + next src1 */
            paddw_r2r( mm2, mm0 );
            paddw_r2r( mm3, mm1 );
            paddw_m2r( m_4, mm1 );
            paddw_r2r( mm1, mm0 );
            /* Divide by 8 and pack back to bytes */
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1293
1294 #endif
1295
/* XDeint8x8Set (debug helper): fill an 8x8 block with the value v.
 * dst points to the top-left sample, i_dst is the line pitch. */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int i_row = 0; i_row < 8; i_row++ )
    {
        uint8_t *p_row = dst + i_row * i_dst;
        memset( p_row, v, 8 );
    }
}
1303
/* XDeint8x8FieldE: simple (1,0,1) deinterlacing for blocks that miss a
 * horizontal neighbour.
 * (Uses 8x9 samples: lines 0, 2, 4, 6 and 8 of the source block.)
 * TODO: a better one for the inner part.
 *
 * For each of the 4 line pairs, the first output line is a copy of the
 * current source line and the second one is the truncated average of that
 * line and the source line two rows below.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_interp = dst + i_dst;

        /* Kept line */
        memcpy( dst, src, 8 );

        /* Rebuilt line */
        for( int x = 0; x < 8; x++ )
            p_interp[x] = ( src[x] + src[2 * i_src + x] ) >> 1;

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1326 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMX version of XDeint8x8FieldEC.
 *
 * For each of the 4 line pairs, copies the current source line (8 bytes) to
 * the first output line and writes the pavgb average of that line and the
 * source line two rows below to the second one.
 * NOTE(review): pavgb rounds to nearest while the C version truncates
 * ( >> 1 ), so results can differ by one - confirm this is acceptable.
 */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Rebuilt line: average with the line two rows below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1348 #endif
1349
/* XDeint8x8Field: edge oriented interpolation
 * (needs 4 samples to the left and 5 to the right of the block, and one
 * extra line below it).
 *
 * For each of the 4 line pairs, the first output line is a copy of the
 * current source line. Each sample of the second line is the truncated
 * average of one sample from the current line and one from the line two
 * rows below, taken along the direction with the lowest cost:
 *  - c0: sum of absolute differences with the lower line shifted by +2,
 *  - c1: no shift,
 *  - c2: lower line shifted by -2,
 * each evaluated over a window of 8 samples.
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_low = &src[2 * i_src];
        uint8_t *p_interp = dst + i_dst;

        /* Kept line */
        memcpy( dst, src, 8 );

        for( int x = 0; x < 8; x++ )
        {
            int c0 = 0, c1 = 0, c2 = 0;

            /* 8 samples are used just to match the MMX version, but it is
             * overkill: 5 would be enough (less isn't good) */
            for( int k = 0; k < 8; k++ )
            {
                c0 += abs( src[x - 4 + k] - p_low[x - 2 + k] );
                c1 += abs( src[x - 3 + k] - p_low[x - 3 + k] );
                c2 += abs( src[x - 2 + k] - p_low[x - 4 + k] );
            }

            if( c0 < c1 && c1 <= c2 )
                p_interp[x] = ( src[x - 1] + p_low[x + 1] ) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                p_interp[x] = ( src[x + 1] + p_low[x - 1] ) >> 1;
            else
                p_interp[x] = ( src[x] + p_low[x] ) >> 1;
        }

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1396 #ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: MMX version of the edge oriented interpolation.
 *
 * Same selection logic as XDeint8x8FieldC; the three 8-sample costs are
 * computed with psadbw instead of explicit abs() sums. Like the C version
 * it reads 4 samples to the left and 5 to the right of the block and one
 * extra line below it.
 *
 * Comments below assume the mmx.h macros take (source, destination)
 * operands - TODO confirm against mmx.h.
 */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            /* Source line two rows below the current one */
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            /* 8-sample windows of the current line at offsets -2, -3, -4 */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* Costs against the lower line at offsets -4, -3, -2 */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            /* Interpolate along the cheapest direction */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1437 #endif
1438
/* NxN: arbitrary block size (these variants only use pixels inside the NxN
 * block)
 */
/* XDeintNxNDetect: interlacing detector for a block of arbitrary size.
 *
 * Note the parameter order: i_height comes before i_width.
 *
 * For every even line y with y < i_height - 2, the squared differences
 * between line y and line y+1 ("frame") and between line y and line y+2
 * ("field") are added to two running sums that are never reset between
 * lines. A line counts as interlaced when field < frame and
 * frame > i_width / 2. Returns 1 if at least two lines count, 0 otherwise.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    int i_frame = 0;
    int i_field = 0;
    int i_hits = 0;

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *p_line = &src[y * i_src];

        for( int x = 0; x < i_width; x++ )
        {
            i_frame += ssd( p_line[x] - p_line[1 * i_src + x] );
            i_field += ssd( p_line[x] - p_line[2 * i_src + x] );
        }

        if( i_field < i_frame && i_frame > i_width / 2 )
            i_hits++;
    }

    return i_hits >= 2;
}
1467
/* XDeintNxNFrame: fill an i_width x i_height output block for a progressive
 * source block.
 *
 * Even lines are copied. Each odd line y+1 is rebuilt as:
 *  - the (1,2,1)/4 blend of source lines y, y+1 and y+2 (rounded to
 *    nearest) when line y+2 is inside the block;
 *  - the truncated average of source lines y and y+1 for the last line.
 *
 * When i_height is odd, the last even line has no odd partner inside the
 * block: it is only copied. (The previous version still read source line
 * i_height and wrote destination line i_height in that case, both outside
 * the block.)
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        uint8_t *p_odd = dst + i_dst;

        /* Even line: kept as is */
        memcpy( dst, src, i_width );

        if( y + 2 < i_height )
        {
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = ( src[x] + 2 * src[1 * i_src + x]
                             + src[2 * i_src + x] + 2 ) >> 2;
        }
        else if( y + 1 < i_height )
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = ( src[x] + src[1 * i_src + x] ) >> 1;
        }

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1495
/* XDeintNxNField: fill an i_width x i_height output block for an interlaced
 * source block.
 *
 * Even lines are copied. Each odd line y+1 is rebuilt as:
 *  - the truncated average of source lines y and y+2 when line y+2 is
 *    inside the block;
 *  - the truncated average of source lines y and y+1 for the last line.
 *
 * When i_height is odd, the last even line has no odd partner inside the
 * block: it is only copied. (The previous version still read source line
 * i_height and wrote destination line i_height in that case, both outside
 * the block.)
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        uint8_t *p_odd = dst + i_dst;

        /* Even line: kept as is */
        memcpy( dst, src, i_width );

        if( y + 2 < i_height )
        {
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = ( src[x] + src[2 * i_src + x] ) >> 1;
        }
        else if( y + 1 < i_height )
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = ( src[x] + src[i_src + x] ) >> 1;
        }

        dst += 2 * i_dst;
        src += 2 * i_src;
    }
}
1523
/* XDeintNxN: deinterlace an i_width x i_height block of arbitrary size.
 *
 * Runs the NxN detector on the block and rebuilds it with the field
 * variant when it is classified as interlaced, with the frame variant
 * otherwise.
 *
 * XDeintNxNDetect() takes (height, width) in that order, unlike the other
 * NxN helpers. Width and height used to be passed the wrong way round, so
 * the detector scanned a transposed area that could extend below the block.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1532
1533
/* median: return the middle value of a, b and c.
 * Computed as the sum of the three values minus the smallest and the
 * largest one. */
static inline int median( int a, int b, int c )
{
    int i_lo = ( b < a ) ? b : a;
    int i_hi = ( b < a ) ? a : b;

    if( c < i_lo )
        i_lo = c;
    else if( c > i_hi )
        i_hi = c;

    return a + b + c - i_lo - i_hi;
}
1549
1550
/* XDeintBand8x8C: deinterlace one band of 8 lines with the C routines.
 *
 * The band is processed as i_mbx blocks of 8x8 samples followed, when
 * i_modx is non-zero, by one narrower block of i_modx x 8 samples:
 *  - blocks detected as interlaced use the edge oriented interpolation,
 *    except the first and the last 8x8 block of the band, which use the
 *    simple (1,0,1) variant;
 *  - the other blocks use the progressive (1,6,1) merge;
 *  - the narrower trailing block goes through XDeintNxN().
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive: even lines as src1, odd lines as src2 */
            XDeint8x8MergeC( dst, i_dst,
                             src, 2 * i_src,
                             src + i_src, 2 * i_src );
            continue;
        }

        /* Interlaced */
        if( i_block == 0 || i_block == i_mbx - 1 )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1583 #ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT: deinterlace one band of 8 lines with the MMXEXT
 * routines.
 *
 * The band is processed as i_mbx blocks of 8x8 samples followed, when
 * i_modx is non-zero, by one narrower block of i_modx x 8 samples:
 *  - blocks detected as interlaced use the edge oriented interpolation,
 *    except the first and the last 8x8 block of the band, which use the
 *    simple (1,0,1) variant;
 *  - the other blocks use the progressive (1,6,1) merge;
 *  - the narrower trailing block goes through the C-only XDeintNxN().
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Progressive: even lines as src1, odd lines as src2 */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src, 2 * i_src,
                                  src + i_src, 2 * i_src );
            continue;
        }

        /* Interlaced */
        if( i_block == 0 || i_block == i_mbx - 1 )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1615 #endif
1616
1617 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1618 {
1619     int i_plane;
1620
1621     /* Copy image and skip lines */
1622     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1623     {
1624         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1625         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1626
1627         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1628         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1629
1630         const int i_dst = p_outpic->p[i_plane].i_pitch;
1631         const int i_src = p_pic->p[i_plane].i_pitch;
1632
1633         int y, x;
1634
1635         for( y = 0; y < i_mby; y++ )
1636         {
1637             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1638             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1639
1640 #ifdef CAN_COMPILE_MMXEXT
1641             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1642                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1643             else
1644 #endif
1645                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1646         }
1647
1648         /* Last line (C only)*/
1649         if( i_mody )
1650         {
1651             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1652             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1653
1654             for( x = 0; x < i_mbx; x++ )
1655             {
1656                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1657
1658                 dst += 8;
1659                 src += 8;
1660             }
1661
1662             if( i_modx )
1663                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1664         }
1665     }
1666
1667 #ifdef CAN_COMPILE_MMXEXT
1668     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1669         emms();
1670 #endif
1671 }
1672
1673 /*****************************************************************************
1674  * FilterCallback: called when changing the deinterlace method on the fly.
1675  *****************************************************************************/
1676 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1677                            vlc_value_t oldval, vlc_value_t newval,
1678                            void *p_data )
1679 {
1680     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1681     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1682
1683     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1684
1685     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
1686     const bool b_old_half_height = p_vout->p_sys->b_half_height;
1687
1688     SetFilterMethod( p_vout, newval.psz_string );
1689
1690     if( !b_old_half_height == !p_vout->p_sys->b_half_height )
1691     {
1692         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1693         return VLC_SUCCESS;
1694     }
1695
1696     /* We need to kill the old vout */
1697     if( p_vout->p_sys->p_vout )
1698     {
1699         vout_filter_DelChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
1700         vout_CloseAndRelease( p_vout->p_sys->p_vout );
1701     }
1702
1703     /* Try to open a new video output */
1704     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
1705
1706     if( p_vout->p_sys->p_vout == NULL )
1707     {
1708         /* Everything failed */
1709         msg_Err( p_vout, "cannot open vout, aborting" );
1710
1711         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1712         return VLC_EGENERIC;
1713     }
1714
1715     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
1716
1717     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1718     return VLC_SUCCESS;
1719 }
1720
1721 /*****************************************************************************
1722  * video filter2 functions
1723  *****************************************************************************/
1724 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1725 {
1726     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1727     picture_t *p_pic_dst;
1728
1729     /* Request output picture */
1730     p_pic_dst = filter_NewPicture( p_filter );
1731     if( p_pic_dst == NULL )
1732     {
1733         picture_Release( p_pic );
1734         return NULL;
1735     }
1736
1737     switch( p_vout->p_sys->i_mode )
1738     {
1739         case DEINTERLACE_DISCARD:
1740             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1741             break;
1742
1743         case DEINTERLACE_BOB:
1744 #if 0
1745             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1746             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1747             break;
1748 #endif
1749
1750         case DEINTERLACE_LINEAR:
1751 #if 0
1752             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1753             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
1754 #endif
1755             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
1756             picture_Release( p_pic_dst );
1757             picture_Release( p_pic );
1758             return NULL;
1759
1760         case DEINTERLACE_MEAN:
1761             RenderMean( p_vout, p_pic_dst, p_pic );
1762             break;
1763
1764         case DEINTERLACE_BLEND:
1765             RenderBlend( p_vout, p_pic_dst, p_pic );
1766             break;
1767
1768         case DEINTERLACE_X:
1769             RenderX( p_pic_dst, p_pic );
1770             break;
1771     }
1772
1773     picture_CopyProperties( p_pic_dst, p_pic );
1774     p_pic_dst->b_progressive = true;
1775
1776     picture_Release( p_pic );
1777     return p_pic_dst;
1778 }
1779
1780 /*****************************************************************************
1781  * OpenFilter:
1782  *****************************************************************************/
1783 static int OpenFilter( vlc_object_t *p_this )
1784 {
1785     filter_t *p_filter = (filter_t*)p_this;
1786     vout_thread_t *p_vout;
1787     vlc_value_t val;
1788
1789     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
1790         return VLC_EGENERIC;
1791
1792     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
1793      * by spu filters */
1794     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
1795     vlc_object_attach( p_vout, p_filter );
1796     p_filter->p_sys = (filter_sys_t *)p_vout;
1797     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
1798
1799     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
1800                    p_filter->p_cfg );
1801     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
1802
1803     var_Create( p_filter, "deinterlace-mode", VLC_VAR_STRING );
1804     var_Set( p_filter, "deinterlace-mode", val );
1805     free( val.psz_string );
1806
1807     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
1808     {
1809         vlc_object_detach( p_vout );
1810         vlc_object_release( p_vout );
1811         return VLC_EGENERIC;
1812     }
1813
1814     video_format_t fmt;
1815     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
1816     if( !p_filter->b_allow_fmt_out_change &&
1817         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
1818           fmt.i_height != p_filter->fmt_in.video.i_height ) )
1819     {
1820         CloseFilter( VLC_OBJECT(p_filter) );
1821         return VLC_EGENERIC;
1822     }
1823     p_filter->fmt_out.video = fmt;
1824     p_filter->fmt_out.i_codec = fmt.i_chroma;
1825     p_filter->pf_video_filter = Deinterlace;
1826
1827     msg_Dbg( p_filter, "deinterlacing" );
1828
1829     return VLC_SUCCESS;
1830 }
1831
1832 /*****************************************************************************
1833  * CloseFilter: clean up the filter
1834  *****************************************************************************/
1835 static void CloseFilter( vlc_object_t *p_this )
1836 {
1837     filter_t *p_filter = (filter_t*)p_this;
1838     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1839
1840     Destroy( VLC_OBJECT(p_vout) );
1841     vlc_object_detach( p_vout );
1842     vlc_object_release( p_vout );
1843 }
1844