]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
Improved a bit deinterlace video filter2.
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001, 2002, 2003 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <errno.h>
33
34 #ifdef HAVE_ALTIVEC_H
35 #   include <altivec.h>
36 #endif
37
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_vout.h>
41 #include <vlc_sout.h>
42 #include "vlc_filter.h"
43
44 #ifdef CAN_COMPILE_MMXEXT
45 #   include "mmx.h"
46 #endif
47
48 #include "filter_common.h"
49
/* Identifiers of the deinterlacing algorithms. One of these is stored in
 * vout_sys_t.i_mode by SetFilterMethod() and dispatched on in Render(). */
#define DEINTERLACE_DISCARD 1
#define DEINTERLACE_MEAN    2
#define DEINTERLACE_BLEND   3
#define DEINTERLACE_BOB     4
#define DEINTERLACE_LINEAR  5
#define DEINTERLACE_X       6
56
/*****************************************************************************
 * Local prototypes
 *****************************************************************************/
/* Entry points of the main module (registered with set_callbacks) */
static int  Create    ( vlc_object_t * );
static void Destroy   ( vlc_object_t * );

/* Video output hooks installed on the vout by Create() */
static int  Init      ( vout_thread_t * );
static void End       ( vout_thread_t * );
static void Render    ( vout_thread_t *, picture_t * );

/* One renderer per deinterlace mode, called from Render() */
static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderX      ( picture_t *, picture_t * );

/* Line-merge routines; Create() picks one according to the CPU capabilities
 * and the CAN_COMPILE_* build flags */
static void MergeGeneric ( void *, const void *, const void *, size_t );
#if defined(CAN_COMPILE_C_ALTIVEC)
static void MergeAltivec ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_MMXEXT)
static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void Merge3DNow   ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_SSE)
static void MergeSSE2    ( void *, const void *, const void *, size_t );
#endif
/* Optional routines stored in pf_end_merge alongside the merge routine */
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
static void EndMMX       ( void );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void End3DNow     ( void );
#endif

/* Variable callback handed to ADD_CALLBACKS/DEL_CALLBACKS for the child vout */
static int  SendEvents   ( vlc_object_t *, char const *,
                           vlc_value_t, vlc_value_t, void * );

static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );

/* Entry points of the "video filter2" submodule (see the module descriptor) */
static int OpenFilter( vlc_object_t *p_this );
static void CloseFilter( vlc_object_t *p_this );
102
/*****************************************************************************
 * Callback prototypes
 *****************************************************************************/
/* Registered on the "deinterlace-mode" variable of the vout in Init() */
static int FilterCallback( vlc_object_t *, char const *,
                           vlc_value_t, vlc_value_t, void * );
108
/*****************************************************************************
 * Module descriptor
 *****************************************************************************
 * Declares two modules: the main "video filter" (Create/Destroy), configured
 * through "deinterlace-mode", and a "video filter2" submodule
 * (OpenFilter/CloseFilter), configured through FILTER_CFG_PREFIX "mode".
 *****************************************************************************/
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")

#define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
#define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")

/* Prefix of the configuration options of the "video filter2" submodule */
#define FILTER_CFG_PREFIX "sout-deinterlace-"

/* Accepted mode values and, index for index, their display names */
static const char *const mode_list[] = {
    "discard", "blend", "mean", "bob", "linear", "x" };
static const char *const mode_list_text[] = {
    N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X" };

vlc_module_begin ()
    set_description( N_("Deinterlacing video filter") )
    set_shortname( N_("Deinterlace" ))
    set_capability( "video filter", 0 )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )

    /* Local playback: defaults to the "discard" mode */
    set_section( N_("Display"),NULL)
    add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT,
                MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )

    add_shortcut( "deinterlace" )
    set_callbacks( Create, Destroy )

    /* Streaming: defaults to the "blend" mode */
    add_submodule ()
    set_capability( "video filter2", 0 )
    set_section( N_("Streaming"),NULL)
    add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
                SOUT_MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
    set_callbacks( OpenFilter, CloseFilter )
vlc_module_end ()
148
/* NULL-terminated list of option names.
 * NOTE(review): presumably these are the FILTER_CFG_PREFIX options parsed by
 * the "video filter2" submodule -- confirm in OpenFilter(). */
static const char *const ppsz_filter_options[] = {
    "mode", NULL
};
152
/*****************************************************************************
 * vout_sys_t: Deinterlace video output method descriptor
 *****************************************************************************
 * This structure is part of the video output thread descriptor.
 * It describes the Deinterlace specific properties of an output thread.
 *****************************************************************************/
struct vout_sys_t
{
    int        i_mode;        /* Deinterlace mode (one of DEINTERLACE_*) */
    bool b_double_rate; /* Shall we double the framerate? */
    bool b_half_height; /* Shall we divide the height by 2? */

    mtime_t    last_date;     /* Date of the previous input picture; Render()
                               * uses it to date the second output picture
                               * in double rate mode (0 = none seen yet) */
    mtime_t    next_date;     /* Not referenced in the code visible here */

    vout_thread_t *p_vout;    /* Child ("real") vout, see SpawnRealVout() */

    vlc_mutex_t filter_lock;  /* Held by Render() while it allocates and
                               * fills the output pictures */

    /* Line merge routine and optional termination routine (may be NULL),
     * both selected in Create() */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    void (*pf_end_merge) ( void );
};
175
176 /*****************************************************************************
177  * Control: control facility for the vout (forwards to child vout)
178  *****************************************************************************/
179 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
180 {
181     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
182 }
183
184 /*****************************************************************************
185  * Create: allocates Deinterlace video thread output method
186  *****************************************************************************
187  * This function allocates and initializes a Deinterlace vout method.
188  *****************************************************************************/
189 static int Create( vlc_object_t *p_this )
190 {
191     vout_thread_t *p_vout = (vout_thread_t *)p_this;
192     vlc_value_t val;
193
194     /* Allocate structure */
195     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
196     if( p_vout->p_sys == NULL )
197         return VLC_ENOMEM;
198
199     p_vout->pf_init = Init;
200     p_vout->pf_end = End;
201     p_vout->pf_manage = NULL;
202     p_vout->pf_render = Render;
203     p_vout->pf_display = NULL;
204     p_vout->pf_control = Control;
205
206     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
207     p_vout->p_sys->b_double_rate = false;
208     p_vout->p_sys->b_half_height = true;
209     p_vout->p_sys->last_date = 0;
210     p_vout->p_sys->p_vout = 0;
211     vlc_mutex_init( &p_vout->p_sys->filter_lock );
212
213 #if defined(CAN_COMPILE_C_ALTIVEC)
214     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
215     {
216         p_vout->p_sys->pf_merge = MergeAltivec;
217         p_vout->p_sys->pf_end_merge = NULL;
218     }
219     else
220 #endif
221 #if defined(CAN_COMPILE_SSE)
222     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
223     {
224         p_vout->p_sys->pf_merge = MergeSSE2;
225         p_vout->p_sys->pf_end_merge = EndMMX;
226     }
227     else
228 #endif
229 #if defined(CAN_COMPILE_MMXEXT)
230     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
231     {
232         p_vout->p_sys->pf_merge = MergeMMXEXT;
233         p_vout->p_sys->pf_end_merge = EndMMX;
234     }
235     else
236 #endif
237 #if defined(CAN_COMPILE_3DNOW)
238     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
239     {
240         p_vout->p_sys->pf_merge = Merge3DNow;
241         p_vout->p_sys->pf_end_merge = End3DNow;
242     }
243     else
244 #endif
245     {
246         p_vout->p_sys->pf_merge = MergeGeneric;
247         p_vout->p_sys->pf_end_merge = NULL;
248     }
249
250     /* Look what method was requested */
251     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
252     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
253
254     if( val.psz_string == NULL )
255     {
256         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
257         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
258
259         val.psz_string = strdup( "discard" );
260     }
261
262     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
263
264     SetFilterMethod( p_vout, val.psz_string );
265
266     free( val.psz_string );
267
268     return VLC_SUCCESS;
269 }
270
271 /*****************************************************************************
272  * SetFilterMethod: setup the deinterlace method to use.
273  *****************************************************************************/
274 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
275 {
276     if( !strcmp( psz_method, "mean" ) )
277     {
278         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
279         p_vout->p_sys->b_double_rate = false;
280         p_vout->p_sys->b_half_height = true;
281     }
282     else if( !strcmp( psz_method, "blend" )
283              || !strcmp( psz_method, "average" )
284              || !strcmp( psz_method, "combine-fields" ) )
285     {
286         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
287         p_vout->p_sys->b_double_rate = false;
288         p_vout->p_sys->b_half_height = false;
289     }
290     else if( !strcmp( psz_method, "bob" )
291              || !strcmp( psz_method, "progressive-scan" ) )
292     {
293         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
294         p_vout->p_sys->b_double_rate = true;
295         p_vout->p_sys->b_half_height = false;
296     }
297     else if( !strcmp( psz_method, "linear" ) )
298     {
299         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
300         p_vout->p_sys->b_double_rate = true;
301         p_vout->p_sys->b_half_height = false;
302     }
303     else if( !strcmp( psz_method, "x" ) )
304     {
305         p_vout->p_sys->i_mode = DEINTERLACE_X;
306         p_vout->p_sys->b_double_rate = false;
307         p_vout->p_sys->b_half_height = false;
308     }
309     else
310     {
311         const bool b_i422 = p_vout->render.i_chroma == VLC_FOURCC('I','4','2','2');
312         if( strcmp( psz_method, "discard" ) )
313             msg_Err( p_vout, "no valid deinterlace mode provided, "
314                      "using \"discard\"" );
315
316         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
317         p_vout->p_sys->b_double_rate = false;
318         p_vout->p_sys->b_half_height = !b_i422;
319     }
320
321     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
322 }
323
324 static void GetOutputFormat( vout_thread_t *p_vout,
325                              video_format_t *p_dst, const video_format_t *p_src )
326 {
327     *p_dst = *p_src;
328
329     if( p_vout->p_sys->b_half_height )
330     {
331         p_dst->i_height /= 2;
332         p_dst->i_visible_height /= 2;
333         p_dst->i_y_offset /= 2;
334         p_dst->i_sar_den *= 2;
335     }
336
337     if( p_src->i_chroma == VLC_FOURCC('I','4','2','2') )
338     {
339         switch( p_vout->p_sys->i_mode )
340         {
341         case DEINTERLACE_MEAN:
342         case DEINTERLACE_LINEAR:
343         case DEINTERLACE_X:
344             p_dst->i_chroma = VLC_FOURCC('I','4','2','2');
345             break;
346         default:
347             p_dst->i_chroma = VLC_FOURCC('I','4','2','0');
348             break;
349         }
350     }
351 }
352
353 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
354 {
355     return i_chroma == VLC_FOURCC('I','4','2','0') ||
356            i_chroma == VLC_FOURCC('I','Y','U','V') ||
357            i_chroma == VLC_FOURCC('Y','V','1','2') ||
358            i_chroma == VLC_FOURCC('I','4','2','2');
359 }
360
/*****************************************************************************
 * Init: initialize Deinterlace video thread output method
 *****************************************************************************
 * Checks the render chroma, copies the render format to the output format,
 * spawns the child vout and registers the callbacks.
 *
 * Returns VLC_SUCCESS, or VLC_EGENERIC when the chroma is not supported or
 * the child vout cannot be created.
 *****************************************************************************/
static int Init( vout_thread_t *p_vout )
{
    /* NOTE(review): i_index and p_pic are not referenced by name below;
     * presumably the ALLOCATE_DIRECTBUFFERS expansion uses them -- confirm in
     * filter_common.h before renaming or removing them. */
    int i_index;
    picture_t *p_pic;

    I_OUTPUTPICTURES = 0;

    if( !IsChromaSupported( p_vout->render.i_chroma ) )
        return VLC_EGENERIC; /* unknown chroma */

    /* Initialize the output structure, full of directbuffers since we want
     * the decoder to output directly to our structures. */
    p_vout->output.i_chroma = p_vout->render.i_chroma;
    p_vout->output.i_width  = p_vout->render.i_width;
    p_vout->output.i_height = p_vout->render.i_height;
    p_vout->output.i_aspect = p_vout->render.i_aspect;
    p_vout->fmt_out = p_vout->fmt_in;

    /* Try to open the real video output */
    p_vout->p_sys->p_vout = SpawnRealVout( p_vout );

    if( p_vout->p_sys->p_vout == NULL )
    {
        /* Everything failed */
        msg_Err( p_vout, "cannot open vout, aborting" );

        return VLC_EGENERIC;
    }

    /* Get notified when the "deinterlace-mode" variable changes */
    var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );

    ALLOCATE_DIRECTBUFFERS( VOUT_MAX_PICTURES );

    /* Counterparts: DEL_CALLBACKS and DEL_PARENT_CALLBACKS in End() */
    ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );

    ADD_PARENT_CALLBACKS( SendEventsToChild );

    return VLC_SUCCESS;
}
403
404 /*****************************************************************************
405  * SpawnRealVout: spawn the real video output.
406  *****************************************************************************/
407 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
408 {
409     msg_Dbg( p_vout, "spawning the real video output" );
410
411     video_format_t fmt;
412     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
413
414     return vout_Create( p_vout, &fmt );
415 }
416
417 /*****************************************************************************
418  * End: terminate Deinterlace video thread output method
419  *****************************************************************************/
420 static void End( vout_thread_t *p_vout )
421 {
422     int i_index;
423
424     DEL_PARENT_CALLBACKS( SendEventsToChild );
425
426     if( p_vout->p_sys->p_vout )
427         DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
428
429     /* Free the fake output buffers we allocated */
430     for( i_index = I_OUTPUTPICTURES ; i_index ; )
431     {
432         i_index--;
433         free( PP_OUTPUTPICTURE[ i_index ]->p_data_orig );
434     }
435
436     if( p_vout->p_sys->p_vout )
437         vout_CloseAndRelease( p_vout->p_sys->p_vout );
438 }
439
440 /*****************************************************************************
441  * Destroy: destroy Deinterlace video thread output method
442  *****************************************************************************
443  * Terminate an output method created by DeinterlaceCreateOutputMethod
444  *****************************************************************************/
445 static void Destroy( vlc_object_t *p_this )
446 {
447     vout_thread_t *p_vout = (vout_thread_t *)p_this;
448     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
449     free( p_vout->p_sys );
450 }
451
452 /*****************************************************************************
453  * Render: displays previously rendered output
454  *****************************************************************************
455  * This function send the currently rendered image to Deinterlace image,
456  * waits until it is displayed and switch the two rendering buffers, preparing
457  * next frame.
458  *****************************************************************************/
459 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
460 {
461     vout_sys_t *p_sys = p_vout->p_sys;
462     picture_t *pp_outpic[2];
463
464     /* FIXME are they needed ? */
465     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
466     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
467     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
468     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
469
470     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
471     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
472     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
473     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
474     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
475     if( p_vout->p_sys->b_half_height )
476     {
477         p_sys->p_vout->fmt_in.i_y_offset /= 2;
478         p_sys->p_vout->fmt_in.i_visible_height /= 2;
479     }
480
481     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
482     {
483         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
484
485         p_vout->fmt_out.i_aspect = p_vout->fmt_in.i_aspect;
486         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
487         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
488
489         video_format_t fmt = p_vout->fmt_out;
490         if( p_vout->p_sys->b_half_height )
491         {
492             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
493             fmt.i_sar_den *= 2;
494         }
495
496         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
497     }
498     if( !p_sys->p_vout )
499         return;
500
501     pp_outpic[0] = pp_outpic[1] = NULL;
502
503     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
504
505     /* Get a new picture */
506     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
507                                                 0, 0, 0 ) )
508               == NULL )
509     {
510         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
511         {
512             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
513             return;
514         }
515         msleep( VOUT_OUTMEM_SLEEP );
516     }
517
518     pp_outpic[0]->date = p_pic->date;
519
520     /* If we are using double rate, get an additional new picture */
521     if( p_vout->p_sys->b_double_rate )
522     {
523         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
524                                                  0, 0, 0 ) )
525                   == NULL )
526         {
527             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
528             {
529                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
530                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
531                 return;
532             }
533             msleep( VOUT_OUTMEM_SLEEP );
534         }
535
536         /* 20ms is a bit arbitrary, but it's only for the first image we get */
537         if( !p_vout->p_sys->last_date )
538             pp_outpic[1]->date = p_pic->date + 20000;
539         else
540             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
541         p_vout->p_sys->last_date = p_pic->date;
542     }
543
544     switch( p_vout->p_sys->i_mode )
545     {
546         case DEINTERLACE_DISCARD:
547             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
548             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
549             break;
550
551         case DEINTERLACE_BOB:
552             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
553             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
554             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
555             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
556             break;
557
558         case DEINTERLACE_LINEAR:
559             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
560             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
561             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
562             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
563             break;
564
565         case DEINTERLACE_MEAN:
566             RenderMean( p_vout, pp_outpic[0], p_pic );
567             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
568             break;
569
570         case DEINTERLACE_BLEND:
571             RenderBlend( p_vout, pp_outpic[0], p_pic );
572             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
573             break;
574
575         case DEINTERLACE_X:
576             RenderX( pp_outpic[0], p_pic );
577             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
578             break;
579     }
580     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
581 }
582
583 /*****************************************************************************
584  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
585  *****************************************************************************/
586 static void RenderDiscard( vout_thread_t *p_vout,
587                            picture_t *p_outpic, picture_t *p_pic, int i_field )
588 {
589     int i_plane;
590
591     /* Copy image and skip lines */
592     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
593     {
594         uint8_t *p_in, *p_out_end, *p_out;
595         int i_increment;
596
597         p_in = p_pic->p[i_plane].p_pixels
598                    + i_field * p_pic->p[i_plane].i_pitch;
599
600         p_out = p_outpic->p[i_plane].p_pixels;
601         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
602                              * p_outpic->p[i_plane].i_visible_lines;
603
604         switch( p_vout->render.i_chroma )
605         {
606         case VLC_FOURCC('I','4','2','0'):
607         case VLC_FOURCC('I','Y','U','V'):
608         case VLC_FOURCC('Y','V','1','2'):
609
610             for( ; p_out < p_out_end ; )
611             {
612                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
613
614                 p_out += p_outpic->p[i_plane].i_pitch;
615                 p_in += 2 * p_pic->p[i_plane].i_pitch;
616             }
617             break;
618
619         case VLC_FOURCC('I','4','2','2'):
620
621             i_increment = 2 * p_pic->p[i_plane].i_pitch;
622
623             if( i_plane == Y_PLANE )
624             {
625                 for( ; p_out < p_out_end ; )
626                 {
627                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
628                     p_out += p_outpic->p[i_plane].i_pitch;
629                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
630                     p_out += p_outpic->p[i_plane].i_pitch;
631                     p_in += i_increment;
632                 }
633             }
634             else
635             {
636                 for( ; p_out < p_out_end ; )
637                 {
638                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
639                     p_out += p_outpic->p[i_plane].i_pitch;
640                     p_in += i_increment;
641                 }
642             }
643             break;
644
645         default:
646             break;
647         }
648     }
649 }
650
651 /*****************************************************************************
652  * RenderBob: renders a BOB picture - simple copy
653  *****************************************************************************/
654 static void RenderBob( vout_thread_t *p_vout,
655                        picture_t *p_outpic, picture_t *p_pic, int i_field )
656 {
657     int i_plane;
658
659     /* Copy image and skip lines */
660     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
661     {
662         uint8_t *p_in, *p_out_end, *p_out;
663
664         p_in = p_pic->p[i_plane].p_pixels;
665         p_out = p_outpic->p[i_plane].p_pixels;
666         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
667                              * p_outpic->p[i_plane].i_visible_lines;
668
669         switch( p_vout->render.i_chroma )
670         {
671             case VLC_FOURCC('I','4','2','0'):
672             case VLC_FOURCC('I','Y','U','V'):
673             case VLC_FOURCC('Y','V','1','2'):
674                 /* For BOTTOM field we need to add the first line */
675                 if( i_field == 1 )
676                 {
677                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
678                     p_in += p_pic->p[i_plane].i_pitch;
679                     p_out += p_outpic->p[i_plane].i_pitch;
680                 }
681
682                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
683
684                 for( ; p_out < p_out_end ; )
685                 {
686                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
687
688                     p_out += p_outpic->p[i_plane].i_pitch;
689
690                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
691
692                     p_in += 2 * p_pic->p[i_plane].i_pitch;
693                     p_out += p_outpic->p[i_plane].i_pitch;
694                 }
695
696                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
697
698                 /* For TOP field we need to add the last line */
699                 if( i_field == 0 )
700                 {
701                     p_in += p_pic->p[i_plane].i_pitch;
702                     p_out += p_outpic->p[i_plane].i_pitch;
703                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
704                 }
705                 break;
706
707             case VLC_FOURCC('I','4','2','2'):
708                 /* For BOTTOM field we need to add the first line */
709                 if( i_field == 1 )
710                 {
711                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
712                     p_in += p_pic->p[i_plane].i_pitch;
713                     p_out += p_outpic->p[i_plane].i_pitch;
714                 }
715
716                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
717
718                 if( i_plane == Y_PLANE )
719                 {
720                     for( ; p_out < p_out_end ; )
721                     {
722                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
723
724                         p_out += p_outpic->p[i_plane].i_pitch;
725
726                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
727
728                         p_in += 2 * p_pic->p[i_plane].i_pitch;
729                         p_out += p_outpic->p[i_plane].i_pitch;
730                     }
731                 }
732                 else
733                 {
734                     for( ; p_out < p_out_end ; )
735                     {
736                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
737
738                         p_out += p_outpic->p[i_plane].i_pitch;
739                         p_in += 2 * p_pic->p[i_plane].i_pitch;
740                     }
741                 }
742
743                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
744
745                 /* For TOP field we need to add the last line */
746                 if( i_field == 0 )
747                 {
748                     p_in += p_pic->p[i_plane].i_pitch;
749                     p_out += p_outpic->p[i_plane].i_pitch;
750                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
751                 }
752                 break;
753         }
754     }
755 }
756
/* Shorthands for the merge routine and its optional termination routine.
 * Both expand to an expression using a variable named p_vout, which must be
 * in scope at the point of use. EndMerge is meant to be written
 * "EndMerge();" and expands to an unbraced "if": do not put it directly in
 * front of an "else". */
#define Merge p_vout->p_sys->pf_merge
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
759
760 /*****************************************************************************
761  * RenderLinear: BOB with linear interpolation
762  *****************************************************************************/
763 static void RenderLinear( vout_thread_t *p_vout,
764                           picture_t *p_outpic, picture_t *p_pic, int i_field )
765 {
766     int i_plane;
767
768     /* Copy image and skip lines */
769     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
770     {
771         uint8_t *p_in, *p_out_end, *p_out;
772
773         p_in = p_pic->p[i_plane].p_pixels;
774         p_out = p_outpic->p[i_plane].p_pixels;
775         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
776                              * p_outpic->p[i_plane].i_visible_lines;
777
778         /* For BOTTOM field we need to add the first line */
779         if( i_field == 1 )
780         {
781             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
782             p_in += p_pic->p[i_plane].i_pitch;
783             p_out += p_outpic->p[i_plane].i_pitch;
784         }
785
786         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
787
788         for( ; p_out < p_out_end ; )
789         {
790             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
791
792             p_out += p_outpic->p[i_plane].i_pitch;
793
794             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
795                    p_pic->p[i_plane].i_pitch );
796
797             p_in += 2 * p_pic->p[i_plane].i_pitch;
798             p_out += p_outpic->p[i_plane].i_pitch;
799         }
800
801         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
802
803         /* For TOP field we need to add the last line */
804         if( i_field == 0 )
805         {
806             p_in += p_pic->p[i_plane].i_pitch;
807             p_out += p_outpic->p[i_plane].i_pitch;
808             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
809         }
810     }
811     EndMerge();
812 }
813
814 static void RenderMean( vout_thread_t *p_vout,
815                         picture_t *p_outpic, picture_t *p_pic )
816 {
817     int i_plane;
818
819     /* Copy image and skip lines */
820     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
821     {
822         uint8_t *p_in, *p_out_end, *p_out;
823
824         p_in = p_pic->p[i_plane].p_pixels;
825
826         p_out = p_outpic->p[i_plane].p_pixels;
827         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
828                              * p_outpic->p[i_plane].i_visible_lines;
829
830         /* All lines: mean value */
831         for( ; p_out < p_out_end ; )
832         {
833             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
834                    p_pic->p[i_plane].i_pitch );
835
836             p_out += p_outpic->p[i_plane].i_pitch;
837             p_in += 2 * p_pic->p[i_plane].i_pitch;
838         }
839     }
840     EndMerge();
841 }
842
843 static void RenderBlend( vout_thread_t *p_vout,
844                          picture_t *p_outpic, picture_t *p_pic )
845 {
846     int i_plane;
847
848     /* Copy image and skip lines */
849     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
850     {
851         uint8_t *p_in, *p_out_end, *p_out;
852
853         p_in = p_pic->p[i_plane].p_pixels;
854
855         p_out = p_outpic->p[i_plane].p_pixels;
856         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
857                              * p_outpic->p[i_plane].i_visible_lines;
858
859         switch( p_vout->render.i_chroma )
860         {
861             case VLC_FOURCC('I','4','2','0'):
862             case VLC_FOURCC('I','Y','U','V'):
863             case VLC_FOURCC('Y','V','1','2'):
864                 /* First line: simple copy */
865                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
866                 p_out += p_outpic->p[i_plane].i_pitch;
867
868                 /* Remaining lines: mean value */
869                 for( ; p_out < p_out_end ; )
870                 {
871                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
872                            p_pic->p[i_plane].i_pitch );
873
874                     p_out += p_outpic->p[i_plane].i_pitch;
875                     p_in += p_pic->p[i_plane].i_pitch;
876                 }
877                 break;
878
879             case VLC_FOURCC('I','4','2','2'):
880                 /* First line: simple copy */
881                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
882                 p_out += p_outpic->p[i_plane].i_pitch;
883
884                 /* Remaining lines: mean value */
885                 if( i_plane == Y_PLANE )
886                 {
887                     for( ; p_out < p_out_end ; )
888                     {
889                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
890                                p_pic->p[i_plane].i_pitch );
891
892                         p_out += p_outpic->p[i_plane].i_pitch;
893                         p_in += p_pic->p[i_plane].i_pitch;
894                     }
895                 }
896
897                 else
898                 {
899                     for( ; p_out < p_out_end ; )
900                     {
901                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
902                                p_pic->p[i_plane].i_pitch );
903
904                         p_out += p_outpic->p[i_plane].i_pitch;
905                         p_in += 2*p_pic->p[i_plane].i_pitch;
906                     }
907                 }
908                 break;
909         }
910     }
911     EndMerge();
912 }
913
914 #undef Merge
915
/*
 * MergeGeneric: portable byte-wise average of two buffers.
 *
 * Writes i_bytes bytes to _p_dest, each being (s1 + s2) >> 1 (the half is
 * truncated, not rounded) of the matching bytes of _p_s1 and _p_s2.
 *
 * The loop is driven by an index instead of an end pointer computed as
 * "p_dest + i_bytes - 8": that expression formed a pointer in front of the
 * destination buffer whenever i_bytes was smaller than 8, which is undefined
 * behaviour. The produced bytes are unchanged.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    for( size_t i = 0; i < i_bytes; i++ )
        p_dest[i] = ( (uint16_t)p_s1[i] + (uint16_t)p_s2[i] ) >> 1;
}
943
944 #if defined(CAN_COMPILE_MMXEXT)
/*
 * MergeMMXEXT: average two buffers, 8 bytes at a time with pavgb.
 *
 * Writes i_bytes bytes to _p_dest. The MMX loop runs while strictly more
 * than 8 bytes remain; the last 1 to 8 bytes are averaged in C. Note that
 * pavgb rounds the half up whereas the C tail truncates it.
 *
 * The remaining byte count drives the loops instead of an end pointer
 * computed as "p_dest + i_bytes - 8", which pointed in front of the buffer
 * for i_bytes < 8. The asm also carries a "memory" clobber because it reads
 * and writes 8 bytes through operands that only describe a single byte.
 *
 * The MMX state is left dirty; the caller has to issue emms afterwards.
 */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    while( i_bytes > 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                 : "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
        i_bytes -= 8;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
971 #endif
972
973 #if defined(CAN_COMPILE_3DNOW)
/*
 * Merge3DNow: average two buffers, 8 bytes at a time with pavgusb.
 *
 * Writes i_bytes bytes to _p_dest. The 3DNow! loop runs while strictly more
 * than 8 bytes remain; the last 1 to 8 bytes are averaged in C with
 * (s1 + s2) >> 1.
 *
 * The remaining byte count drives the loops instead of an end pointer
 * computed as "p_dest + i_bytes - 8", which pointed in front of the buffer
 * for i_bytes < 8. The asm also carries a "memory" clobber because it reads
 * and writes 8 bytes through operands that only describe a single byte.
 *
 * The MMX/3DNow! state is left dirty; the caller has to issue femms/emms
 * afterwards.
 */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    while( i_bytes > 8 )
    {
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                 : "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
        i_bytes -= 8;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1000 #endif
1001
1002 #if defined(CAN_COMPILE_SSE)
/*
 * MergeSSE2: average two buffers, 16 bytes at a time with pavgb.
 *
 * Writes exactly i_bytes bytes to _p_dest:
 *  - a scalar prologue runs until p_s1 is 16-byte aligned (pavgb takes
 *    *p_s1 as its memory operand) or the input is exhausted;
 *  - the SSE2 loop runs while strictly more than 16 bytes remain;
 *  - the remaining bytes are averaged in C.
 * Note that pavgb rounds the half up whereas the C code truncates it.
 *
 * Fixes over the previous version:
 *  - bytes consumed by the alignment prologue are now deducted from
 *    i_bytes. Before, the end pointer was computed after the prologue, so
 *    an unaligned p_s1 made the function write up to 15 bytes past the
 *    requested range;
 *  - the prologue stops when i_bytes is exhausted;
 *  - no "p_dest + i_bytes - 16" pointer is formed any more (it pointed in
 *    front of the buffer for small sizes);
 *  - the asm carries a "memory" clobber because it reads and writes 16
 *    bytes through operands that only describe a single byte.
 */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Scalar prologue up to the first aligned source byte */
    while( i_bytes > 0 && (uintptr_t)p_s1 % 16 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        i_bytes--;
    }

    while( i_bytes > 16 )
    {
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                                 : "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
        i_bytes -= 16;
    }

    /* Scalar tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1034 #endif
1035
1036 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: issue the emms instruction, which marks the MMX state as empty
 * so that x87 floating point code can run again after MMX code. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
1041 #endif
1042
1043 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: issue the femms instruction, the 3DNow! counterpart of emms,
 * to leave the MMX/3DNow! state after the merge loops. */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
1048 #endif
1049
1050 #ifdef CAN_COMPILE_C_ALTIVEC
/*
 * MergeAltivec: average two buffers, 16 bytes at a time with vec_avg.
 *
 * Three phases: a C prologue until the destination is 16-byte aligned, a
 * vector loop (with a permute-based path when either source is unaligned),
 * and a C tail for the remaining bytes.
 *
 * NOTE(review): the prologue loop is not bounded by i_bytes, so a call with
 * fewer bytes than needed to reach alignment looks like it would write past
 * the requested range -- confirm callers never do that.
 * NOTE(review): pointers are cast to int to test alignment; only the low
 * four bits are used, but uintptr_t would avoid truncation warnings.
 */
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    /* Last position from which a whole 16-byte vector still fits */
    uint8_t *p_end  = p_dest + i_bytes - 15;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( (int)p_dest & 0xF )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
    {
        /* Unaligned source */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        /* Permute masks used to realign each source from two loads */
        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( p_dest < p_end )
        {
            /* Load the next vector and combine it with the previous one */
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( p_dest < p_end )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Restore the real end of the destination for the C tail */
    p_end += 15;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1118 #endif
1119
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis: it copies the top
 * field and applies a process to recreate the bottom field.
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields, if there is a
 *    good match we use MC to recreate the bottom field (with a small
 *    blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
1132
/* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
 * XXX: It needs access to 8x10 pixels.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: detection in smooth/uniform areas with noise doesn't work well,
 * but it's not really a problem because they don't have much detail anyway
 */
/* ssd: return the square of a (used to accumulate squared differences) */
static inline int ssd( int a )
{
    const int i_square = a * a;
    return i_square;
}
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    /* Number of line pairs that look interlaced */
    int i_hits = 0;

    /* Four line pairs, each one looking at 4 consecutive lines: the block
     * therefore reads lines 0 to 9 */
    for( int i_pair = 0; i_pair < 4; i_pair++, src += 2*i_src )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = src + 1*i_src;
        const uint8_t *p_l2 = src + 2*i_src;
        const uint8_t *p_l3 = src + 3*i_src;

        int i_frame = 0;    /* difference between adjacent lines */
        int i_field = 0;    /* difference between lines two apart */

        for( int x = 0; x < 8; x++ )
        {
            i_frame += ssd(p_l0[x] - p_l1[x]) +
                       ssd(p_l1[x] - p_l2[x]);
            i_field += ssd(p_l0[x] - p_l2[x]) +
                       ssd(p_l1[x] - p_l3[x]);
        }

        /* Lines two apart match clearly better than adjacent ones */
        if( i_field < 6*i_frame/8 && i_frame > 32 )
            i_hits++;
    }

    /* A single matching pair is enough to flag the block */
    return i_hits >= 1;
}
1167 #ifdef CAN_COMPILE_MMXEXT
/* MMX variant of XDeint8x8DetectC. Returns the number of line pairs that
 * look interlaced (non-zero means interlaced), not a 0/1 flag.
 * NOTE(review): this loops over 5 line pairs (y < 9) whereas the C version
 * uses 4 (y < 7), so it reads two more source lines -- confirm whether the
 * difference is intended. */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    /* mm7 = 0, used to unpack bytes into 16-bit words */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        /* mm5 accumulates fr, mm6 accumulates ff (two 32-bit halves each) */
        pxor_r2r( mm5, mm5 );
        pxor_r2r( mm6, mm6 );
        for( x = 0; x < 8; x+=4 )
        {
            /* Load 4 pixels from each of 4 consecutive lines */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            /* mm0 = l0 - l1, mm4 = l0 - l2 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );

            /* mm2 = l2 - l1, mm3 = l3 - l1 */
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square the differences and add them pairwise */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Horizontal add of the two 32-bit halves -> fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        /* Same for ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
1230 #endif
1231
/* XDeint8x8MergeC: rebuild a progressive 8x8 block.
 * For each of the 4 line pairs, the first output line is a copy of the
 * current src1 line and the second one is the (1,6,1) blend of the current
 * src1 line, the current src2 line and the next src1 line. */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_next = src1 + i_src1;
        uint8_t *p_blend = dst + i_dst;

        memcpy( dst, src1, 8 );

        for( int x = 0; x < 8; x++ )
            p_blend[x] = (src1[x] + 6*src2[x] + p_next[x] + 4 ) >> 3;

        dst = p_blend + i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1252
1253 #ifdef CAN_COMPILE_MMXEXT
/* MMX variant of XDeint8x8MergeC: copies the current src1 line and writes
 * (src1 + 6*src2 + next src1 + 4) >> 3 on the following output line,
 * 4 pixels at a time. */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding constant: four 16-bit words holding 4 */
    static const uint64_t m_4 = INT64_C(0x0004000400040004);
    int y, x;

    /* Progressive */
    /* mm7 = 0, used to unpack bytes into words and to pack back */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* First output line: plain copy of src1 */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*src2, mm3 = 4*src2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            /* mm0 = src1 + next src1, mm1 = 6*src2 + 4 */
            paddw_r2r( mm2, mm0 );
            paddw_r2r( mm3, mm1 );
            paddw_m2r( m_4, mm1 );
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            /* Second output line: blended pixels */
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1292
1293 #endif
1294
1295 /* For debug */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    /* Fill the 8 lines of the block, 8 pixels each, with the value v */
    int i_offset = 0;

    for( int i_line = 0; i_line < 8; i_line++ )
    {
        memset( dst + i_offset, v, 8 );
        i_offset += i_dst;
    }
}
1302
1303 /* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for block that miss a
1304  * neighbour
1305  * (Use 8x9 pixels)
1306  * TODO: a better one for the inner part.
1307  */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced: keep every other source line and rebuild the line in
     * between as the truncated mean of the kept lines around it */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_above = src;
        uint8_t *p_below = src + 2*i_src;
        uint8_t *p_rebuilt = dst + i_dst;

        memcpy( dst, p_above, 8 );

        for( int x = 0; x < 8; x++ )
            p_rebuilt[x] = (p_above[x] + p_below[x] ) >> 1;

        dst = p_rebuilt + i_dst;
        src = p_below;
    }
}
1325 #ifdef CAN_COMPILE_MMXEXT
/* MMX variant of XDeint8x8FieldEC: copies every other source line and
 * rebuilds the line in between with pavgb of the two kept lines around it.
 * Note that pavgb rounds the half up whereas the C version truncates. */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line: straight 8-byte copy */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Rebuilt line: average with the kept line two source lines below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1347 #endif
1348
1349 /* XDeint8x8Field: Edge oriented interpolation
1350  * (Need -4 and +5 pixels H, +1 line)
1351  */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced: every other source line is kept, the line in between is
     * interpolated along the direction whose neighbourhood matches best */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_up = src;
        const uint8_t *p_down = src + 2*i_src;

        memcpy( dst, p_up, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* Sums of absolute differences over 8 pixels (8 matches the MMX
             * version, 5 would be enough) for three alignments of the two
             * lines: lower line shifted by +2, 0 and -2 pixels */
            int i_sad_p2 = 0;
            int i_sad_0  = 0;
            int i_sad_m2 = 0;

            for( int k = 0; k < 8; k++ )
            {
                i_sad_p2 += abs( p_up[x-4+k] - p_down[x-2+k] );
                i_sad_0  += abs( p_up[x-3+k] - p_down[x-3+k] );
                i_sad_m2 += abs( p_up[x-2+k] - p_down[x-4+k] );
            }

            int i_sum;
            if( i_sad_p2 < i_sad_0 && i_sad_0 <= i_sad_m2 )
                i_sum = p_up[x-1] + p_down[x+1];
            else if( i_sad_m2 < i_sad_0 && i_sad_0 <= i_sad_p2 )
                i_sum = p_up[x+1] + p_down[x-1];
            else
                i_sum = p_up[x+0] + p_down[x+0];

            dst[x] = i_sum >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1395 #ifdef CAN_COMPILE_MMXEXT
/* MMX variant of XDeint8x8FieldC: same direction selection, with the three
 * 8-pixel sums of absolute differences computed by psadbw. */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line: plain copy */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            /* Kept line two source lines below the current one */
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            /* Load 8 pixels of the upper line at three horizontal offsets */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* SAD against the lower line shifted by -2, 0 and +2 pixels */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            /* Interpolate along the best matching direction */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1436 #endif
1437
/* NxN arbitrary size (and then only use pixels in the NxN block)
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */

    /* Both sums keep accumulating over the whole block: they are not reset
     * between line pairs */
    int i_frame = 0;    /* squared difference between adjacent lines */
    int i_field = 0;    /* squared difference between lines two apart */
    int i_hits = 0;

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *p_cur = &src[y*i_src];
        const uint8_t *p_adjacent = p_cur + 1*i_src;
        const uint8_t *p_same_field = p_cur + 2*i_src;

        for( int x = 0; x < i_width; x++ )
        {
            i_frame += ssd(p_cur[x] - p_adjacent[x]);
            i_field += ssd(p_cur[x] - p_same_field[x]);
        }

        if( i_field < i_frame && i_frame > i_width / 2 )
            i_hits++;
    }

    /* At least two line pairs must agree */
    return i_hits >= 2;
}
1466
/* XDeintNxNFrame: rebuild a progressive block of i_width x i_height pixels.
 * Even lines are copied; odd lines are the (1,2,1) blend of the three
 * surrounding source lines, except the last one which is the mean of the
 * last two source lines.
 * When i_height is odd the final line is only copied: it has no following
 * line inside the block, and the previous code read and wrote line
 * i_height, i.e. one line outside the block. */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: no line y+1 left in the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1494
/* XDeintNxNField: rebuild an interlaced block of i_width x i_height pixels.
 * Even lines are copied; odd lines are the mean of the kept lines above and
 * below, except the last one which is the mean of the last two source lines.
 * When i_height is odd the final line is only copied: it has no following
 * line inside the block, and the previous code read and wrote line
 * i_height, i.e. one line outside the block. */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd height: no line y+1 left in the block */
        if( y + 1 >= i_height )
            break;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[i_src+x]) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1522
/* XDeintNxN: deinterlace a block of arbitrary size, choosing between the
 * field (interlaced) and frame (progressive) reconstruction according to
 * XDeintNxNDetect.
 * XDeintNxNDetect takes (i_height, i_width) in that order, unlike the other
 * NxN helpers; the arguments used to be passed as (i_width, i_height), which
 * made the detector scan a transposed area that could extend outside the
 * block. */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1531
1532
/* median: return the middle value of a, b and c, computed as the sum of the
 * three minus the smallest and the largest */
static inline int median( int a, int b, int c )
{
    /* Order a and b first */
    int i_low  = ( b < a ) ? b : a;
    int i_high = ( b < a ) ? a : b;

    /* Then widen the range with c where needed */
    if( c < i_low )
        i_low = c;
    else if( c > i_high )
        i_high = c;

    return a + b + c - i_low - i_high;
}
1548
1549
1550 /* XDeintBand8x8:
1551  */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    /* Process the i_mbx full 8x8 blocks of the band from left to right */
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive block: blend both fields */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block: the first and last blocks of the band lack a
         * horizontal neighbour, so they use the simpler variant */
        if( i_mb == 0 || i_mb == i_mbx - 1 )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    /* Leftover columns narrower than 8 pixels */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1582 #ifdef CAN_COMPILE_MMXEXT
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    /* Process the i_mbx full 8x8 blocks of the band from left to right */
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Progressive block: blend both fields */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced block: the first and last blocks of the band lack a
         * horizontal neighbour, so they use the simpler variant */
        if( i_mb == 0 || i_mb == i_mbx - 1 )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    /* Leftover columns narrower than 8 pixels (C code) */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1614 #endif
1615
1616 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1617 {
1618     int i_plane;
1619
1620     /* Copy image and skip lines */
1621     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1622     {
1623         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1624         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1625
1626         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1627         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1628
1629         const int i_dst = p_outpic->p[i_plane].i_pitch;
1630         const int i_src = p_pic->p[i_plane].i_pitch;
1631
1632         int y, x;
1633
1634         for( y = 0; y < i_mby; y++ )
1635         {
1636             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1637             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1638
1639 #ifdef CAN_COMPILE_MMXEXT
1640             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1641                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1642             else
1643 #endif
1644                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1645         }
1646
1647         /* Last line (C only)*/
1648         if( i_mody )
1649         {
1650             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1651             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1652
1653             for( x = 0; x < i_mbx; x++ )
1654             {
1655                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1656
1657                 dst += 8;
1658                 src += 8;
1659             }
1660
1661             if( i_modx )
1662                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1663         }
1664     }
1665
1666 #ifdef CAN_COMPILE_MMXEXT
1667     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1668         emms();
1669 #endif
1670 }
1671
1672 /*****************************************************************************
1673  * SendEvents: forward mouse and keyboard events to the parent p_vout
1674  *****************************************************************************/
1675 static int SendEvents( vlc_object_t *p_this, char const *psz_var,
1676                        vlc_value_t oldval, vlc_value_t newval, void *_p_vout )
1677 {
1678     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
1679     vout_thread_t *p_vout = (vout_thread_t *)_p_vout;
1680     vlc_value_t sentval = newval;
1681
1682     if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
1683         sentval.i_int *= 2;
1684
1685     var_Set( p_vout, psz_var, sentval );
1686
1687     return VLC_SUCCESS;
1688 }
1689
1690 /*****************************************************************************
1691  * FilterCallback: called when changing the deinterlace method on the fly.
1692  *****************************************************************************/
1693 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1694                            vlc_value_t oldval, vlc_value_t newval,
1695                            void *p_data )
1696 {
1697     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1698     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1699
1700     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1701
1702     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
1703     const bool b_old_half_height = p_vout->p_sys->b_half_height;
1704
1705     SetFilterMethod( p_vout, newval.psz_string );
1706
1707     if( !b_old_half_height == !p_vout->p_sys->b_half_height )
1708     {
1709         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1710         return VLC_SUCCESS;
1711     }
1712
1713     /* We need to kill the old vout */
1714     if( p_vout->p_sys->p_vout )
1715     {
1716         DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
1717         vout_CloseAndRelease( p_vout->p_sys->p_vout );
1718     }
1719
1720     /* Try to open a new video output */
1721     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
1722
1723     if( p_vout->p_sys->p_vout == NULL )
1724     {
1725         /* Everything failed */
1726         msg_Err( p_vout, "cannot open vout, aborting" );
1727
1728         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1729         return VLC_EGENERIC;
1730     }
1731
1732     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
1733
1734     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1735     return VLC_SUCCESS;
1736 }
1737
1738 /*****************************************************************************
1739  * SendEventsToChild: forward events to the child/children vout
1740  *****************************************************************************/
1741 static int SendEventsToChild( vlc_object_t *p_this, char const *psz_var,
1742                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
1743 {
1744     VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1745     vout_thread_t *p_vout = (vout_thread_t *)p_this;
1746     var_Set( p_vout->p_sys->p_vout, psz_var, newval );
1747     return VLC_SUCCESS;
1748 }
1749
1750
1751 /*****************************************************************************
1752  * video filter2 functions
1753  *****************************************************************************/
1754 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1755 {
1756     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1757     picture_t *p_pic_dst;
1758
1759     /* Request output picture */
1760     p_pic_dst = filter_NewPicture( p_filter );
1761     if( p_pic_dst == NULL )
1762     {
1763         picture_Release( p_pic );
1764         return NULL;
1765     }
1766
1767     switch( p_vout->p_sys->i_mode )
1768     {
1769         case DEINTERLACE_DISCARD:
1770             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1771             break;
1772
1773         case DEINTERLACE_BOB:
1774 #if 0
1775             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1776             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1777             break;
1778 #endif
1779
1780         case DEINTERLACE_LINEAR:
1781 #if 0
1782             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1783             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
1784 #endif
1785             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
1786             picture_Release( p_pic_dst );
1787             picture_Release( p_pic );
1788             return NULL;
1789
1790         case DEINTERLACE_MEAN:
1791             RenderMean( p_vout, p_pic_dst, p_pic );
1792             break;
1793
1794         case DEINTERLACE_BLEND:
1795             RenderBlend( p_vout, p_pic_dst, p_pic );
1796             break;
1797
1798         case DEINTERLACE_X:
1799             RenderX( p_pic_dst, p_pic );
1800             break;
1801     }
1802
1803     picture_CopyProperties( p_pic_dst, p_pic );
1804     p_pic_dst->b_progressive = true;
1805
1806     picture_Release( p_pic );
1807     return p_pic_dst;
1808 }
1809
1810 /*****************************************************************************
1811  * OpenFilter:
1812  *****************************************************************************/
1813 static int OpenFilter( vlc_object_t *p_this )
1814 {
1815     filter_t *p_filter = (filter_t*)p_this;
1816     vout_thread_t *p_vout;
1817     vlc_value_t val;
1818
1819     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
1820         return VLC_EGENERIC;
1821
1822     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
1823      * by spu filters */
1824     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
1825     vlc_object_attach( p_vout, p_filter );
1826     p_filter->p_sys = (filter_sys_t *)p_vout;
1827     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
1828
1829     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
1830                    p_filter->p_cfg );
1831     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
1832
1833     var_Create( p_filter, "deinterlace-mode", VLC_VAR_STRING );
1834     var_Set( p_filter, "deinterlace-mode", val );
1835     free( val.psz_string );
1836
1837     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
1838     {
1839         vlc_object_detach( p_vout );
1840         vlc_object_release( p_vout );
1841         return VLC_EGENERIC;
1842     }
1843
1844     video_format_t fmt;
1845     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
1846     if( !p_filter->b_allow_fmt_out_change &&
1847         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
1848           fmt.i_height != p_filter->fmt_in.video.i_height ) )
1849     {
1850         CloseFilter( VLC_OBJECT(p_filter) );
1851         return VLC_EGENERIC;
1852     }
1853     p_filter->fmt_out.video = fmt;
1854     p_filter->fmt_out.i_codec = fmt.i_chroma;
1855     p_filter->pf_video_filter = Deinterlace;
1856
1857     msg_Dbg( p_filter, "deinterlacing" );
1858
1859     return VLC_SUCCESS;
1860 }
1861
1862 /*****************************************************************************
1863  * CloseFilter: clean up the filter
1864  *****************************************************************************/
1865 static void CloseFilter( vlc_object_t *p_this )
1866 {
1867     filter_t *p_filter = (filter_t*)p_this;
1868     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1869
1870     Destroy( VLC_OBJECT(p_vout) );
1871     vlc_object_detach( p_vout );
1872     vlc_object_release( p_vout );
1873 }
1874