]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
9ebbf51e81ba666cdc757ba889699dc5cff1da60
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001, 2002, 2003 the VideoLAN team
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <errno.h>
28
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
32
33 #ifdef HAVE_ALTIVEC_H
34 #   include <altivec.h>
35 #endif
36
37 #include <vlc_common.h>
38 #include <vlc_plugin.h>
39 #include <vlc_vout.h>
40 #include <vlc_sout.h>
41 #include "vlc_filter.h"
42
43 #ifdef CAN_COMPILE_MMXEXT
44 #   include "mmx.h"
45 #endif
46
47 #include "filter_common.h"
48
/* Identifiers of the deinterlacing algorithms, stored in vout_sys_t.i_mode */
enum
{
    DEINTERLACE_DISCARD = 1, /* "discard" */
    DEINTERLACE_MEAN    = 2, /* "mean" */
    DEINTERLACE_BLEND   = 3, /* "blend" */
    DEINTERLACE_BOB     = 4, /* "bob" */
    DEINTERLACE_LINEAR  = 5, /* "linear" */
    DEINTERLACE_X       = 6  /* "x" */
};
55
/*****************************************************************************
 * Local prototypes
 *****************************************************************************/
59 static int  Create    ( vlc_object_t * );
60 static void Destroy   ( vlc_object_t * );
61
62 static int  Init      ( vout_thread_t * );
63 static void End       ( vout_thread_t * );
64 static void Render    ( vout_thread_t *, picture_t * );
65
66 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
67 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
68 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
69 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
70 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
71 static void RenderX      ( picture_t *, picture_t * );
72
73 static void MergeGeneric ( void *, const void *, const void *, size_t );
74 #if defined(CAN_COMPILE_C_ALTIVEC)
75 static void MergeAltivec ( void *, const void *, const void *, size_t );
76 #endif
77 #if defined(CAN_COMPILE_MMXEXT)
78 static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
79 #endif
80 #if defined(CAN_COMPILE_3DNOW)
81 static void Merge3DNow   ( void *, const void *, const void *, size_t );
82 #endif
83 #if defined(CAN_COMPILE_SSE)
84 static void MergeSSE2    ( void *, const void *, const void *, size_t );
85 #endif
86 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
87 static void EndMMX       ( void );
88 #endif
89 #if defined(CAN_COMPILE_3DNOW)
90 static void End3DNow     ( void );
91 #endif
92
93 static int  SendEvents   ( vlc_object_t *, char const *,
94                            vlc_value_t, vlc_value_t, void * );
95
96 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
97 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
98
99 static int OpenFilter( vlc_object_t *p_this );
100 static void CloseFilter( vlc_object_t *p_this );
101
102 /*****************************************************************************
103  * Callback prototypes
104  *****************************************************************************/
105 static int FilterCallback ( vlc_object_t *, char const *,
106                             vlc_value_t, vlc_value_t, void * );
107
108 /*****************************************************************************
109  * Module descriptor
110  *****************************************************************************/
111 #define MODE_TEXT N_("Deinterlace mode")
112 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
113
114 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
115 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
116
117 #define FILTER_CFG_PREFIX "sout-deinterlace-"
118
119 static const char *const mode_list[] = {
120     "discard", "blend", "mean", "bob", "linear", "x" };
121 static const char *const mode_list_text[] = {
122     N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X" };
123
124 vlc_module_begin();
125     set_description( N_("Deinterlacing video filter") );
126     set_shortname( N_("Deinterlace" ));
127     set_capability( "video filter", 0 );
128     set_category( CAT_VIDEO );
129     set_subcategory( SUBCAT_VIDEO_VFILTER );
130
131     set_section( N_("Display"),NULL);
132     add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT,
133                 MODE_LONGTEXT, false );
134         change_string_list( mode_list, mode_list_text, 0 );
135
136     add_shortcut( "deinterlace" );
137     set_callbacks( Create, Destroy );
138
139     add_submodule();
140     set_capability( "video filter2", 0 );
141     set_section( N_("Streaming"),NULL);
142     add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
143                 SOUT_MODE_LONGTEXT, false );
144         change_string_list( mode_list, mode_list_text, 0 );
145     set_callbacks( OpenFilter, CloseFilter );
146 vlc_module_end();
147
/* NULL-terminated list of option names (written without FILTER_CFG_PREFIX) */
static const char *const ppsz_filter_options[] = { "mode", NULL };
151
152 /*****************************************************************************
153  * vout_sys_t: Deinterlace video output method descriptor
154  *****************************************************************************
155  * This structure is part of the video output thread descriptor.
156  * It describes the Deinterlace specific properties of an output thread.
157  *****************************************************************************/
158 struct vout_sys_t
159 {
160     int        i_mode;        /* Deinterlace mode */
161     bool b_double_rate; /* Shall we double the framerate? */
162
163     mtime_t    last_date;
164     mtime_t    next_date;
165
166     vout_thread_t *p_vout;
167
168     vlc_mutex_t filter_lock;
169
170     void (*pf_merge) ( void *, const void *, const void *, size_t );
171     void (*pf_end_merge) ( void );
172 };
173
174 /*****************************************************************************
175  * Control: control facility for the vout (forwards to child vout)
176  *****************************************************************************/
177 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
178 {
179     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
180 }
181
182 /*****************************************************************************
183  * Create: allocates Deinterlace video thread output method
184  *****************************************************************************
185  * This function allocates and initializes a Deinterlace vout method.
186  *****************************************************************************/
187 static int Create( vlc_object_t *p_this )
188 {
189     vout_thread_t *p_vout = (vout_thread_t *)p_this;
190     vlc_value_t val;
191
192     /* Allocate structure */
193     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
194     if( p_vout->p_sys == NULL )
195     {
196         msg_Err( p_vout, "out of memory" );
197         return VLC_ENOMEM;
198     }
199
200     p_vout->pf_init = Init;
201     p_vout->pf_end = End;
202     p_vout->pf_manage = NULL;
203     p_vout->pf_render = Render;
204     p_vout->pf_display = NULL;
205     p_vout->pf_control = Control;
206
207     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
208     p_vout->p_sys->b_double_rate = false;
209     p_vout->p_sys->last_date = 0;
210     p_vout->p_sys->p_vout = 0;
211     vlc_mutex_init( &p_vout->p_sys->filter_lock );
212
213 #if defined(CAN_COMPILE_C_ALTIVEC)
214     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
215     {
216         p_vout->p_sys->pf_merge = MergeAltivec;
217         p_vout->p_sys->pf_end_merge = NULL;
218     }
219     else
220 #endif
221 #if defined(CAN_COMPILE_SSE)
222     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
223     {
224         p_vout->p_sys->pf_merge = MergeSSE2;
225         p_vout->p_sys->pf_end_merge = EndMMX;
226     }
227     else
228 #endif
229 #if defined(CAN_COMPILE_MMXEXT)
230     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
231     {
232         p_vout->p_sys->pf_merge = MergeMMXEXT;
233         p_vout->p_sys->pf_end_merge = EndMMX;
234     }
235     else
236 #endif
237 #if defined(CAN_COMPILE_3DNOW)
238     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
239     {
240         p_vout->p_sys->pf_merge = Merge3DNow;
241         p_vout->p_sys->pf_end_merge = End3DNow;
242     }
243     else
244 #endif
245     {
246         p_vout->p_sys->pf_merge = MergeGeneric;
247         p_vout->p_sys->pf_end_merge = NULL;
248     }
249
250     /* Look what method was requested */
251     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
252     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
253
254     if( val.psz_string == NULL )
255     {
256         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
257         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
258
259         val.psz_string = strdup( "discard" );
260     }
261
262     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
263
264     SetFilterMethod( p_vout, val.psz_string );
265
266     free( val.psz_string );
267
268     return VLC_SUCCESS;
269 }
270
271 /*****************************************************************************
272  * SetFilterMethod: setup the deinterlace method to use.
273  *****************************************************************************/
274 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
275 {
276     if( !strcmp( psz_method, "discard" ) )
277     {
278         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
279         p_vout->p_sys->b_double_rate = false;
280     }
281     else if( !strcmp( psz_method, "mean" ) )
282     {
283         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
284         p_vout->p_sys->b_double_rate = false;
285     }
286     else if( !strcmp( psz_method, "blend" )
287              || !strcmp( psz_method, "average" )
288              || !strcmp( psz_method, "combine-fields" ) )
289     {
290         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
291         p_vout->p_sys->b_double_rate = false;
292     }
293     else if( !strcmp( psz_method, "bob" )
294              || !strcmp( psz_method, "progressive-scan" ) )
295     {
296         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
297         p_vout->p_sys->b_double_rate = true;
298     }
299     else if( !strcmp( psz_method, "linear" ) )
300     {
301         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
302         p_vout->p_sys->b_double_rate = true;
303     }
304     else if( !strcmp( psz_method, "x" ) )
305     {
306         p_vout->p_sys->i_mode = DEINTERLACE_X;
307         p_vout->p_sys->b_double_rate = false;
308     }
309     else
310     {
311         msg_Err( p_vout, "no valid deinterlace mode provided, "
312                  "using \"discard\"" );
313     }
314
315     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
316 }
317
318 /*****************************************************************************
319  * Init: initialize Deinterlace video thread output method
320  *****************************************************************************/
321 static int Init( vout_thread_t *p_vout )
322 {
323     int i_index;
324     picture_t *p_pic;
325
326     I_OUTPUTPICTURES = 0;
327
328     /* Initialize the output structure, full of directbuffers since we want
329      * the decoder to output directly to our structures. */
330     switch( p_vout->render.i_chroma )
331     {
332         case VLC_FOURCC('I','4','2','0'):
333         case VLC_FOURCC('I','Y','U','V'):
334         case VLC_FOURCC('Y','V','1','2'):
335         case VLC_FOURCC('I','4','2','2'):
336             p_vout->output.i_chroma = p_vout->render.i_chroma;
337             p_vout->output.i_width  = p_vout->render.i_width;
338             p_vout->output.i_height = p_vout->render.i_height;
339             p_vout->output.i_aspect = p_vout->render.i_aspect;
340             p_vout->fmt_out = p_vout->fmt_in;
341             break;
342
343         default:
344             return VLC_EGENERIC; /* unknown chroma */
345             break;
346     }
347
348     /* Try to open the real video output */
349     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
350
351     if( p_vout->p_sys->p_vout == NULL )
352     {
353         /* Everything failed */
354         msg_Err( p_vout, "cannot open vout, aborting" );
355
356         return VLC_EGENERIC;
357     }
358
359     var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );
360
361     ALLOCATE_DIRECTBUFFERS( VOUT_MAX_PICTURES );
362
363     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
364
365     ADD_PARENT_CALLBACKS( SendEventsToChild );
366
367     return VLC_SUCCESS;
368 }
369
370 /*****************************************************************************
371  * SpawnRealVout: spawn the real video output.
372  *****************************************************************************/
373 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
374 {
375     vout_thread_t *p_real_vout = NULL;
376     video_format_t fmt;
377     memset( &fmt, 0, sizeof( video_format_t ) );
378
379     msg_Dbg( p_vout, "spawning the real video output" );
380
381     fmt = p_vout->fmt_out;
382
383     switch( p_vout->render.i_chroma )
384     {
385     case VLC_FOURCC('I','4','2','0'):
386     case VLC_FOURCC('I','Y','U','V'):
387     case VLC_FOURCC('Y','V','1','2'):
388         switch( p_vout->p_sys->i_mode )
389         {
390         case DEINTERLACE_MEAN:
391         case DEINTERLACE_DISCARD:
392             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
393             fmt.i_sar_den *= 2;
394             p_real_vout = vout_Create( p_vout, &fmt );
395             break;
396
397         case DEINTERLACE_BOB:
398         case DEINTERLACE_BLEND:
399         case DEINTERLACE_LINEAR:
400         case DEINTERLACE_X:
401             p_real_vout = vout_Create( p_vout, &fmt );
402             break;
403         }
404         break;
405
406     case VLC_FOURCC('I','4','2','2'):
407         fmt.i_chroma = VLC_FOURCC('I','4','2','0');
408         p_real_vout = vout_Create( p_vout, &fmt );
409         break;
410
411     default:
412         break;
413     }
414
415     return p_real_vout;
416 }
417
418 /*****************************************************************************
419  * End: terminate Deinterlace video thread output method
420  *****************************************************************************/
421 static void End( vout_thread_t *p_vout )
422 {
423     int i_index;
424
425     /* Free the fake output buffers we allocated */
426     for( i_index = I_OUTPUTPICTURES ; i_index ; )
427     {
428         i_index--;
429         free( PP_OUTPUTPICTURE[ i_index ]->p_data_orig );
430     }
431
432     if( p_vout->p_sys->p_vout )
433     {
434         DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
435         vlc_object_detach( p_vout->p_sys->p_vout );
436         vlc_object_release( p_vout->p_sys->p_vout );
437     }
438
439     DEL_PARENT_CALLBACKS( SendEventsToChild );
440 }
441
442 /*****************************************************************************
443  * Destroy: destroy Deinterlace video thread output method
444  *****************************************************************************
445  * Terminate an output method created by DeinterlaceCreateOutputMethod
446  *****************************************************************************/
447 static void Destroy( vlc_object_t *p_this )
448 {
449     vout_thread_t *p_vout = (vout_thread_t *)p_this;
450     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
451     free( p_vout->p_sys );
452 }
453
454 /*****************************************************************************
455  * Render: displays previously rendered output
456  *****************************************************************************
457  * This function send the currently rendered image to Deinterlace image,
458  * waits until it is displayed and switch the two rendering buffers, preparing
459  * next frame.
460  *****************************************************************************/
461 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
462 {
463     vout_sys_t *p_sys = p_vout->p_sys;
464     picture_t *pp_outpic[2];
465
466     p_vout->fmt_out.i_x_offset = p_sys->p_vout->fmt_in.i_x_offset =
467         p_vout->fmt_in.i_x_offset;
468     p_vout->fmt_out.i_y_offset = p_sys->p_vout->fmt_in.i_y_offset =
469         p_vout->fmt_in.i_y_offset;
470     p_vout->fmt_out.i_visible_width = p_sys->p_vout->fmt_in.i_visible_width =
471         p_vout->fmt_in.i_visible_width;
472     p_vout->fmt_out.i_visible_height = p_sys->p_vout->fmt_in.i_visible_height =
473         p_vout->fmt_in.i_visible_height;
474     if( p_vout->p_sys->i_mode == DEINTERLACE_MEAN ||
475         p_vout->p_sys->i_mode == DEINTERLACE_DISCARD )
476     {
477         p_vout->fmt_out.i_y_offset /= 2; p_sys->p_vout->fmt_in.i_y_offset /= 2;
478         p_vout->fmt_out.i_visible_height /= 2;
479         p_sys->p_vout->fmt_in.i_visible_height /= 2;
480     }
481  
482     pp_outpic[0] = pp_outpic[1] = NULL;
483
484     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
485
486     /* Get a new picture */
487     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
488                                                 0, 0, 0 ) )
489               == NULL )
490     {
491         if( p_vout->b_die || p_vout->b_error )
492         {
493             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
494             return;
495         }
496         msleep( VOUT_OUTMEM_SLEEP );
497     }
498
499     vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[0], p_pic->date );
500
501     /* If we are using double rate, get an additional new picture */
502     if( p_vout->p_sys->b_double_rate )
503     {
504         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
505                                                  0, 0, 0 ) )
506                   == NULL )
507         {
508             if( p_vout->b_die || p_vout->b_error )
509             {
510                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
511                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
512                 return;
513             }
514             msleep( VOUT_OUTMEM_SLEEP );
515         }
516
517         /* 20ms is a bit arbitrary, but it's only for the first image we get */
518         if( !p_vout->p_sys->last_date )
519         {
520             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
521                               p_pic->date + 20000 );
522         }
523         else
524         {
525             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
526                       (3 * p_pic->date - p_vout->p_sys->last_date) / 2 );
527         }
528         p_vout->p_sys->last_date = p_pic->date;
529     }
530
531     switch( p_vout->p_sys->i_mode )
532     {
533         case DEINTERLACE_DISCARD:
534             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
535             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
536             break;
537
538         case DEINTERLACE_BOB:
539             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
540             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
541             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
542             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
543             break;
544
545         case DEINTERLACE_LINEAR:
546             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
547             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
548             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
549             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
550             break;
551
552         case DEINTERLACE_MEAN:
553             RenderMean( p_vout, pp_outpic[0], p_pic );
554             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
555             break;
556
557         case DEINTERLACE_BLEND:
558             RenderBlend( p_vout, pp_outpic[0], p_pic );
559             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
560             break;
561
562         case DEINTERLACE_X:
563             RenderX( pp_outpic[0], p_pic );
564             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
565             break;
566     }
567     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
568 }
569
570 /*****************************************************************************
571  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
572  *****************************************************************************/
573 static void RenderDiscard( vout_thread_t *p_vout,
574                            picture_t *p_outpic, picture_t *p_pic, int i_field )
575 {
576     int i_plane;
577
578     /* Copy image and skip lines */
579     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
580     {
581         uint8_t *p_in, *p_out_end, *p_out;
582         int i_increment;
583
584         p_in = p_pic->p[i_plane].p_pixels
585                    + i_field * p_pic->p[i_plane].i_pitch;
586
587         p_out = p_outpic->p[i_plane].p_pixels;
588         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
589                              * p_outpic->p[i_plane].i_visible_lines;
590
591         switch( p_vout->render.i_chroma )
592         {
593         case VLC_FOURCC('I','4','2','0'):
594         case VLC_FOURCC('I','Y','U','V'):
595         case VLC_FOURCC('Y','V','1','2'):
596
597             for( ; p_out < p_out_end ; )
598             {
599                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
600
601                 p_out += p_outpic->p[i_plane].i_pitch;
602                 p_in += 2 * p_pic->p[i_plane].i_pitch;
603             }
604             break;
605
606         case VLC_FOURCC('I','4','2','2'):
607
608             i_increment = 2 * p_pic->p[i_plane].i_pitch;
609
610             if( i_plane == Y_PLANE )
611             {
612                 for( ; p_out < p_out_end ; )
613                 {
614                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
615                     p_out += p_outpic->p[i_plane].i_pitch;
616                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
617                     p_out += p_outpic->p[i_plane].i_pitch;
618                     p_in += i_increment;
619                 }
620             }
621             else
622             {
623                 for( ; p_out < p_out_end ; )
624                 {
625                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
626                     p_out += p_outpic->p[i_plane].i_pitch;
627                     p_in += i_increment;
628                 }
629             }
630             break;
631
632         default:
633             break;
634         }
635     }
636 }
637
638 /*****************************************************************************
639  * RenderBob: renders a BOB picture - simple copy
640  *****************************************************************************/
641 static void RenderBob( vout_thread_t *p_vout,
642                        picture_t *p_outpic, picture_t *p_pic, int i_field )
643 {
644     int i_plane;
645
646     /* Copy image and skip lines */
647     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
648     {
649         uint8_t *p_in, *p_out_end, *p_out;
650
651         p_in = p_pic->p[i_plane].p_pixels;
652         p_out = p_outpic->p[i_plane].p_pixels;
653         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
654                              * p_outpic->p[i_plane].i_visible_lines;
655
656         switch( p_vout->render.i_chroma )
657         {
658             case VLC_FOURCC('I','4','2','0'):
659             case VLC_FOURCC('I','Y','U','V'):
660             case VLC_FOURCC('Y','V','1','2'):
661                 /* For BOTTOM field we need to add the first line */
662                 if( i_field == 1 )
663                 {
664                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
665                     p_in += p_pic->p[i_plane].i_pitch;
666                     p_out += p_outpic->p[i_plane].i_pitch;
667                 }
668
669                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
670
671                 for( ; p_out < p_out_end ; )
672                 {
673                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
674
675                     p_out += p_outpic->p[i_plane].i_pitch;
676
677                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
678
679                     p_in += 2 * p_pic->p[i_plane].i_pitch;
680                     p_out += p_outpic->p[i_plane].i_pitch;
681                 }
682
683                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
684
685                 /* For TOP field we need to add the last line */
686                 if( i_field == 0 )
687                 {
688                     p_in += p_pic->p[i_plane].i_pitch;
689                     p_out += p_outpic->p[i_plane].i_pitch;
690                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
691                 }
692                 break;
693
694             case VLC_FOURCC('I','4','2','2'):
695                 /* For BOTTOM field we need to add the first line */
696                 if( i_field == 1 )
697                 {
698                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
699                     p_in += p_pic->p[i_plane].i_pitch;
700                     p_out += p_outpic->p[i_plane].i_pitch;
701                 }
702
703                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
704
705                 if( i_plane == Y_PLANE )
706                 {
707                     for( ; p_out < p_out_end ; )
708                     {
709                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
710
711                         p_out += p_outpic->p[i_plane].i_pitch;
712
713                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
714
715                         p_in += 2 * p_pic->p[i_plane].i_pitch;
716                         p_out += p_outpic->p[i_plane].i_pitch;
717                     }
718                 }
719                 else
720                 {
721                     for( ; p_out < p_out_end ; )
722                     {
723                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
724
725                         p_out += p_outpic->p[i_plane].i_pitch;
726                         p_in += 2 * p_pic->p[i_plane].i_pitch;
727                     }
728                 }
729
730                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
731
732                 /* For TOP field we need to add the last line */
733                 if( i_field == 0 )
734                 {
735                     p_in += p_pic->p[i_plane].i_pitch;
736                     p_out += p_outpic->p[i_plane].i_pitch;
737                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
738                 }
739                 break;
740         }
741     }
742 }
743
744 #define Merge p_vout->p_sys->pf_merge
745 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
746
747 /*****************************************************************************
748  * RenderLinear: BOB with linear interpolation
749  *****************************************************************************/
750 static void RenderLinear( vout_thread_t *p_vout,
751                           picture_t *p_outpic, picture_t *p_pic, int i_field )
752 {
753     int i_plane;
754
755     /* Copy image and skip lines */
756     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
757     {
758         uint8_t *p_in, *p_out_end, *p_out;
759
760         p_in = p_pic->p[i_plane].p_pixels;
761         p_out = p_outpic->p[i_plane].p_pixels;
762         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
763                              * p_outpic->p[i_plane].i_visible_lines;
764
765         /* For BOTTOM field we need to add the first line */
766         if( i_field == 1 )
767         {
768             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
769             p_in += p_pic->p[i_plane].i_pitch;
770             p_out += p_outpic->p[i_plane].i_pitch;
771         }
772
773         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
774
775         for( ; p_out < p_out_end ; )
776         {
777             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
778
779             p_out += p_outpic->p[i_plane].i_pitch;
780
781             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
782                    p_pic->p[i_plane].i_pitch );
783
784             p_in += 2 * p_pic->p[i_plane].i_pitch;
785             p_out += p_outpic->p[i_plane].i_pitch;
786         }
787
788         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
789
790         /* For TOP field we need to add the last line */
791         if( i_field == 0 )
792         {
793             p_in += p_pic->p[i_plane].i_pitch;
794             p_out += p_outpic->p[i_plane].i_pitch;
795             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
796         }
797     }
798     EndMerge();
799 }
800
801 static void RenderMean( vout_thread_t *p_vout,
802                         picture_t *p_outpic, picture_t *p_pic )
803 {
804     int i_plane;
805
806     /* Copy image and skip lines */
807     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
808     {
809         uint8_t *p_in, *p_out_end, *p_out;
810
811         p_in = p_pic->p[i_plane].p_pixels;
812
813         p_out = p_outpic->p[i_plane].p_pixels;
814         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
815                              * p_outpic->p[i_plane].i_visible_lines;
816
817         /* All lines: mean value */
818         for( ; p_out < p_out_end ; )
819         {
820             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
821                    p_pic->p[i_plane].i_pitch );
822
823             p_out += p_outpic->p[i_plane].i_pitch;
824             p_in += 2 * p_pic->p[i_plane].i_pitch;
825         }
826     }
827     EndMerge();
828 }
829
830 static void RenderBlend( vout_thread_t *p_vout,
831                          picture_t *p_outpic, picture_t *p_pic )
832 {
833     int i_plane;
834
835     /* Copy image and skip lines */
836     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
837     {
838         uint8_t *p_in, *p_out_end, *p_out;
839
840         p_in = p_pic->p[i_plane].p_pixels;
841
842         p_out = p_outpic->p[i_plane].p_pixels;
843         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
844                              * p_outpic->p[i_plane].i_visible_lines;
845
846         switch( p_vout->render.i_chroma )
847         {
848             case VLC_FOURCC('I','4','2','0'):
849             case VLC_FOURCC('I','Y','U','V'):
850             case VLC_FOURCC('Y','V','1','2'):
851                 /* First line: simple copy */
852                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
853                 p_out += p_outpic->p[i_plane].i_pitch;
854
855                 /* Remaining lines: mean value */
856                 for( ; p_out < p_out_end ; )
857                 {
858                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
859                            p_pic->p[i_plane].i_pitch );
860
861                     p_out += p_outpic->p[i_plane].i_pitch;
862                     p_in += p_pic->p[i_plane].i_pitch;
863                 }
864                 break;
865
866             case VLC_FOURCC('I','4','2','2'):
867                 /* First line: simple copy */
868                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
869                 p_out += p_outpic->p[i_plane].i_pitch;
870
871                 /* Remaining lines: mean value */
872                 if( i_plane == Y_PLANE )
873                 {
874                     for( ; p_out < p_out_end ; )
875                     {
876                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
877                                p_pic->p[i_plane].i_pitch );
878
879                         p_out += p_outpic->p[i_plane].i_pitch;
880                         p_in += p_pic->p[i_plane].i_pitch;
881                     }
882                 }
883
884                 else
885                 {
886                     for( ; p_out < p_out_end ; )
887                     {
888                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
889                                p_pic->p[i_plane].i_pitch );
890
891                         p_out += p_outpic->p[i_plane].i_pitch;
892                         p_in += 2*p_pic->p[i_plane].i_pitch;
893                     }
894                 }
895                 break;
896         }
897     }
898     EndMerge();
899 }
900
901 #undef Merge
902
/*****************************************************************************
 * MergeGeneric: portable byte-wise average of two buffers
 *****************************************************************************
 * Writes ( s1[i] + s2[i] ) >> 1 (truncating) to dest[i] for every
 * i < i_bytes.
 *
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to process; 0 is allowed and does nothing
 *
 * The loop is driven by a byte count rather than by an "end - 8" pointer, so
 * no pointer outside the buffers is ever formed, even when i_bytes < 8.
 *****************************************************************************/
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( size_t i = 0; i < i_bytes; i++ )
        p_dest[i] = (uint8_t)( ( p_s1[i] + p_s2[i] ) >> 1 );
}
930
931 #if defined(CAN_COMPILE_MMXEXT)
/*****************************************************************************
 * MergeMMXEXT: average two buffers, 8 bytes at a time with pavgb
 *****************************************************************************
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to process; 0 is allowed and does nothing
 *
 * As before, the final (up to 8) bytes are always handled by the C tail.
 * The tail truncates ( (a + b) >> 1 ) while pavgb applies its own rounding,
 * so keeping the same split keeps the output identical to the old code.
 *
 * The caller is expected to issue emms (see EndMMX) once it is done merging.
 *****************************************************************************/
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Counting bytes instead of comparing against "end - 8" avoids forming
     * a pointer before the buffer when i_bytes < 8. */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        /* The operands are typed as single bytes but the instructions move
         * 8 bytes, hence the "memory" clobber; mm1 is used as scratch. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
958 #endif
959
960 #if defined(CAN_COMPILE_3DNOW)
/*****************************************************************************
 * Merge3DNow: average two buffers, 8 bytes at a time with pavgusb
 *****************************************************************************
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to process; 0 is allowed and does nothing
 *
 * As before, the final (up to 8) bytes are always handled by the truncating
 * C tail, so the split between the asm loop and the tail is unchanged.
 *
 * The caller is expected to issue femms (see End3DNow) once it is done
 * merging.
 *****************************************************************************/
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Counting bytes instead of comparing against "end - 8" avoids forming
     * a pointer before the buffer when i_bytes < 8. */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        /* The operands are typed as single bytes but the instructions move
         * 8 bytes, hence the "memory" clobber; mm1 is used as scratch. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
987 #endif
988
989 #if defined(CAN_COMPILE_SSE)
/*****************************************************************************
 * MergeSSE2: average two buffers, 16 bytes at a time with pavgb
 *****************************************************************************
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to process; 0 is allowed and does nothing
 *
 * Layout of the work:
 *  - a C prologue until p_s1 reaches a 16-byte boundary,
 *  - the SSE2 loop while more than 16 bytes remain,
 *  - a C tail for the rest (the last, up to 16, bytes).
 *
 * The prologue bytes are counted against i_bytes. Previously they were not,
 * so a misaligned p_s1 made the function process up to 15 bytes more than
 * requested, running past the end of all three buffers.
 * When p_s1 is already aligned the output is identical to the old code.
 *****************************************************************************/
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Use C until the first source is 16-byte aligned, but never more than
     * the requested number of bytes */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;

    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        /* The operands are typed as single bytes but the instructions move
         * 16 bytes, hence the "memory" clobber; xmm1 is used as scratch. */
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    for( ; i_bytes > 0; i_bytes-- )
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
1021 #endif
1022
1023 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: issue the emms instruction.
 * Takes no argument and returns nothing; the only effect is the instruction
 * itself. */
static void EndMMX( void )
{
    __asm__ __volatile__(
        "emms"
        : /* no output */
        : /* no input */ );
}
1028 #endif
1029
1030 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: issue the femms instruction.
 * Takes no argument and returns nothing; the only effect is the instruction
 * itself. */
static void End3DNow( void )
{
    __asm__ __volatile__(
        "femms"
        : /* no output */
        : /* no input */ );
}
1035 #endif
1036
1037 #ifdef CAN_COMPILE_C_ALTIVEC
1038 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1039                           const void *_p_s2, size_t i_bytes )
1040 {
1041     uint8_t *p_dest = (uint8_t *)_p_dest;
1042     uint8_t *p_s1   = (uint8_t *)_p_s1;
1043     uint8_t *p_s2   = (uint8_t *)_p_s2;
1044     uint8_t *p_end  = p_dest + i_bytes - 15;
1045
1046     /* Use C until the first 16-bytes aligned destination pixel */
1047     while( (int)p_dest & 0xF )
1048     {
1049         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1050     }
1051
1052     if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1053     {
1054         /* Unaligned source */
1055         vector unsigned char s1v, s2v, destv;
1056         vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1057         vector unsigned char perm1v, perm2v;
1058
1059         perm1v = vec_lvsl( 0, p_s1 );
1060         perm2v = vec_lvsl( 0, p_s2 );
1061         s1oldv = vec_ld( 0, p_s1 );
1062         s2oldv = vec_ld( 0, p_s2 );
1063
1064         while( p_dest < p_end )
1065         {
1066             s1newv = vec_ld( 16, p_s1 );
1067             s2newv = vec_ld( 16, p_s2 );
1068             s1v    = vec_perm( s1oldv, s1newv, perm1v );
1069             s2v    = vec_perm( s2oldv, s2newv, perm2v );
1070             s1oldv = s1newv;
1071             s2oldv = s2newv;
1072             destv  = vec_avg( s1v, s2v );
1073             vec_st( destv, 0, p_dest );
1074
1075             p_s1   += 16;
1076             p_s2   += 16;
1077             p_dest += 16;
1078         }
1079     }
1080     else
1081     {
1082         /* Aligned source */
1083         vector unsigned char s1v, s2v, destv;
1084
1085         while( p_dest < p_end )
1086         {
1087             s1v   = vec_ld( 0, p_s1 );
1088             s2v   = vec_ld( 0, p_s2 );
1089             destv = vec_avg( s1v, s2v );
1090             vec_st( destv, 0, p_dest );
1091
1092             p_s1   += 16;
1093             p_s2   += 16;
1094             p_dest += 16;
1095         }
1096     }
1097
1098     p_end += 15;
1099
1100     while( p_dest < p_end )
1101     {
1102         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1103     }
1104 }
1105 #endif
1106
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis: it copies the top
 * field and applies a process to recreate the bottom field.
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do motion estimation (ME) between the 2
 *    fields; if there is a good match we use motion compensation (MC) to
 *    recreate the bottom field (with a small blend (1,6,1))
 *    * otherwise: it recreates the bottom field by an edge-oriented
 *    interpolation.
 *****************************************************************************/
1119
/* XDeint8x8Detect: tell whether an 8x8 block looks interlaced.
 * XXX: it needs access to 8x10 pixels.
 * More than 8 lines are used to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection does not work well on smooth/uniform areas with noise,
 * but it is not really a problem because they do not have much detail anyway.
 */

/* ssd: square of a difference */
static inline int ssd( int a )
{
    return a * a;
}

/* XDeint8x8DetectC: plain C detector.
 *
 * The block is examined two lines at a time, four times. For each step, over
 * 4 consecutive lines r0..r3 and 8 columns:
 *   frame metric = sum of squared differences between adjacent lines
 *                  (r0/r1 and r1/r2),
 *   field metric = sum of squared differences between lines two apart
 *                  (r0/r2 and r1/r3).
 * A step counts as interlaced when the field metric is below 6/8 of the
 * frame metric and the frame metric exceeds 32.
 *
 * Returns 1 if at least one step counted as interlaced, 0 otherwise.
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    for( int i_step = 0; i_step < 4; i_step++ )
    {
        const uint8_t *r0 = src;
        const uint8_t *r1 = r0 + i_src;
        const uint8_t *r2 = r1 + i_src;
        const uint8_t *r3 = r2 + i_src;
        int i_frame = 0;
        int i_field = 0;

        for( int x = 0; x < 8; x++ )
        {
            i_frame += ssd( r0[x] - r1[x] ) + ssd( r1[x] - r2[x] );
            i_field += ssd( r0[x] - r2[x] ) + ssd( r1[x] - r3[x] );
        }

        if( i_field < 6 * i_frame / 8 && i_frame > 32 )
            i_hits++;

        src += 2 * i_src;
    }

    return i_hits >= 1;
}
1154 #ifdef CAN_COMPILE_MMXEXT
1155 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
1156 {
1157
1158     int y, x;
1159     int32_t ff, fr;
1160     int fc;
1161
1162     /* Detect interlacing */
1163     fc = 0;
1164     pxor_r2r( mm7, mm7 );
1165     for( y = 0; y < 9; y += 2 )
1166     {
1167         ff = fr = 0;
1168         pxor_r2r( mm5, mm5 );
1169         pxor_r2r( mm6, mm6 );
1170         for( x = 0; x < 8; x+=4 )
1171         {
1172             movd_m2r( src[        x], mm0 );
1173             movd_m2r( src[1*i_src+x], mm1 );
1174             movd_m2r( src[2*i_src+x], mm2 );
1175             movd_m2r( src[3*i_src+x], mm3 );
1176
1177             punpcklbw_r2r( mm7, mm0 );
1178             punpcklbw_r2r( mm7, mm1 );
1179             punpcklbw_r2r( mm7, mm2 );
1180             punpcklbw_r2r( mm7, mm3 );
1181
1182             movq_r2r( mm0, mm4 );
1183
1184             psubw_r2r( mm1, mm0 );
1185             psubw_r2r( mm2, mm4 );
1186
1187             psubw_r2r( mm1, mm2 );
1188             psubw_r2r( mm1, mm3 );
1189
1190             pmaddwd_r2r( mm0, mm0 );
1191             pmaddwd_r2r( mm4, mm4 );
1192             pmaddwd_r2r( mm2, mm2 );
1193             pmaddwd_r2r( mm3, mm3 );
1194             paddd_r2r( mm0, mm2 );
1195             paddd_r2r( mm4, mm3 );
1196             paddd_r2r( mm2, mm5 );
1197             paddd_r2r( mm3, mm6 );
1198         }
1199
1200         movq_r2r( mm5, mm0 );
1201         psrlq_i2r( 32, mm0 );
1202         paddd_r2r( mm0, mm5 );
1203         movd_r2m( mm5, fr );
1204
1205         movq_r2r( mm6, mm0 );
1206         psrlq_i2r( 32, mm0 );
1207         paddd_r2r( mm0, mm6 );
1208         movd_r2m( mm6, ff );
1209
1210         if( ff < 6*fr/8 && fr > 32 )
1211             fc++;
1212
1213         src += 2*i_src;
1214     }
1215     return fc;
1216 }
1217 #endif
1218
/* XDeint8x8Frame: apply a small blend (1,6,1) between fields.
 * This won't destroy details, and helps if there is a bit of interlacing.
 * (It helps with panning, to avoid flicker)
 * (Uses 8x9 pixels)
 */
1224 #if 0
/* XDeint8x8FrameC: fill an 8x8 destination block from 9 source lines.
 * Even destination lines are copies of the even source lines; each odd
 * destination line is the (1,6,1)/8 weighted sum, rounded, of the source
 * line above, the matching source line and the source line below. */
static inline void XDeint8x8FrameC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Progressive */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src;
        const uint8_t *p_mid   = src + i_src;
        const uint8_t *p_below = src + 2 * i_src;

        memcpy( dst, p_above, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = ( p_above[x] + 6 * p_mid[x] + p_below[x] + 4 ) >> 3;
        dst += i_dst;

        src += 2 * i_src;
    }
}
1242 #endif
/* XDeint8x8MergeC: build an 8x8 block from two sets of lines.
 * For each of 4 steps, one destination line is a copy of the current src1
 * line and the next one is the (1,6,1)/8 weighted sum, rounded, of the
 * current src1 line, the current src2 line and the following src1 line.
 * src1 advances by i_src1 and src2 by i_src2 per step, so 5 lines of src1
 * and 4 lines of src2 are read. */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int i_step = 0; i_step < 4; i_step++ )
    {
        const uint8_t *p_cur  = src1;
        const uint8_t *p_next = src1 + i_src1;

        memcpy( dst, p_cur, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = ( p_cur[x] + 6 * src2[x] + p_next[x] + 4 ) >> 3;
        dst += i_dst;

        src1 += i_src1;
        src2 += i_src2;
    }
}
1263
1264 #ifdef CAN_COMPILE_MMXEXT
1265 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
1266                                          uint8_t *src1, int i_src1,
1267                                          uint8_t *src2, int i_src2 )
1268 {
1269     static const uint64_t m_4 = INT64_C(0x0004000400040004);
1270     int y, x;
1271
1272     /* Progressive */
1273     pxor_r2r( mm7, mm7 );
1274     for( y = 0; y < 8; y += 2 )
1275     {
1276         for( x = 0; x < 8; x +=4 )
1277         {
1278             movd_m2r( src1[x], mm0 );
1279             movd_r2m( mm0, dst[x] );
1280
1281             movd_m2r( src2[x], mm1 );
1282             movd_m2r( src1[i_src1+x], mm2 );
1283
1284             punpcklbw_r2r( mm7, mm0 );
1285             punpcklbw_r2r( mm7, mm1 );
1286             punpcklbw_r2r( mm7, mm2 );
1287             paddw_r2r( mm1, mm1 );
1288             movq_r2r( mm1, mm3 );
1289             paddw_r2r( mm3, mm3 );
1290             paddw_r2r( mm2, mm0 );
1291             paddw_r2r( mm3, mm1 );
1292             paddw_m2r( m_4, mm1 );
1293             paddw_r2r( mm1, mm0 );
1294             psraw_i2r( 3, mm0 );
1295             packuswb_r2r( mm7, mm0 );
1296             movd_r2m( mm0, dst[i_dst+x] );
1297         }
1298         dst += 2*i_dst;
1299         src1 += i_src1;
1300         src2 += i_src2;
1301     }
1302 }
1303
1304 #endif
1305
/* XDeint8x8Set (debug helper): fill an 8x8 block with the value v.
 * dst points at the top-left pixel, i_dst is the distance in bytes between
 * two lines. */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int i_line = 0; i_line < 8; i_line++ )
    {
        uint8_t *p_line = &dst[i_line * i_dst];
        memset( p_line, v, 8 );
    }
}
1313
/* XDeint8x8FieldE: naive (1,0,1) deinterlacing, for blocks that miss a
 * neighbour.
 * Even destination lines are copies of the even source lines; each odd
 * destination line is the truncated mean of the source lines two apart that
 * surround it.
 * (Uses 8x9 pixels)
 * TODO: a better one for the inner part.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src;
        const uint8_t *p_below = src + 2 * i_src;

        memcpy( dst, p_above, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = ( p_above[x] + p_below[x] ) >> 1;
        dst += i_dst;

        src += 2 * i_src;
    }
}
1336 #ifdef CAN_COMPILE_MMXEXT
1337 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
1338                                           uint8_t *src, int i_src )
1339 {
1340     int y;
1341
1342     /* Interlaced */
1343     for( y = 0; y < 8; y += 2 )
1344     {
1345         movq_m2r( src[0], mm0 );
1346         movq_r2m( mm0, dst[0] );
1347         dst += i_dst;
1348
1349         movq_m2r( src[2*i_src], mm1 );
1350         pavgb_r2r( mm1, mm0 );
1351
1352         movq_r2m( mm0, dst[0] );
1353
1354         dst += 1*i_dst;
1355         src += 2*i_src;
1356     }
1357 }
1358 #endif
1359
/* XDeint8x8Field: edge-oriented interpolation.
 * (Needs -4 and +5 pixels horizontally, +1 line)
 *
 * Even destination lines are copies of the even source lines. For each pixel
 * of an odd destination line, three 8-pixel sums of absolute differences are
 * taken between the source line above and the source line two below it:
 *   one with the lower line shifted by +2, one unshifted, one shifted by -2.
 * The pixel is then the truncated mean of two source pixels picked along the
 * best matching direction (straight up/down when neither diagonal wins).
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_top = src;
        const uint8_t *p_bot = &src[2*i_src];

        memcpy( dst, p_top, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* 8 taps are used just to match the MMX version, but it is
             * overkill: 5 would be enough (fewer is not good) */
            int i_sad_a = 0;    /* top[x-4..x+3] against bot[x-2..x+5] */
            int i_sad_b = 0;    /* top[x-3..x+4] against bot[x-3..x+4] */
            int i_sad_c = 0;    /* top[x-2..x+5] against bot[x-4..x+3] */

            for( int k = 0; k < 8; k++ )
            {
                i_sad_a += abs( p_top[x-4+k] - p_bot[x-2+k] );
                i_sad_b += abs( p_top[x-3+k] - p_bot[x-3+k] );
                i_sad_c += abs( p_top[x-2+k] - p_bot[x-4+k] );
            }

            if( i_sad_a < i_sad_b && i_sad_b <= i_sad_c )
                dst[x] = ( p_top[x-1] + p_bot[x+1] ) >> 1;
            else if( i_sad_c < i_sad_b && i_sad_b <= i_sad_a )
                dst[x] = ( p_top[x+1] + p_bot[x-1] ) >> 1;
            else
                dst[x] = ( p_top[x] + p_bot[x] ) >> 1;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1406 #ifdef CAN_COMPILE_MMXEXT
1407 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
1408                                          uint8_t *src, int i_src )
1409 {
1410     int y, x;
1411
1412     /* Interlaced */
1413     for( y = 0; y < 8; y += 2 )
1414     {
1415         memcpy( dst, src, 8 );
1416         dst += i_dst;
1417
1418         for( x = 0; x < 8; x++ )
1419         {
1420             uint8_t *src2 = &src[2*i_src];
1421             int32_t c0, c1, c2;
1422
1423             movq_m2r( src[x-2], mm0 );
1424             movq_m2r( src[x-3], mm1 );
1425             movq_m2r( src[x-4], mm2 );
1426
1427             psadbw_m2r( src2[x-4], mm0 );
1428             psadbw_m2r( src2[x-3], mm1 );
1429             psadbw_m2r( src2[x-2], mm2 );
1430
1431             movd_r2m( mm0, c2 );
1432             movd_r2m( mm1, c1 );
1433             movd_r2m( mm2, c0 );
1434
1435             if( c0 < c1 && c1 <= c2 )
1436                 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1437             else if( c2 < c1 && c1 <= c0 )
1438                 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1439             else
1440                 dst[x] = (src[x+0] + src2[x+0]) >> 1;
1441         }
1442
1443         dst += 1*i_dst;
1444         src += 2*i_src;
1445     }
1446 }
1447 #endif
1448
1449 #if 0
/* XDeint8x8SsdC: sum of squared differences between two 8x8 blocks.
 * i_pix1 / i_pix2 are the distances in bytes between two lines of pix1 /
 * pix2. */
static inline int XDeint8x8SsdC( uint8_t *pix1, int i_pix1,
                                 uint8_t *pix2, int i_pix2 )
{
    int i_sum = 0;

    for( int i_line = 0; i_line < 8; i_line++ )
    {
        const uint8_t *p_a = &pix1[i_line * i_pix1];
        const uint8_t *p_b = &pix2[i_line * i_pix2];

        for( int x = 0; x < 8; x++ )
            i_sum += ssd( p_a[x] - p_b[x] );
    }
    return i_sum;
}
1461
1462 #ifdef CAN_COMPILE_MMXEXT
1463 static inline int XDeint8x8SsdMMXEXT( uint8_t *pix1, int i_pix1,
1464                                       uint8_t *pix2, int i_pix2 )
1465 {
1466     int y;
1467     int32_t s;
1468
1469     pxor_r2r( mm7, mm7 );
1470     pxor_r2r( mm6, mm6 );
1471
1472     for( y = 0; y < 8; y++ )
1473     {
1474         movq_m2r( pix1[0], mm0 );
1475         movq_m2r( pix2[0], mm1 );
1476
1477         movq_r2r( mm0, mm2 );
1478         movq_r2r( mm1, mm3 );
1479
1480         punpcklbw_r2r( mm7, mm0 );
1481         punpckhbw_r2r( mm7, mm2 );
1482         punpcklbw_r2r( mm7, mm1 );
1483         punpckhbw_r2r( mm7, mm3 );
1484
1485         psubw_r2r( mm1, mm0 );
1486         psubw_r2r( mm3, mm2 );
1487
1488         pmaddwd_r2r( mm0, mm0 );
1489         pmaddwd_r2r( mm2, mm2 );
1490
1491         paddd_r2r( mm2, mm0 );
1492         paddd_r2r( mm0, mm6 );
1493
1494         pix1 += i_pix1;
1495         pix2 += i_pix2;
1496     }
1497
1498     movq_r2r( mm6, mm7 );
1499     psrlq_i2r( 32, mm7 );
1500     paddd_r2r( mm6, mm7 );
1501     movd_r2m( mm7, s );
1502
1503     return s;
1504 }
1505 #endif
1506 #endif
1507
1508 #if 0
/* A small experiment with motion, but it doesn't work better than pure intra (and it is slow) */
1510 #ifdef CAN_COMPILE_MMXEXT
/* XDeintMC:
 *  Bilinear motion compensation with quarter-pel vectors.
 *  The integer part of the vector (mvx >> 2, mvy >> 2) selects the source
 *  position, the two low bits of each component give the four bilinear
 *  weights (they add up to 16). Each output pixel reads a 2x2 source
 *  neighbourhood, so i_width+1 columns and i_height+1 lines are accessed.
 *  TODO: mmx version (easier in sse2)
 */
static inline void XDeintMC( uint8_t *dst, int i_dst,
                             uint8_t *src, int i_src,
                             int mvx, int mvy,
                             int i_width, int i_height )
{
    const int i_fx = mvx & 0x03;
    const int i_fy = mvy & 0x03;

    /* Weights of the top-left, top-right, bottom-left, bottom-right pixels */
    const int i_w_tl = ( 4 - i_fx ) * ( 4 - i_fy );
    const int i_w_tr = i_fx         * ( 4 - i_fy );
    const int i_w_bl = ( 4 - i_fx ) * i_fy;
    const int i_w_br = i_fx         * i_fy;

    const uint8_t *p_upper = src + ( mvy >> 2 ) * i_src + ( mvx >> 2 );

    for( int i_line = 0; i_line < i_height; i_line++ )
    {
        const uint8_t *p_lower = p_upper + i_src;

        for( int x = 0; x < i_width; x++ )
        {
            const int i_acc = i_w_tl * p_upper[x] + i_w_tr * p_upper[x+1] +
                              i_w_bl * p_lower[x] + i_w_br * p_lower[x+1];
            dst[x] = ( i_acc + 8 ) >> 4;
        }

        dst += i_dst;
        p_upper = p_lower;
    }
}
1548 static int XDeint8x4SadMMXEXT( uint8_t *pix1, int i_pix1,
1549                                uint8_t *pix2, int i_pix2 )
1550 {
1551     int32_t s;
1552
1553     movq_m2r( pix1[0*i_pix1], mm0 );
1554     movq_m2r( pix1[1*i_pix1], mm1 );
1555
1556     psadbw_m2r( pix2[0*i_pix2], mm0 );
1557     psadbw_m2r( pix2[1*i_pix2], mm1 );
1558
1559     movq_m2r( pix1[2*i_pix1], mm2 );
1560     movq_m2r( pix1[3*i_pix1], mm3 );
1561     psadbw_m2r( pix2[2*i_pix2], mm2 );
1562     psadbw_m2r( pix2[3*i_pix2], mm3 );
1563
1564     paddd_r2r( mm1, mm0 );
1565     paddd_r2r( mm3, mm2 );
1566     paddd_r2r( mm2, mm0 );
1567     movd_r2m( mm0, s );
1568
1569     return s;
1570 }
1571
/* XDeint8x4TestQpel: cost of a quarter-pel vector (mx, my).
 * Returns 255*255*255 when |mx| >= 4*xmax or |my| >= 4*ymax. Otherwise an
 * 8x4 block is interpolated from ref with XDeintMC() and its
 * XDeint8x4SadMMXEXT() against src is returned. */
static inline int XDeint8x4TestQpel( uint8_t *src, int i_src,
                                     uint8_t *ref, int i_stride,
                                     int mx, int my,
                                     int xmax, int ymax )
{
    const int b_allowed = abs(mx) < 4*xmax && abs(my) < 4*ymax;

    if( !b_allowed )
        return 255*255*255;

    uint8_t p_pred[8*4];
    XDeintMC( p_pred, 8, ref, i_stride, mx, my, 8, 4 );

    return XDeint8x4SadMMXEXT( src, i_src, p_pred, 8 );
}
/* XDeint8x4TestInt: cost of an integer vector (mx, my).
 * Returns 255*255*255 when |mx| >= xmax or |my| >= ymax. Otherwise returns
 * the XDeint8x4SadMMXEXT() between src and ref displaced by (mx, my). */
static inline int XDeint8x4TestInt( uint8_t *src, int i_src,
                                    uint8_t *ref, int i_stride,
                                    int mx, int my,
                                    int xmax, int ymax )
{
    const int b_allowed = abs(mx) < xmax && abs(my) < ymax;

    if( !b_allowed )
        return 255*255*255;

    uint8_t *p_cand = &ref[my*i_stride+mx];

    return XDeint8x4SadMMXEXT( src, i_src, p_cand, i_stride );
}
1595
/* XDeint8x8FieldMotion: field interpolation refined by a small motion search.
 *
 *  1. The block is first rebuilt with XDeint8x8FieldMMXEXT().
 *  2. A vector is searched between the rebuilt lines and the lines of the
 *     other field: zero vector, then the predicted vector (*mpx, *mpy), then
 *     up to 4 refinement steps over the 4 axis neighbours.
 *     xmax / ymax are clamped to 4 for this search.
 *  3. The best integer vector is written back to *mpx / *mpy.
 *  4. If the best cost is in ]4, 256[, up to 4 more refinement steps are done
 *     in quarter-pel units over the 8 neighbours.
 *  5. If the final cost is below 128, the block is replaced by the merge of
 *     the source lines and the motion-compensated lines.
 */
static inline void XDeint8x8FieldMotion( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src,
                                         int *mpx, int *mpy,
                                         int xmax, int ymax )
{
    /* Neighbour offsets: the first 4 are along the axes, the last 4 are the
     * diagonals */
    static const int dx[8] = { 0,  0, -1, 1, -1, -1,  1, 1 };
    static const int dy[8] = {-1,  1,  0, 0, -1,  1, -1, 1 };

    const int i_pitch2 = 2*i_src;
    uint8_t *p_other   = &src[i_src];   /* lines of the other field */
    uint8_t *p_rebuilt = &dst[i_dst];   /* lines rebuilt by step 1 */

    /* We construct the missing field with the intra method */
    XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );

    /* Now we try to find a match with ME against the other field */

    /* ME: a small/partial EPZS.
     * We only search for small MVs (with high motion, intra will be fine) */
    if( xmax > 4 )
        xmax = 4;
    if( ymax > 4 )
        ymax = 4;

    /* Start from the null vector */
    int i_mvx = 0;
    int i_mvy = 0;
    int i_best = XDeint8x4SadMMXEXT( p_rebuilt, i_pitch2, p_other, i_pitch2 );

    /* Try the predicted vector */
    const int i_pred_cost = XDeint8x4TestInt( p_rebuilt, i_pitch2,
                                              p_other, i_pitch2,
                                              *mpx, *mpy, xmax, ymax );
    if( i_pred_cost < i_best )
    {
        i_best = i_pred_cost;
        i_mvx = *mpx;
        i_mvy = *mpy;
    }

    /* Integer-pel search (small vectors) */
    for( int i_step = 0; i_step < 4; i_step++ )
    {
        int i_pick = -1;

        for( int i = 0; i < 4; i++ )
        {
            const int i_cost = XDeint8x4TestInt( p_rebuilt, i_pitch2,
                                                 p_other, i_pitch2,
                                                 i_mvx+dx[i], i_mvy+dy[i],
                                                 xmax, ymax );
            if( i_cost < i_best )
            {
                i_best = i_cost;
                i_pick = i;
            }
        }

        /* No neighbour improved the cost: stop */
        if( i_pick < 0 )
            break;

        i_mvx += dx[i_pick];
        i_mvy += dy[i_pick];
    }

    /* Hand the integer vector back to the caller */
    *mpx = i_mvx;
    *mpy = i_mvy;

    /* Switch to quarter-pel units */
    i_mvx <<= 2;
    i_mvy <<= 2;

    if( i_best > 4 && i_best < 256 )
    {
        /* Quarter-pel search */
        /* XXX: for now only HPEL (too slow) */
        for( int i_step = 0; i_step < 4; i_step++ )
        {
            int i_pick = -1;

            for( int i = 0; i < 8; i++ )
            {
                const int i_cost = XDeint8x4TestQpel( p_rebuilt, i_pitch2,
                                                      p_other, i_pitch2,
                                                      i_mvx+dx[i], i_mvy+dy[i],
                                                      xmax, ymax );
                if( i_cost < i_best )
                {
                    i_best = i_cost;
                    i_pick = i;
                }
            }

            if( i_pick < 0 )
                break;

            i_mvx += dx[i_pick];
            i_mvy += dy[i_pick];
        }
    }

    if( i_best < 128 )
    {
        uint8_t p_pred[8*4];

        XDeintMC( p_pred, 8, p_other, i_pitch2, i_mvx, i_mvy, 8, 4 );
        XDeint8x8MergeMMXEXT( dst, i_dst, src, 2*i_src, p_pred, 8 );

        /* Debug aid kept from the original:
         * XDeint8x8Set( dst, i_dst, 0 ); */
    }
}
1700 #endif
1701 #endif
1702
1703 #if 0
/* Kernel interpolation (1,-5,20,20,-5,1)
 * Loses a bit more detail and adds more aliasing than the edge-oriented
 * interpolation, but avoids more artifacts.
 */
/* clip1: clamp an integer to the 0..255 range and return it as a byte */
static inline uint8_t clip1( int a )
{
    return ( a <= 0 ) ? 0 : ( ( a >= 255 ) ? 255 : a );
}
/* XDeint8x8Field: field interpolation with a 6-tap vertical kernel.
 * Even destination lines are copies of the even source lines. Each odd
 * destination line is built from six source lines of the same field (two
 * lines apart from each other: three at or above the current position, three
 * below) weighted (1,-5,20,20,-5,1), rounded, divided by 32 and clamped with
 * clip1(). */
static inline void XDeint8x8Field( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        /* Distance between two lines of the same field */
        const int i_step = i_src*2;

        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            const int i_outer = src[-2*i_step+x] + src[3*i_step+x];
            const int i_mid   = src[-1*i_step+x] + src[2*i_step+x];
            const int i_inner = src[ 0*i_step+x] + src[1*i_step+x];
            const int i_pix   = i_outer - 5*i_mid + 20*i_inner;

            dst[x] = clip1( ( i_pix + 16 ) >> 5 );
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1745
1746 #endif
1747
/* NxN: arbitrary block size (only pixels inside the NxN block are used).
 *
 * XDeintNxNDetect: tell whether a block of i_width x i_height pixels looks
 * interlaced. Note the parameter order: height first, then width.
 *
 * Lines are visited two at a time while y < i_height - 2. Two running sums
 * of squared differences are kept over the whole block (they are not reset
 * between steps): one between adjacent lines and one between lines two
 * apart. A step is counted when the second sum is smaller than the first and
 * the first exceeds i_width / 2.
 *
 * Returns 1 if at least two steps were counted, 0 otherwise.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    /* Detect interlacing */
    /* FIXME way too simple, needs to be more like XDeint8x8Detect */
    int i_frame = 0;    /* adjacent lines */
    int i_field = 0;    /* lines two apart */
    int i_hits = 0;

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *p_line = &src[y*i_src];

        for( int x = 0; x < i_width; x++ )
        {
            i_frame += ssd( p_line[x] - p_line[1*i_src+x] );
            i_field += ssd( p_line[x] - p_line[2*i_src+x] );
        }

        if( i_field < i_frame && i_frame > i_width / 2 )
            i_hits++;
    }

    return i_hits >= 2;
}
1776
/* XDeintNxNFrame: progressive rendering of an i_width x i_height block.
 * Even destination lines are copies of the even source lines. Odd
 * destination lines are a (1,2,1)/4 rounded blend of the three surrounding
 * source lines, except for the last pair of lines where the odd line is the
 * truncated mean of the last two source lines. */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Progressive */
    for( int y = 0; y < i_height; y += 2 )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = src + i_src;
        const int b_last_pair = !( y < i_height - 2 );

        memcpy( dst, p_l0, i_width );
        dst += i_dst;

        if( b_last_pair )
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = ( p_l0[x] + p_l1[x] ) >> 1;
        }
        else
        {
            for( int x = 0; x < i_width; x++ )
                dst[x] = ( p_l0[x] + 2*p_l1[x] + src[2*i_src+x] + 2 ) >> 2;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1804
/* XDeintNxNField: interlaced rendering of an i_width x i_height block.
 * Even destination lines are copies of the even source lines. Odd
 * destination lines are the truncated mean of the source lines two apart
 * that surround them, except for the last pair of lines where the odd line
 * is the truncated mean of the last two source lines. */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    /* Interlaced */
    for( int y = 0; y < i_height; y += 2 )
    {
        /* Partner line: two lines below, or the next line for the last
         * pair (blend last line) */
        const int i_partner = ( y < i_height - 2 ) ? 2*i_src : i_src;

        memcpy( dst, src, i_width );
        dst += i_dst;

        for( int x = 0; x < i_width; x++ )
            dst[x] = ( src[x] + src[i_partner+x] ) >> 1;

        dst += i_dst;
        src += 2*i_src;
    }
}
1832
/* XDeintNxN: deinterlace a block of arbitrary size.
 * The block is rendered with XDeintNxNField() when XDeintNxNDetect() reports
 * it as interlaced, and with XDeintNxNFrame() otherwise.
 *
 * dst / i_dst: destination block and its line pitch
 * src / i_src: source block and its line pitch
 * i_width, i_height: size of the block in pixels
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height BEFORE the width. The two used to
     * be passed the other way round, so detection scanned i_width lines of
     * i_height pixels, i.e. the wrong area of the picture. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1841
1842
/* median:
 *  Return the middle value of a, b and c. It is computed as the sum of
 *  the three values minus their smallest and largest.
 */
static inline int median( int a, int b, int c )
{
    /* Order the first two values... */
    int lo = ( b < a ) ? b : a;
    int hi = ( b < a ) ? a : b;

    /* ...then widen the range if the third one falls outside of it. */
    if( c < lo )
    {
        lo = c;
    }
    else if( c > hi )
    {
        hi = c;
    }

    return a + b + c - lo - hi;
}
1858
1859
/* XDeintBand8x8C:
 *  Process one band of 8 rows with the plain C kernels.
 *
 *  The band is walked left to right as i_mbx blocks of 8x8 pixels. For each
 *  block XDeint8x8DetectC() selects the kernel: a non-zero result uses a
 *  "field" kernel (the "E" variant for the first and last block of the
 *  band), zero merges the two fields (even rows and odd rows, each read
 *  with a doubled stride) with XDeint8x8MergeC().
 *  The i_modx columns left on the right, if any, go through XDeintNxN().
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;

    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Top field starts on row 0, bottom field on row 1 */
            XDeint8x8MergeC( dst, i_dst,
                             src,         2*i_src,
                             src + i_src, 2*i_src );
            continue;
        }

        /* Blocks on the left/right border of the band use the edge kernel */
        if( i_block == 0 || i_block == i_last )
        {
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        }
        else
        {
            XDeint8x8FieldC( dst, i_dst, src, i_src );
        }
    }

    /* Remaining columns, narrower than a full block */
    if( i_modx != 0 )
    {
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT:
 *  Same walk as XDeintBand8x8C, using the MMXEXT 8x8 kernels.
 *
 *  For each of the i_mbx blocks, XDeint8x8DetectMMXEXT() selects the kernel:
 *  non-zero uses a "field" kernel (the "E" variant for the first and last
 *  block of the band), zero merges the even and odd rows with
 *  XDeint8x8MergeMMXEXT(). The i_modx columns left on the right, if any,
 *  are handled by XDeintNxN().
 *  This function does not call emms() itself.
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;

    for( int i_block = 0; i_block < i_mbx; i_block++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Top field starts on row 0, bottom field on row 1 */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src,         2*i_src,
                                  src + i_src, 2*i_src );
            continue;
        }

        /* Blocks on the left/right border of the band use the edge kernel */
        if( i_block == 0 || i_block == i_last )
        {
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        }
        else
        {
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
        }
    }

    /* Remaining columns, narrower than a full block */
    if( i_modx != 0 )
    {
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
    }
}
#endif
1925
1926 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1927 {
1928     int i_plane;
1929
1930     /* Copy image and skip lines */
1931     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1932     {
1933         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1934         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1935
1936         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1937         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1938
1939         const int i_dst = p_outpic->p[i_plane].i_pitch;
1940         const int i_src = p_pic->p[i_plane].i_pitch;
1941
1942         int y, x;
1943
1944         for( y = 0; y < i_mby; y++ )
1945         {
1946             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1947             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1948
1949 #ifdef CAN_COMPILE_MMXEXT
1950             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1951                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1952             else
1953 #endif
1954                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1955         }
1956
1957         /* Last line (C only)*/
1958         if( i_mody )
1959         {
1960             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1961             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1962
1963             for( x = 0; x < i_mbx; x++ )
1964             {
1965                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1966
1967                 dst += 8;
1968                 src += 8;
1969             }
1970
1971             if( i_modx )
1972                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1973         }
1974     }
1975
1976 #ifdef CAN_COMPILE_MMXEXT
1977     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1978         emms();
1979 #endif
1980 }
1981
1982 /*****************************************************************************
1983  * SendEvents: forward mouse and keyboard events to the parent p_vout
1984  *****************************************************************************/
1985 static int SendEvents( vlc_object_t *p_this, char const *psz_var,
1986                        vlc_value_t oldval, vlc_value_t newval, void *_p_vout )
1987 {
1988     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
1989     vout_thread_t *p_vout = (vout_thread_t *)_p_vout;
1990     vlc_value_t sentval = newval;
1991
1992     if( !strcmp( psz_var, "mouse-y" ) )
1993     {
1994         switch( p_vout->p_sys->i_mode )
1995         {
1996             case DEINTERLACE_MEAN:
1997             case DEINTERLACE_DISCARD:
1998                 sentval.i_int *= 2;
1999                 break;
2000         }
2001     }
2002
2003     var_Set( p_vout, psz_var, sentval );
2004
2005     return VLC_SUCCESS;
2006 }
2007
2008 /*****************************************************************************
2009  * FilterCallback: called when changing the deinterlace method on the fly.
2010  *****************************************************************************/
2011 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
2012                            vlc_value_t oldval, vlc_value_t newval,
2013                            void *p_data )
2014 {
2015     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
2016     vout_thread_t * p_vout = (vout_thread_t *)p_this;
2017     int i_old_mode = p_vout->p_sys->i_mode;
2018
2019     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
2020
2021     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
2022
2023     SetFilterMethod( p_vout, newval.psz_string );
2024
2025     switch( p_vout->render.i_chroma )
2026     {
2027     case VLC_FOURCC('I','4','2','2'):
2028         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2029         return VLC_SUCCESS;
2030         break;
2031
2032     case VLC_FOURCC('I','4','2','0'):
2033     case VLC_FOURCC('I','Y','U','V'):
2034     case VLC_FOURCC('Y','V','1','2'):
2035         switch( p_vout->p_sys->i_mode )
2036         {
2037         case DEINTERLACE_MEAN:
2038         case DEINTERLACE_DISCARD:
2039             if( ( i_old_mode == DEINTERLACE_MEAN )
2040                 || ( i_old_mode == DEINTERLACE_DISCARD ) )
2041             {
2042                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2043                 return VLC_SUCCESS;
2044             }
2045             break;
2046
2047         case DEINTERLACE_BOB:
2048         case DEINTERLACE_BLEND:
2049         case DEINTERLACE_LINEAR:
2050             if( ( i_old_mode == DEINTERLACE_BOB )
2051                 || ( i_old_mode == DEINTERLACE_BLEND )
2052                 || ( i_old_mode == DEINTERLACE_LINEAR ) )
2053             {
2054                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2055                 return VLC_SUCCESS;
2056             }
2057             break;
2058         }
2059         break;
2060
2061     default:
2062         break;
2063     }
2064
2065     /* We need to kill the old vout */
2066
2067     DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
2068
2069     vlc_object_detach( p_vout->p_sys->p_vout );
2070     vlc_object_release( p_vout->p_sys->p_vout );
2071
2072     /* Try to open a new video output */
2073     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
2074
2075     if( p_vout->p_sys->p_vout == NULL )
2076     {
2077         /* Everything failed */
2078         msg_Err( p_vout, "cannot open vout, aborting" );
2079
2080         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2081         return VLC_EGENERIC;
2082     }
2083
2084     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
2085
2086     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2087     return VLC_SUCCESS;
2088 }
2089
2090 /*****************************************************************************
2091  * SendEventsToChild: forward events to the child/children vout
2092  *****************************************************************************/
2093 static int SendEventsToChild( vlc_object_t *p_this, char const *psz_var,
2094                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
2095 {
2096     VLC_UNUSED(p_data); VLC_UNUSED(oldval);
2097     vout_thread_t *p_vout = (vout_thread_t *)p_this;
2098     var_Set( p_vout->p_sys->p_vout, psz_var, newval );
2099     return VLC_SUCCESS;
2100 }
2101
2102
2103 /*****************************************************************************
2104  * video filter2 functions
2105  *****************************************************************************/
2106 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
2107 {
2108     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2109     picture_t *p_pic_dst;
2110
2111     /* Request output picture */
2112     p_pic_dst = p_filter->pf_vout_buffer_new( p_filter );
2113     if( p_pic_dst == NULL )
2114     {
2115         msg_Warn( p_filter, "can't get output picture" );
2116         return NULL;
2117     }
2118
2119     switch( p_vout->p_sys->i_mode )
2120     {
2121         case DEINTERLACE_DISCARD:
2122 #if 0
2123             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
2124 #endif
2125             msg_Err( p_vout, "discarding lines is not supported yet" );
2126             p_pic_dst->pf_release( p_pic_dst );
2127             return p_pic;
2128             break;
2129
2130         case DEINTERLACE_BOB:
2131 #if 0
2132             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
2133             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
2134             break;
2135 #endif
2136
2137         case DEINTERLACE_LINEAR:
2138 #if 0
2139             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
2140             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
2141 #endif
2142             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2143             p_pic_dst->pf_release( p_pic_dst );
2144             return p_pic;
2145             break;
2146
2147         case DEINTERLACE_MEAN:
2148             RenderMean( p_vout, p_pic_dst, p_pic );
2149             break;
2150
2151         case DEINTERLACE_BLEND:
2152             RenderBlend( p_vout, p_pic_dst, p_pic );
2153             break;
2154
2155         case DEINTERLACE_X:
2156             RenderX( p_pic_dst, p_pic );
2157             break;
2158     }
2159
2160     p_pic_dst->date = p_pic->date;
2161     p_pic_dst->b_force = p_pic->b_force;
2162     p_pic_dst->i_nb_fields = p_pic->i_nb_fields;
2163     p_pic_dst->b_progressive = true;
2164     p_pic_dst->b_top_field_first = p_pic->b_top_field_first;
2165
2166     p_pic->pf_release( p_pic );
2167     return p_pic_dst;
2168 }
2169
2170 /*****************************************************************************
2171  * OpenFilter:
2172  *****************************************************************************/
2173 static int OpenFilter( vlc_object_t *p_this )
2174 {
2175     filter_t *p_filter = (filter_t*)p_this;
2176     vout_thread_t *p_vout;
2177     vlc_value_t val;
2178
2179     if( ( p_filter->fmt_in.video.i_chroma != VLC_FOURCC('I','4','2','0') &&
2180           p_filter->fmt_in.video.i_chroma != VLC_FOURCC('I','Y','U','V') &&
2181           p_filter->fmt_in.video.i_chroma != VLC_FOURCC('Y','V','1','2') ) ||
2182         p_filter->fmt_in.video.i_chroma != p_filter->fmt_out.video.i_chroma )
2183     {
2184         return VLC_EGENERIC;
2185     }
2186
2187     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2188      * by spu filters */
2189     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2190     vlc_object_attach( p_vout, p_filter );
2191     p_filter->p_sys = (filter_sys_t *)p_vout;
2192     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2193
2194     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2195                    p_filter->p_cfg );
2196     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2197     var_Create( p_filter, "deinterlace-mode", VLC_VAR_STRING );
2198     var_Set( p_filter, "deinterlace-mode", val );
2199
2200     if ( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2201     {
2202         vlc_object_detach( p_vout );
2203         vlc_object_release( p_vout );
2204         return VLC_EGENERIC;
2205     }
2206
2207     p_filter->pf_video_filter = Deinterlace;
2208
2209     msg_Dbg( p_filter, "deinterlacing" );
2210
2211     return VLC_SUCCESS;
2212 }
2213
2214 /*****************************************************************************
2215  * CloseFilter: clean up the filter
2216  *****************************************************************************/
2217 static void CloseFilter( vlc_object_t *p_this )
2218 {
2219     filter_t *p_filter = (filter_t*)p_this;
2220     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2221
2222     Destroy( VLC_OBJECT(p_vout) );
2223     vlc_object_detach( p_vout );
2224     vlc_object_release( p_vout );
2225 }
2226