]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c
* modules/video_filter/deinterlace.c: included <altivec.h>.
[vlc] / modules / video_filter / deinterlace.c
1 /*****************************************************************************
2  * deinterlace.c : deinterlacer plugin for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001, 2002, 2003 VideoLAN
5  * $Id$
6  *
7  * Author: Sam Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <errno.h>
28 #include <stdlib.h>                                      /* malloc(), free() */
29 #include <string.h>
30
31 #include <vlc/vlc.h>
32 #include <vlc/vout.h>
33
34 #ifdef HAVE_ALTIVEC_H
35 #   include <altivec.h>
36 #endif
37
38 #include "filter_common.h"
39
/* Identifiers of the deinterlacing algorithms. One of them is stored in
 * vout_sys_t.i_mode; SetFilterMethod() picks it from the mode string and
 * Render() dispatches on it. */
#define DEINTERLACE_DISCARD 1 /* keep one field, drop the other */
#define DEINTERLACE_MEAN    2 /* average each pair of adjacent lines */
#define DEINTERLACE_BLEND   3 /* average every line with its neighbour */
#define DEINTERLACE_BOB     4 /* one output picture per field, lines copied */
#define DEINTERLACE_LINEAR  5 /* like bob, missing lines interpolated */
45
46 /*****************************************************************************
47  * Local prototypes
48  *****************************************************************************/
49 static int  Create    ( vlc_object_t * );
50 static void Destroy   ( vlc_object_t * );
51
52 static int  Init      ( vout_thread_t * );
53 static void End       ( vout_thread_t * );
54 static void Render    ( vout_thread_t *, picture_t * );
55
56 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
57 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
58 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
59 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
60 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
61
62 static void MergeGeneric ( void *, const void *, const void *, size_t );
63 #if defined(CAN_COMPILE_C_ALTIVEC)
64 static void MergeAltivec ( void *, const void *, const void *, size_t );
65 #endif
66 #if defined(CAN_COMPILE_MMX)
67 static void MergeMMX     ( void *, const void *, const void *, size_t );
68 #endif
69 #if defined(CAN_COMPILE_SSE)
70 static void MergeSSE2    ( void *, const void *, const void *, size_t );
71 #endif
72 #if defined(CAN_COMPILE_MMX) || defined(CAN_COMPILE_SSE)
73 static void EndMMX       ( void );
74 #endif
75
76 static int  SendEvents   ( vlc_object_t *, char const *,
77                            vlc_value_t, vlc_value_t, void * );
78
79 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
80 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
81
82 /*****************************************************************************
83  * Callback prototypes
84  *****************************************************************************/
85 static int FilterCallback ( vlc_object_t *, char const *,
86                             vlc_value_t, vlc_value_t, void * );
87
/*****************************************************************************
 * Module descriptor
 *****************************************************************************/
/* Label and help text attached to the "deinterlace-mode" option below. */
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("You can choose the default deinterlace mode")

/* Values offered for "deinterlace-mode" and their display names. The two
 * arrays are parallel: entry i of mode_list_text labels entry i of mode_list.
 * SetFilterMethod() also accepts a few alias spellings that are not listed
 * here. */
static char *mode_list[] = { "discard", "blend", "mean", "bob", "linear" };
static char *mode_list_text[] = { N_("Discard"), N_("Blend"), N_("Mean"),
                                  N_("Bob"), N_("Linear") };

vlc_module_begin();
    set_description( _("Deinterlacing video filter") );
    set_capability( "video filter", 0 );

    /* String option read back in Create(); "discard" is its default value */
    add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT,
                MODE_LONGTEXT, VLC_FALSE );
        change_string_list( mode_list, mode_list_text, 0 );

    add_shortcut( "deinterlace" );
    /* Create() and Destroy() are the module's open and close entry points */
    set_callbacks( Create, Destroy );
vlc_module_end();
109
/*****************************************************************************
 * vout_sys_t: Deinterlace video output method descriptor
 *****************************************************************************
 * This structure is part of the video output thread descriptor.
 * It describes the Deinterlace specific properties of an output thread.
 * It is allocated in Create() and freed in Destroy().
 *****************************************************************************/
struct vout_sys_t
{
    int        i_mode;        /* Deinterlace mode (one of DEINTERLACE_*) */
    vlc_bool_t b_double_rate; /* Shall we double the framerate? Set by
                               * SetFilterMethod() for bob and linear, which
                               * emit two pictures per input picture */

    mtime_t    last_date;     /* Date of the last input picture rendered in
                               * double-rate mode, 0 before the first one;
                               * used to date the second output picture */
    mtime_t    next_date;     /* NOTE(review): not referenced in the part of
                               * the file reviewed - confirm it is used */

    vout_thread_t *p_vout;    /* The real video output spawned by Init(),
                               * which receives the deinterlaced pictures */

    vlc_mutex_t filter_lock;  /* Held for the whole of Render() */

    /* Writes into the first buffer the average of the two source buffers,
     * over the given number of bytes; implementation chosen in Create()
     * according to the CPU capabilities */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    /* Optional hook called once after a series of merges; NULL when the
     * selected merge routine needs none */
    void (*pf_end_merge) ( void );
};
131
132 /*****************************************************************************
133  * Create: allocates Deinterlace video thread output method
134  *****************************************************************************
135  * This function allocates and initializes a Deinterlace vout method.
136  *****************************************************************************/
137 static int Create( vlc_object_t *p_this )
138 {
139     vout_thread_t *p_vout = (vout_thread_t *)p_this;
140     vlc_value_t val;
141
142     /* Allocate structure */
143     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
144     if( p_vout->p_sys == NULL )
145     {
146         msg_Err( p_vout, "out of memory" );
147         return VLC_ENOMEM;
148     }
149
150     p_vout->pf_init = Init;
151     p_vout->pf_end = End;
152     p_vout->pf_manage = NULL;
153     p_vout->pf_render = Render;
154     p_vout->pf_display = NULL;
155
156     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
157     p_vout->p_sys->b_double_rate = 0;
158     p_vout->p_sys->last_date = 0;
159     vlc_mutex_init( p_vout, &p_vout->p_sys->filter_lock );
160
161 #if defined(CAN_COMPILE_C_ALTIVEC)
162     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC )
163     {
164         p_vout->p_sys->pf_merge = MergeAltivec;
165         p_vout->p_sys->pf_end_merge = NULL;
166     }
167     else
168 #endif
169 #if defined(CAN_COMPILE_SSE)
170     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_SSE2 )
171     {
172         p_vout->p_sys->pf_merge = MergeSSE2;
173         p_vout->p_sys->pf_end_merge = EndMMX;
174     }
175     else
176 #endif
177 #if defined(CAN_COMPILE_MMX)
178     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMX )
179     {
180         p_vout->p_sys->pf_merge = MergeMMX;
181         p_vout->p_sys->pf_end_merge = EndMMX;
182     }
183     else
184 #endif
185     {
186         p_vout->p_sys->pf_merge = MergeGeneric;
187         p_vout->p_sys->pf_end_merge = NULL;
188     }
189
190     /* Look what method was requested */
191     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
192     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
193
194     if( val.psz_string == NULL )
195     {
196         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
197         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
198
199         val.psz_string = strdup( "discard" );
200     }
201
202     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
203
204     SetFilterMethod( p_vout, val.psz_string );
205
206     free( val.psz_string );
207
208     var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );
209
210     return VLC_SUCCESS;
211 }
212
213 /*****************************************************************************
214  * SetFilterMethod: setup the deinterlace method to use.
215  *****************************************************************************/
216 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
217 {
218     if( !strcmp( psz_method, "discard" ) )
219     {
220         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
221         p_vout->p_sys->b_double_rate = 0;
222     }
223     else if( !strcmp( psz_method, "mean" ) )
224     {
225         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
226         p_vout->p_sys->b_double_rate = 0;
227     }
228     else if( !strcmp( psz_method, "blend" )
229              || !strcmp( psz_method, "average" )
230              || !strcmp( psz_method, "combine-fields" ) )
231     {
232         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
233         p_vout->p_sys->b_double_rate = 0;
234     }
235     else if( !strcmp( psz_method, "bob" )
236              || !strcmp( psz_method, "progressive-scan" ) )
237     {
238         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
239         p_vout->p_sys->b_double_rate = 1;
240     }
241     else if( !strcmp( psz_method, "linear" ) )
242     {
243         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
244         p_vout->p_sys->b_double_rate = 1;
245     }
246     else
247     {
248         msg_Err( p_vout, "no valid deinterlace mode provided, "
249                  "using \"discard\"" );
250     }
251
252     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
253 }
254
255 /*****************************************************************************
256  * Init: initialize Deinterlace video thread output method
257  *****************************************************************************/
258 static int Init( vout_thread_t *p_vout )
259 {
260     int i_index;
261     picture_t *p_pic;
262
263     I_OUTPUTPICTURES = 0;
264
265     /* Initialize the output structure, full of directbuffers since we want
266      * the decoder to output directly to our structures. */
267     switch( p_vout->render.i_chroma )
268     {
269         case VLC_FOURCC('I','4','2','0'):
270         case VLC_FOURCC('I','Y','U','V'):
271         case VLC_FOURCC('Y','V','1','2'):
272         case VLC_FOURCC('I','4','2','2'):
273             p_vout->output.i_chroma = p_vout->render.i_chroma;
274             p_vout->output.i_width  = p_vout->render.i_width;
275             p_vout->output.i_height = p_vout->render.i_height;
276             p_vout->output.i_aspect = p_vout->render.i_aspect;
277             break;
278
279         default:
280             return VLC_EGENERIC; /* unknown chroma */
281             break;
282     }
283
284     /* Try to open the real video output */
285     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
286
287     if( p_vout->p_sys->p_vout == NULL )
288     {
289         /* Everything failed */
290         msg_Err( p_vout, "cannot open vout, aborting" );
291
292         return VLC_EGENERIC;
293     }
294
295     ALLOCATE_DIRECTBUFFERS( VOUT_MAX_PICTURES );
296
297     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
298
299     ADD_PARENT_CALLBACKS( SendEventsToChild );
300
301     return VLC_SUCCESS;
302 }
303
304 /*****************************************************************************
305  * SpawnRealVout: spawn the real video output.
306  *****************************************************************************/
307 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
308 {
309     vout_thread_t *p_real_vout = NULL;
310
311     msg_Dbg( p_vout, "spawning the real video output" );
312
313     switch( p_vout->render.i_chroma )
314     {
315     case VLC_FOURCC('I','4','2','0'):
316     case VLC_FOURCC('I','Y','U','V'):
317     case VLC_FOURCC('Y','V','1','2'):
318         switch( p_vout->p_sys->i_mode )
319         {
320         case DEINTERLACE_MEAN:
321         case DEINTERLACE_DISCARD:
322             p_real_vout =
323                 vout_Create( p_vout,
324                        p_vout->output.i_width, p_vout->output.i_height / 2,
325                        p_vout->output.i_chroma, p_vout->output.i_aspect );
326             break;
327
328         case DEINTERLACE_BOB:
329         case DEINTERLACE_BLEND:
330         case DEINTERLACE_LINEAR:
331             p_real_vout =
332                 vout_Create( p_vout,
333                        p_vout->output.i_width, p_vout->output.i_height,
334                        p_vout->output.i_chroma, p_vout->output.i_aspect );
335             break;
336         }
337         break;
338
339     case VLC_FOURCC('I','4','2','2'):
340         p_real_vout =
341             vout_Create( p_vout,
342                        p_vout->output.i_width, p_vout->output.i_height,
343                        VLC_FOURCC('I','4','2','0'), p_vout->output.i_aspect );
344         break;
345
346     default:
347         break;
348     }
349
350     return p_real_vout;
351 }
352
353 /*****************************************************************************
354  * End: terminate Deinterlace video thread output method
355  *****************************************************************************/
356 static void End( vout_thread_t *p_vout )
357 {
358     int i_index;
359
360     /* Free the fake output buffers we allocated */
361     for( i_index = I_OUTPUTPICTURES ; i_index ; )
362     {
363         i_index--;
364         free( PP_OUTPUTPICTURE[ i_index ]->p_data_orig );
365     }
366 }
367
368 /*****************************************************************************
369  * Destroy: destroy Deinterlace video thread output method
370  *****************************************************************************
371  * Terminate an output method created by DeinterlaceCreateOutputMethod
372  *****************************************************************************/
373 static void Destroy( vlc_object_t *p_this )
374 {
375     vout_thread_t *p_vout = (vout_thread_t *)p_this;
376
377     DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
378
379     vlc_object_detach( p_vout->p_sys->p_vout );
380     vout_Destroy( p_vout->p_sys->p_vout );
381
382     DEL_PARENT_CALLBACKS( SendEventsToChild );
383
384     free( p_vout->p_sys );
385 }
386
387 /*****************************************************************************
388  * Render: displays previously rendered output
389  *****************************************************************************
390  * This function send the currently rendered image to Deinterlace image,
391  * waits until it is displayed and switch the two rendering buffers, preparing
392  * next frame.
393  *****************************************************************************/
394 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
395 {
396     picture_t *pp_outpic[2];
397
398     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
399
400     /* Get a new picture */
401     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
402                                              0, 0, 0 ) )
403               == NULL )
404     {
405         if( p_vout->b_die || p_vout->b_error )
406         {
407             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
408             return;
409         }
410         msleep( VOUT_OUTMEM_SLEEP );
411      }
412
413     vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[0], p_pic->date );
414
415     /* If we are using double rate, get an additional new picture */
416     if( p_vout->p_sys->b_double_rate )
417     {
418         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
419                                                  0, 0, 0 ) )
420                   == NULL )
421         {
422             if( p_vout->b_die || p_vout->b_error )
423             {
424                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
425                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
426                 return;
427             }
428             msleep( VOUT_OUTMEM_SLEEP );
429         }
430
431         /* 20ms is a bit arbitrary, but it's only for the first image we get */
432         if( !p_vout->p_sys->last_date )
433         {
434             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
435                               p_pic->date + 20000 );
436         }
437         else
438         {
439             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
440                       (3 * p_pic->date - p_vout->p_sys->last_date) / 2 );
441         }
442         p_vout->p_sys->last_date = p_pic->date;
443     }
444
445     switch( p_vout->p_sys->i_mode )
446     {
447         case DEINTERLACE_DISCARD:
448             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
449             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
450             break;
451
452         case DEINTERLACE_BOB:
453             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
454             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
455             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
456             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
457             break;
458
459         case DEINTERLACE_LINEAR:
460             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
461             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
462             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
463             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
464             break;
465
466         case DEINTERLACE_MEAN:
467             RenderMean( p_vout, pp_outpic[0], p_pic );
468             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
469             break;
470
471         case DEINTERLACE_BLEND:
472             RenderBlend( p_vout, pp_outpic[0], p_pic );
473             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
474             break;
475     }
476
477     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
478 }
479
480 /*****************************************************************************
481  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
482  *****************************************************************************/
483 static void RenderDiscard( vout_thread_t *p_vout,
484                            picture_t *p_outpic, picture_t *p_pic, int i_field )
485 {
486     int i_plane;
487
488     /* Copy image and skip lines */
489     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
490     {
491         uint8_t *p_in, *p_out_end, *p_out;
492         int i_increment;
493
494         p_in = p_pic->p[i_plane].p_pixels
495                    + i_field * p_pic->p[i_plane].i_pitch;
496
497         p_out = p_outpic->p[i_plane].p_pixels;
498         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
499                              * p_outpic->p[i_plane].i_lines;
500
501         switch( p_vout->render.i_chroma )
502         {
503         case VLC_FOURCC('I','4','2','0'):
504         case VLC_FOURCC('I','Y','U','V'):
505         case VLC_FOURCC('Y','V','1','2'):
506
507             for( ; p_out < p_out_end ; )
508             {
509                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
510                                           p_pic->p[i_plane].i_pitch );
511
512                 p_out += p_pic->p[i_plane].i_pitch;
513                 p_in += 2 * p_pic->p[i_plane].i_pitch;
514             }
515             break;
516
517         case VLC_FOURCC('I','4','2','2'):
518
519             i_increment = 2 * p_pic->p[i_plane].i_pitch;
520
521             if( i_plane == Y_PLANE )
522             {
523                 for( ; p_out < p_out_end ; )
524                 {
525                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
526                                               p_pic->p[i_plane].i_pitch );
527                     p_out += p_pic->p[i_plane].i_pitch;
528                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
529                                               p_pic->p[i_plane].i_pitch );
530                     p_out += p_pic->p[i_plane].i_pitch;
531                     p_in += i_increment;
532                 }
533             }
534             else
535             {
536                 for( ; p_out < p_out_end ; )
537                 {
538                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
539                                               p_pic->p[i_plane].i_pitch );
540                     p_out += p_pic->p[i_plane].i_pitch;
541                     p_in += i_increment;
542                 }
543             }
544             break;
545
546         default:
547             break;
548         }
549     }
550 }
551
552 /*****************************************************************************
553  * RenderBob: renders a BOB picture - simple copy
554  *****************************************************************************/
555 static void RenderBob( vout_thread_t *p_vout,
556                        picture_t *p_outpic, picture_t *p_pic, int i_field )
557 {
558     int i_plane;
559
560     /* Copy image and skip lines */
561     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
562     {
563         uint8_t *p_in, *p_out_end, *p_out;
564
565         p_in = p_pic->p[i_plane].p_pixels;
566         p_out = p_outpic->p[i_plane].p_pixels;
567         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
568                              * p_outpic->p[i_plane].i_lines;
569
570         switch( p_vout->render.i_chroma )
571         {
572             case VLC_FOURCC('I','4','2','0'):
573             case VLC_FOURCC('I','Y','U','V'):
574             case VLC_FOURCC('Y','V','1','2'):
575                 /* For BOTTOM field we need to add the first line */
576                 if( i_field == 1 )
577                 {
578                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
579                                               p_pic->p[i_plane].i_pitch );
580                     p_in += p_pic->p[i_plane].i_pitch;
581                     p_out += p_pic->p[i_plane].i_pitch;
582                 }
583
584                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
585
586                 for( ; p_out < p_out_end ; )
587                 {
588                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
589                                               p_pic->p[i_plane].i_pitch );
590
591                     p_out += p_pic->p[i_plane].i_pitch;
592
593                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
594                                               p_pic->p[i_plane].i_pitch );
595
596                     p_in += 2 * p_pic->p[i_plane].i_pitch;
597                     p_out += p_pic->p[i_plane].i_pitch;
598                 }
599
600                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
601                                           p_pic->p[i_plane].i_pitch );
602
603                 /* For TOP field we need to add the last line */
604                 if( i_field == 0 )
605                 {
606                     p_in += p_pic->p[i_plane].i_pitch;
607                     p_out += p_pic->p[i_plane].i_pitch;
608                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
609                                               p_pic->p[i_plane].i_pitch );
610                 }
611                 break;
612
613             case VLC_FOURCC('I','4','2','2'):
614                 /* For BOTTOM field we need to add the first line */
615                 if( i_field == 1 )
616                 {
617                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
618                                               p_pic->p[i_plane].i_pitch );
619                     p_in += p_pic->p[i_plane].i_pitch;
620                     p_out += p_pic->p[i_plane].i_pitch;
621                 }
622
623                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
624
625                 if( i_plane == Y_PLANE )
626                 {
627                     for( ; p_out < p_out_end ; )
628                     {
629                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
630                                                   p_pic->p[i_plane].i_pitch );
631
632                         p_out += p_pic->p[i_plane].i_pitch;
633
634                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
635                                                   p_pic->p[i_plane].i_pitch );
636
637                         p_in += 2 * p_pic->p[i_plane].i_pitch;
638                         p_out += p_pic->p[i_plane].i_pitch;
639                     }
640                 }
641                 else
642                 {
643                     for( ; p_out < p_out_end ; )
644                     {
645                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
646                                                   p_pic->p[i_plane].i_pitch );
647
648                         p_out += p_pic->p[i_plane].i_pitch;
649                         p_in += 2 * p_pic->p[i_plane].i_pitch;
650                     }
651                 }
652
653                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
654                                           p_pic->p[i_plane].i_pitch );
655
656                 /* For TOP field we need to add the last line */
657                 if( i_field == 0 )
658                 {
659                     p_in += p_pic->p[i_plane].i_pitch;
660                     p_out += p_pic->p[i_plane].i_pitch;
661                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
662                                               p_pic->p[i_plane].i_pitch );
663                 }
664                 break;
665         }
666     }
667 }
668
/* Shorthands for the merge routines stored in vout_sys_t. Both expand to
 * expressions on a variable named p_vout, which must be in scope.
 * EndMerge expands to an unbraced `if`: use it only as a statement of its
 * own, `EndMerge();` - placed right before an `else` it would capture it. */
#define Merge p_vout->p_sys->pf_merge
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
671
672 /*****************************************************************************
673  * RenderLinear: BOB with linear interpolation
674  *****************************************************************************/
675 static void RenderLinear( vout_thread_t *p_vout,
676                           picture_t *p_outpic, picture_t *p_pic, int i_field )
677 {
678     int i_plane;
679
680     /* Copy image and skip lines */
681     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
682     {
683         uint8_t *p_in, *p_out_end, *p_out;
684
685         p_in = p_pic->p[i_plane].p_pixels;
686         p_out = p_outpic->p[i_plane].p_pixels;
687         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
688                              * p_outpic->p[i_plane].i_lines;
689
690         /* For BOTTOM field we need to add the first line */
691         if( i_field == 1 )
692         {
693             p_vout->p_vlc->pf_memcpy( p_out, p_in,
694                                       p_pic->p[i_plane].i_pitch );
695             p_in += p_pic->p[i_plane].i_pitch;
696             p_out += p_pic->p[i_plane].i_pitch;
697         }
698
699         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
700
701         for( ; p_out < p_out_end ; )
702         {
703             p_vout->p_vlc->pf_memcpy( p_out, p_in,
704                                       p_pic->p[i_plane].i_pitch );
705
706             p_out += p_pic->p[i_plane].i_pitch;
707
708             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
709                    p_pic->p[i_plane].i_pitch );
710
711             p_in += 2 * p_pic->p[i_plane].i_pitch;
712             p_out += p_pic->p[i_plane].i_pitch;
713         }
714
715         p_vout->p_vlc->pf_memcpy( p_out, p_in,
716                                   p_pic->p[i_plane].i_pitch );
717
718         /* For TOP field we need to add the last line */
719         if( i_field == 0 )
720         {
721             p_in += p_pic->p[i_plane].i_pitch;
722             p_out += p_pic->p[i_plane].i_pitch;
723             p_vout->p_vlc->pf_memcpy( p_out, p_in,
724                                       p_pic->p[i_plane].i_pitch );
725         }
726     }
727     EndMerge();
728 }
729
730 static void RenderMean( vout_thread_t *p_vout,
731                         picture_t *p_outpic, picture_t *p_pic )
732 {
733     int i_plane;
734
735     /* Copy image and skip lines */
736     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
737     {
738         uint8_t *p_in, *p_out_end, *p_out;
739
740         p_in = p_pic->p[i_plane].p_pixels;
741
742         p_out = p_outpic->p[i_plane].p_pixels;
743         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
744                              * p_outpic->p[i_plane].i_lines;
745
746         /* All lines: mean value */
747         for( ; p_out < p_out_end ; )
748         {
749             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
750                    p_pic->p[i_plane].i_pitch );
751
752             p_out += p_pic->p[i_plane].i_pitch;
753             p_in += 2 * p_pic->p[i_plane].i_pitch;
754         }
755     }
756     EndMerge();
757 }
758
759 static void RenderBlend( vout_thread_t *p_vout,
760                          picture_t *p_outpic, picture_t *p_pic )
761 {
762     int i_plane;
763
764     /* Copy image and skip lines */
765     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
766     {
767         uint8_t *p_in, *p_out_end, *p_out;
768
769         p_in = p_pic->p[i_plane].p_pixels;
770
771         p_out = p_outpic->p[i_plane].p_pixels;
772         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
773                              * p_outpic->p[i_plane].i_lines;
774
775         switch( p_vout->render.i_chroma )
776         {
777             case VLC_FOURCC('I','4','2','0'):
778             case VLC_FOURCC('I','Y','U','V'):
779             case VLC_FOURCC('Y','V','1','2'):
780                 /* First line: simple copy */
781                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
782                                           p_pic->p[i_plane].i_pitch );
783                 p_out += p_pic->p[i_plane].i_pitch;
784
785                 /* Remaining lines: mean value */
786                 for( ; p_out < p_out_end ; )
787                 {
788                    Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
789                           p_pic->p[i_plane].i_pitch );
790
791                     p_out += p_pic->p[i_plane].i_pitch;
792                     p_in += p_pic->p[i_plane].i_pitch;
793                 }
794                 break;
795
796             case VLC_FOURCC('I','4','2','2'):
797                 /* First line: simple copy */
798                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
799                                           p_pic->p[i_plane].i_pitch );
800                 p_out += p_pic->p[i_plane].i_pitch;
801
802                 /* Remaining lines: mean value */
803                 if( i_plane == Y_PLANE )
804                 {
805                     for( ; p_out < p_out_end ; )
806                     {
807                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
808                                p_pic->p[i_plane].i_pitch );
809
810                         p_out += p_pic->p[i_plane].i_pitch;
811                         p_in += p_pic->p[i_plane].i_pitch;
812                     }
813                 }
814
815                 else
816                 {
817                     for( ; p_out < p_out_end ; )
818                     {
819                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
820                                p_pic->p[i_plane].i_pitch );
821
822                         p_out += p_pic->p[i_plane].i_pitch;
823                         p_in += 2*p_pic->p[i_plane].i_pitch;
824                     }
825                 }
826                 break;
827         }
828     }
829     EndMerge();
830 }
831
832 #undef Merge
833
/*****************************************************************************
 * MergeGeneric: portable C line merger
 *****************************************************************************
 * Stores in each of the i_bytes bytes of _p_dest the truncated mean
 * ( s1 + s2 ) >> 1 of the corresponding bytes of _p_s1 and _p_s2.
 * Nothing is read or written when i_bytes is 0.
 *****************************************************************************/
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Track the remaining byte count instead of an end pointer: computing
     * "p_dest + i_bytes - 8" would yield a pointer located before the buffer
     * whenever i_bytes < 8. */
    size_t i_left = i_bytes;

    /* Main loop, manually unrolled: 8 bytes per iteration */
    for( ; i_left > 8 ; i_left -= 8 )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* Remaining bytes (at most 8), one at a time */
    for( ; i_left > 0 ; i_left-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
861
862 #if defined(CAN_COMPILE_MMX)
/*****************************************************************************
 * MergeMMX: line merger using 64-bit MMX registers
 *****************************************************************************
 * Stores in each of the i_bytes bytes of _p_dest the mean of the matching
 * bytes of _p_s1 and _p_s2. Blocks of 8 bytes go through pavgb; the trailing
 * bytes are averaged in C.
 * The function leaves the CPU in MMX state: it does not execute emms itself.
 * NOTE(review): pavgb rounds up, ( a + b + 1 ) >> 1, while the C tail
 * truncates, and pavgb on MMX registers belongs to the SSE / extended-MMX
 * instruction set rather than base MMX -- confirm the capability check used
 * by the caller.
 *****************************************************************************/
static void MergeMMX( void *_p_dest, const void *_p_s1, const void *_p_s2,
                      size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Count remaining bytes rather than comparing against
     * "p_dest + i_bytes - 8", which is not a valid pointer if i_bytes < 8. */
    size_t i_left = i_bytes;

    for( ; i_left > 8 ; i_left -= 8 )
    {
        /* The "m" operands have type uint8_t although 8 bytes are accessed,
         * hence the "memory" clobber; mm1 is used as scratch register. */
        __asm__ __volatile__( "movq %2,%%mm1;"
                              "pavgb %1, %%mm1;"
                              "movq %%mm1, %0" : "=m" (*p_dest)
                                               : "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "mm1", "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Remaining bytes (at most 8) */
    for( ; i_left > 0 ; i_left-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
889 #endif
890
891 #if defined(CAN_COMPILE_SSE)
/*****************************************************************************
 * MergeSSE2: line merger using 128-bit SSE2 registers
 *****************************************************************************
 * Stores in each of the i_bytes bytes of _p_dest the mean of the matching
 * bytes of _p_s1 and _p_s2. Exactly i_bytes bytes are processed:
 *  - leading bytes are averaged in C until p_s1 is 16-byte aligned, because
 *    pavgb takes *p_s1 as a memory operand, which must be aligned;
 *  - blocks of 16 bytes then go through movdqu / pavgb;
 *  - trailing bytes are averaged in C.
 * NOTE(review): pavgb rounds up, ( a + b + 1 ) >> 1, whereas the C loops
 * truncate, so the rounding of odd sums depends on the byte position.
 *****************************************************************************/
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    /* Bytes still to be produced. The head loop must consume from this
     * budget: deriving the end pointer from the already advanced p_dest
     * plus the full i_bytes would run past the end of all three buffers. */
    size_t i_left = i_bytes;

    /* Head: align p_s1, without ever exceeding the requested length */
    for( ; i_left > 0 && ( (uintptr_t)p_s1 % 16 ) ; i_left-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* Body: 16 bytes per iteration */
    for( ; i_left > 16 ; i_left -= 16 )
    {
        /* The "m" operands have type uint8_t although 16 bytes are accessed,
         * hence the "memory" clobber; xmm1 is used as scratch register. */
        __asm__ __volatile__( "movdqu %2,%%xmm1;"
                              "pavgb %1, %%xmm1;"
                              "movdqu %%xmm1, %0" : "=m" (*p_dest)
                                                  : "m" (*p_s1),
                                                    "m" (*p_s2)
                                                  : "xmm1", "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Tail: remaining bytes (at most 16) */
    for( ; i_left > 0 ; i_left-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
922 #endif
923
924 #if defined(CAN_COMPILE_MMX) || defined(CAN_COMPILE_SSE)
/*****************************************************************************
 * EndMMX: executes the emms instruction
 *****************************************************************************
 * Takes no argument and returns nothing; its only effect is the single
 * "emms" instruction issued below.
 *****************************************************************************/
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" );
}
929 #endif
930
931 #ifdef CAN_COMPILE_C_ALTIVEC
932 static void MergeAltivec( void *_p_dest, const void *_p_s1,
933                           const void *_p_s2, size_t i_bytes )
934 {
935     uint8_t *p_dest = (uint8_t *)_p_dest;
936     uint8_t *p_s1   = (uint8_t *)_p_s1;
937     uint8_t *p_s2   = (uint8_t *)_p_s2;
938     uint8_t *p_end  = p_dest + i_bytes - 15;
939
940     /* Use C until the first 16-bytes aligned destination pixel */
941     while( (int)p_dest & 0xF )
942     {
943         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
944     }
945
946     if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
947     {
948         /* Unaligned source */
949         vector unsigned char s1v, s2v, destv;
950         vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
951         vector unsigned char perm1v, perm2v;
952
953         perm1v = vec_lvsl( 0, p_s1 );
954         perm2v = vec_lvsl( 0, p_s2 );
955         s1oldv = vec_ld( 0, p_s1 );
956         s2oldv = vec_ld( 0, p_s2 );
957
958         while( p_dest < p_end )
959         {
960             s1newv = vec_ld( 16, p_s1 );
961             s2newv = vec_ld( 16, p_s2 );
962             s1v    = vec_perm( s1oldv, s1newv, perm1v );
963             s2v    = vec_perm( s2oldv, s2newv, perm2v );
964             s1oldv = s1newv;
965             s2oldv = s2newv;
966             destv  = vec_avg( s1v, s2v );
967             vec_st( destv, 0, p_dest );
968
969             p_s1   += 16;
970             p_s2   += 16;
971             p_dest += 16;
972         }
973     }
974     else
975     {
976         /* Aligned source */
977         vector unsigned char s1v, s2v, destv;
978
979         while( p_dest < p_end )
980         {
981             s1v   = vec_ld( 0, p_s1 );
982             s2v   = vec_ld( 0, p_s2 );
983             destv = vec_avg( s1v, s2v );
984             vec_st( destv, 0, p_dest );
985
986             p_s1   += 16;
987             p_s2   += 16;
988             p_dest += 16;
989         }
990     }
991
992     p_end += 15;
993
994     while( p_dest < p_end )
995     {
996         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
997     }
998 }
999 #endif
1000
1001 /*****************************************************************************
1002  * SendEvents: forward mouse and keyboard events to the parent p_vout
1003  *****************************************************************************/
1004 static int SendEvents( vlc_object_t *p_this, char const *psz_var,
1005                        vlc_value_t oldval, vlc_value_t newval, void *_p_vout )
1006 {
1007     vout_thread_t *p_vout = (vout_thread_t *)_p_vout;
1008     vlc_value_t sentval = newval;
1009
1010     if( !strcmp( psz_var, "mouse-y" ) )
1011     {
1012         switch( p_vout->p_sys->i_mode )
1013         {
1014             case DEINTERLACE_MEAN:
1015             case DEINTERLACE_DISCARD:
1016                 sentval.i_int *= 2;
1017                 break;
1018         }
1019     }
1020
1021     var_Set( p_vout, psz_var, sentval );
1022
1023     return VLC_SUCCESS;
1024 }
1025
1026 /*****************************************************************************
1027  * FilterCallback: called when changing the deinterlace method on the fly.
1028  *****************************************************************************/
1029 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1030                            vlc_value_t oldval, vlc_value_t newval,
1031                            void *p_data )
1032 {
1033     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1034     int i_old_mode = p_vout->p_sys->i_mode;
1035
1036     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1037
1038     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
1039
1040     SetFilterMethod( p_vout, newval.psz_string );
1041
1042     switch( p_vout->render.i_chroma )
1043     {
1044     case VLC_FOURCC('I','4','2','2'):
1045         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1046         return VLC_SUCCESS;
1047         break;
1048
1049     case VLC_FOURCC('I','4','2','0'):
1050     case VLC_FOURCC('I','Y','U','V'):
1051     case VLC_FOURCC('Y','V','1','2'):
1052         switch( p_vout->p_sys->i_mode )
1053         {
1054         case DEINTERLACE_MEAN:
1055         case DEINTERLACE_DISCARD:
1056             if( ( i_old_mode == DEINTERLACE_MEAN )
1057                 || ( i_old_mode == DEINTERLACE_DISCARD ) )
1058             {
1059                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1060                 return VLC_SUCCESS;
1061             }
1062             break;
1063
1064         case DEINTERLACE_BOB:
1065         case DEINTERLACE_BLEND:
1066         case DEINTERLACE_LINEAR:
1067             if( ( i_old_mode == DEINTERLACE_BOB )
1068                 || ( i_old_mode == DEINTERLACE_BLEND )
1069                 || ( i_old_mode == DEINTERLACE_LINEAR ) )
1070             {
1071                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1072                 return VLC_SUCCESS;
1073             }
1074             break;
1075         }
1076         break;
1077
1078     default:
1079         break;
1080     }
1081
1082     /* We need to kill the old vout */
1083
1084     DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
1085
1086     vlc_object_detach( p_vout->p_sys->p_vout );
1087     vout_Destroy( p_vout->p_sys->p_vout );
1088
1089     /* Try to open a new video output */
1090     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
1091
1092     if( p_vout->p_sys->p_vout == NULL )
1093     {
1094         /* Everything failed */
1095         msg_Err( p_vout, "cannot open vout, aborting" );
1096
1097         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1098         return VLC_EGENERIC;
1099     }
1100
1101     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
1102
1103     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1104     return VLC_SUCCESS;
1105 }
1106
1107 /*****************************************************************************
1108  * SendEventsToChild: forward events to the child/children vout
1109  *****************************************************************************/
1110 static int SendEventsToChild( vlc_object_t *p_this, char const *psz_var,
1111                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
1112 {
1113     vout_thread_t *p_vout = (vout_thread_t *)p_this;
1114     var_Set( p_vout->p_sys->p_vout, psz_var, newval );
1115     return VLC_SUCCESS;
1116 }