git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c

   1 /*****************************************************************************
   2  * deinterlace.c : deinterlacer plugin for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2000, 2001, 2002, 2003 VideoLAN
   5  * $Id$
   6  *
   7  * Author: Sam Hocevar <sam@zoy.org>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 /*****************************************************************************
  25  * Preamble
  26  *****************************************************************************/
  27 #include <errno.h>
  28 #include <stdlib.h>                                      /* malloc(), free() */
  29 #include <string.h>
  30
  31 #include <vlc/vlc.h>
  32 #include <vlc/vout.h>
  33
  34 #ifdef HAVE_ALTIVEC_H
  35 #   include <altivec.h>
  36 #endif
  37
  38 #ifdef CAN_COMPILE_MMXEXT
  39 #   include "mmx.h"
  40 #endif
  41
  42 #include "filter_common.h"
  43
  44 #define DEINTERLACE_DISCARD 1
  45 #define DEINTERLACE_MEAN    2
  46 #define DEINTERLACE_BLEND   3
  47 #define DEINTERLACE_BOB     4
  48 #define DEINTERLACE_LINEAR  5
  49 #define DEINTERLACE_X       6
  50
  51 /*****************************************************************************
  52  * Local prototypes
  53  *****************************************************************************/
  54 static int  Create    ( vlc_object_t * );
  55 static void Destroy   ( vlc_object_t * );
  56
  57 static int  Init      ( vout_thread_t * );
  58 static void End       ( vout_thread_t * );
  59 static void Render    ( vout_thread_t *, picture_t * );
  60
  61 static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
  62 static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
  63 static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
  64 static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
  65 static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
  66 static void RenderX      ( vout_thread_t *, picture_t *, picture_t * );
  67
  68 static void MergeGeneric ( void *, const void *, const void *, size_t );
  69 #if defined(CAN_COMPILE_C_ALTIVEC)
  70 static void MergeAltivec ( void *, const void *, const void *, size_t );
  71 #endif
  72 #if defined(CAN_COMPILE_MMXEXT)
  73 static void MergeMMX     ( void *, const void *, const void *, size_t );
  74 #endif
  75 #if defined(CAN_COMPILE_SSE)
  76 static void MergeSSE2    ( void *, const void *, const void *, size_t );
  77 #endif
  78 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
  79 static void EndMMX       ( void );
  80 #endif
  81
  82 static int  SendEvents   ( vlc_object_t *, char const *,
  83                            vlc_value_t, vlc_value_t, void * );
  84
  85 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method );
  86 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );
  87
  88 /*****************************************************************************
  89  * Callback prototypes
  90  *****************************************************************************/
  91 static int FilterCallback ( vlc_object_t *, char const *,
  92                             vlc_value_t, vlc_value_t, void * );
  93
  94 /*****************************************************************************
  95  * Module descriptor
  96  *****************************************************************************/
  97 #define MODE_TEXT N_("Deinterlace mode")
  98 #define MODE_LONGTEXT N_("You can choose the default deinterlace mode")
  99
 100 static char *mode_list[] = { "discard", "blend", "mean", "bob", "linear", "x" };
 101 static char *mode_list_text[] = { N_("Discard"), N_("Blend"), N_("Mean"),
 102                                   N_("Bob"), N_("Linear"), N_("X") };
 103
 104 vlc_module_begin();
 105     set_description( _("Deinterlacing video filter") );
 106     set_shortname( N_("Deinterlace" ));
 107     set_capability( "video filter", 0 );
 108     set_category( CAT_VIDEO );
 109     set_subcategory( SUBCAT_VIDEO_VFILTER );
 110
 111     add_string( "deinterlace-mode", "discard", NULL, MODE_TEXT,
 112                 MODE_LONGTEXT, VLC_FALSE );
 113         change_string_list( mode_list, mode_list_text, 0 );
 114
 115     add_shortcut( "deinterlace" );
 116     set_callbacks( Create, Destroy );
 117 vlc_module_end();
 118
 119 /*****************************************************************************
 120  * vout_sys_t: Deinterlace video output method descriptor
 121  *****************************************************************************
 122  * This structure is part of the video output thread descriptor.
 123  * It describes the Deinterlace specific properties of an output thread.
 124  *****************************************************************************/
 125 struct vout_sys_t
 126 {
 127     int        i_mode;        /* Deinterlace mode */
 128     vlc_bool_t b_double_rate; /* Shall we double the framerate? */
 129
 130     mtime_t    last_date;
 131     mtime_t    next_date;
 132
 133     vout_thread_t *p_vout;
 134
 135     vlc_mutex_t filter_lock;
 136
 137     void (*pf_merge) ( void *, const void *, const void *, size_t );
 138     void (*pf_end_merge) ( void );
 139 };
 140
 141 /*****************************************************************************
 142  * Control: control facility for the vout (forwards to child vout)
 143  *****************************************************************************/
 144 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
 145 {
 146     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
 147 }
 148
 149 /*****************************************************************************
 150  * Create: allocates Deinterlace video thread output method
 151  *****************************************************************************
 152  * This function allocates and initializes a Deinterlace vout method.
 153  *****************************************************************************/
 154 static int Create( vlc_object_t *p_this )
 155 {
 156     vout_thread_t *p_vout = (vout_thread_t *)p_this;
 157     vlc_value_t val;
 158
 159     /* Allocate structure */
 160     p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
 161     if( p_vout->p_sys == NULL )
 162     {
 163         msg_Err( p_vout, "out of memory" );
 164         return VLC_ENOMEM;
 165     }
 166
 167     p_vout->pf_init = Init;
 168     p_vout->pf_end = End;
 169     p_vout->pf_manage = NULL;
 170     p_vout->pf_render = Render;
 171     p_vout->pf_display = NULL;
 172     p_vout->pf_control = Control;
 173
 174     p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
 175     p_vout->p_sys->b_double_rate = 0;
 176     p_vout->p_sys->last_date = 0;
 177     p_vout->p_sys->p_vout = 0;
 178     vlc_mutex_init( p_vout, &p_vout->p_sys->filter_lock );
 179
 180 #if defined(CAN_COMPILE_C_ALTIVEC)
 181     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_ALTIVEC )
 182     {
 183         p_vout->p_sys->pf_merge = MergeAltivec;
 184         p_vout->p_sys->pf_end_merge = NULL;
 185     }
 186     else
 187 #endif
 188 #if defined(CAN_COMPILE_SSE)
 189     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_SSE2 )
 190     {
 191         p_vout->p_sys->pf_merge = MergeSSE2;
 192         p_vout->p_sys->pf_end_merge = EndMMX;
 193     }
 194     else
 195 #endif
 196 #if defined(CAN_COMPILE_MMXEXT)
 197     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMX )
 198     {
 199         p_vout->p_sys->pf_merge = MergeMMX;
 200         p_vout->p_sys->pf_end_merge = EndMMX;
 201     }
 202     else
 203 #endif
 204     {
 205         p_vout->p_sys->pf_merge = MergeGeneric;
 206         p_vout->p_sys->pf_end_merge = NULL;
 207     }
 208
 209     /* Look what method was requested */
 210     var_Create( p_vout, "deinterlace-mode", VLC_VAR_STRING );
 211     var_Change( p_vout, "deinterlace-mode", VLC_VAR_INHERITVALUE, &val, NULL );
 212
 213     if( val.psz_string == NULL )
 214     {
 215         msg_Err( p_vout, "configuration variable deinterlace-mode empty" );
 216         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
 217
 218         val.psz_string = strdup( "discard" );
 219     }
 220
 221     msg_Dbg( p_vout, "using %s deinterlace mode", val.psz_string );
 222
 223     SetFilterMethod( p_vout, val.psz_string );
 224
 225     free( val.psz_string );
 226
 227     var_AddCallback( p_vout, "deinterlace-mode", FilterCallback, NULL );
 228
 229     return VLC_SUCCESS;
 230 }
 231
 232 /*****************************************************************************
 233  * SetFilterMethod: setup the deinterlace method to use.
 234  *****************************************************************************/
 235 static void SetFilterMethod( vout_thread_t *p_vout, char *psz_method )
 236 {
 237     if( !strcmp( psz_method, "discard" ) )
 238     {
 239         p_vout->p_sys->i_mode = DEINTERLACE_DISCARD;
 240         p_vout->p_sys->b_double_rate = 0;
 241     }
 242     else if( !strcmp( psz_method, "mean" ) )
 243     {
 244         p_vout->p_sys->i_mode = DEINTERLACE_MEAN;
 245         p_vout->p_sys->b_double_rate = 0;
 246     }
 247     else if( !strcmp( psz_method, "blend" )
 248              || !strcmp( psz_method, "average" )
 249              || !strcmp( psz_method, "combine-fields" ) )
 250     {
 251         p_vout->p_sys->i_mode = DEINTERLACE_BLEND;
 252         p_vout->p_sys->b_double_rate = 0;
 253     }
 254     else if( !strcmp( psz_method, "bob" )
 255              || !strcmp( psz_method, "progressive-scan" ) )
 256     {
 257         p_vout->p_sys->i_mode = DEINTERLACE_BOB;
 258         p_vout->p_sys->b_double_rate = 1;
 259     }
 260     else if( !strcmp( psz_method, "linear" ) )
 261     {
 262         p_vout->p_sys->i_mode = DEINTERLACE_LINEAR;
 263         p_vout->p_sys->b_double_rate = 1;
 264     }
 265     else if( !strcmp( psz_method, "x" ) )
 266     {
 267         p_vout->p_sys->i_mode = DEINTERLACE_X;
 268         p_vout->p_sys->b_double_rate = 0;
 269     }
 270     else
 271     {
 272         msg_Err( p_vout, "no valid deinterlace mode provided, "
 273                  "using \"discard\"" );
 274     }
 275
 276     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
 277 }
 278
 279 /*****************************************************************************
 280  * Init: initialize Deinterlace video thread output method
 281  *****************************************************************************/
 282 static int Init( vout_thread_t *p_vout )
 283 {
 284     int i_index;
 285     picture_t *p_pic;
 286
 287     I_OUTPUTPICTURES = 0;
 288
 289     /* Initialize the output structure, full of directbuffers since we want
 290      * the decoder to output directly to our structures. */
 291     switch( p_vout->render.i_chroma )
 292     {
 293         case VLC_FOURCC('I','4','2','0'):
 294         case VLC_FOURCC('I','Y','U','V'):
 295         case VLC_FOURCC('Y','V','1','2'):
 296         case VLC_FOURCC('I','4','2','2'):
 297             p_vout->output.i_chroma = p_vout->render.i_chroma;
 298             p_vout->output.i_width  = p_vout->render.i_width;
 299             p_vout->output.i_height = p_vout->render.i_height;
 300             p_vout->output.i_aspect = p_vout->render.i_aspect;
 301             break;
 302
 303         default:
 304             return VLC_EGENERIC; /* unknown chroma */
 305             break;
 306     }
 307
 308     /* Try to open the real video output */
 309     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
 310
 311     if( p_vout->p_sys->p_vout == NULL )
 312     {
 313         /* Everything failed */
 314         msg_Err( p_vout, "cannot open vout, aborting" );
 315
 316         return VLC_EGENERIC;
 317     }
 318
 319     ALLOCATE_DIRECTBUFFERS( VOUT_MAX_PICTURES );
 320
 321     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
 322
 323     ADD_PARENT_CALLBACKS( SendEventsToChild );
 324
 325     return VLC_SUCCESS;
 326 }
 327
 328 /*****************************************************************************
 329  * SpawnRealVout: spawn the real video output.
 330  *****************************************************************************/
 331 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
 332 {
 333     vout_thread_t *p_real_vout = NULL;
 334     video_format_t fmt = {0};
 335
 336     msg_Dbg( p_vout, "spawning the real video output" );
 337
 338     fmt.i_width = fmt.i_visible_width = p_vout->output.i_width;
 339     fmt.i_height = fmt.i_visible_height = p_vout->output.i_height;
 340     fmt.i_x_offset = fmt.i_y_offset = 0;
 341     fmt.i_chroma = p_vout->output.i_chroma;
 342     fmt.i_aspect = p_vout->output.i_aspect;
 343     fmt.i_sar_num = p_vout->output.i_aspect * fmt.i_height / fmt.i_width;
 344     fmt.i_sar_den = VOUT_ASPECT_FACTOR;
 345
 346     switch( p_vout->render.i_chroma )
 347     {
 348     case VLC_FOURCC('I','4','2','0'):
 349     case VLC_FOURCC('I','Y','U','V'):
 350     case VLC_FOURCC('Y','V','1','2'):
 351         switch( p_vout->p_sys->i_mode )
 352         {
 353         case DEINTERLACE_MEAN:
 354         case DEINTERLACE_DISCARD:
 355             fmt.i_height = fmt.i_visible_height = p_vout->output.i_height / 2;
 356             p_real_vout = vout_Create( p_vout, &fmt );
 357             break;
 358
 359         case DEINTERLACE_BOB:
 360         case DEINTERLACE_BLEND:
 361         case DEINTERLACE_LINEAR:
 362         case DEINTERLACE_X:
 363             p_real_vout = vout_Create( p_vout, &fmt );
 364             break;
 365         }
 366         break;
 367
 368     case VLC_FOURCC('I','4','2','2'):
 369         fmt.i_chroma = VLC_FOURCC('I','4','2','0');
 370         p_real_vout = vout_Create( p_vout, &fmt );
 371         break;
 372
 373     default:
 374         break;
 375     }
 376
 377     return p_real_vout;
 378 }
 379
 380 /*****************************************************************************
 381  * End: terminate Deinterlace video thread output method
 382  *****************************************************************************/
 383 static void End( vout_thread_t *p_vout )
 384 {
 385     int i_index;
 386
 387     /* Free the fake output buffers we allocated */
 388     for( i_index = I_OUTPUTPICTURES ; i_index ; )
 389     {
 390         i_index--;
 391         free( PP_OUTPUTPICTURE[ i_index ]->p_data_orig );
 392     }
 393 }
 394
 395 /*****************************************************************************
 396  * Destroy: destroy Deinterlace video thread output method
 397  *****************************************************************************
 398  * Terminate an output method created by DeinterlaceCreateOutputMethod
 399  *****************************************************************************/
 400 static void Destroy( vlc_object_t *p_this )
 401 {
 402     vout_thread_t *p_vout = (vout_thread_t *)p_this;
 403
 404     if( p_vout->p_sys->p_vout )
 405     {
 406         DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
 407         vlc_object_detach( p_vout->p_sys->p_vout );
 408         vout_Destroy( p_vout->p_sys->p_vout );
 409     }
 410
 411     DEL_PARENT_CALLBACKS( SendEventsToChild );
 412
 413     free( p_vout->p_sys );
 414 }
 415
 416 /*****************************************************************************
 417  * Render: displays previously rendered output
 418  *****************************************************************************
 419  * This function send the currently rendered image to Deinterlace image,
 420  * waits until it is displayed and switch the two rendering buffers, preparing
 421  * next frame.
 422  *****************************************************************************/
 423 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
 424 {
 425     picture_t *pp_outpic[2];
 426
 427     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
 428
 429     /* Get a new picture */
 430     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
 431                                              0, 0, 0 ) )
 432               == NULL )
 433     {
 434         if( p_vout->b_die || p_vout->b_error )
 435         {
 436             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 437             return;
 438         }
 439         msleep( VOUT_OUTMEM_SLEEP );
 440      }
 441
 442     vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[0], p_pic->date );
 443
 444     /* If we are using double rate, get an additional new picture */
 445     if( p_vout->p_sys->b_double_rate )
 446     {
 447         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
 448                                                  0, 0, 0 ) )
 449                   == NULL )
 450         {
 451             if( p_vout->b_die || p_vout->b_error )
 452             {
 453                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 454                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 455                 return;
 456             }
 457             msleep( VOUT_OUTMEM_SLEEP );
 458         }
 459
 460         /* 20ms is a bit arbitrary, but it's only for the first image we get */
 461         if( !p_vout->p_sys->last_date )
 462         {
 463             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
 464                               p_pic->date + 20000 );
 465         }
 466         else
 467         {
 468             vout_DatePicture( p_vout->p_sys->p_vout, pp_outpic[1],
 469                       (3 * p_pic->date - p_vout->p_sys->last_date) / 2 );
 470         }
 471         p_vout->p_sys->last_date = p_pic->date;
 472     }
 473
 474     switch( p_vout->p_sys->i_mode )
 475     {
 476         case DEINTERLACE_DISCARD:
 477             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
 478             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 479             break;
 480
 481         case DEINTERLACE_BOB:
 482             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
 483             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 484             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
 485             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
 486             break;
 487
 488         case DEINTERLACE_LINEAR:
 489             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
 490             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 491             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
 492             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
 493             break;
 494
 495         case DEINTERLACE_MEAN:
 496             RenderMean( p_vout, pp_outpic[0], p_pic );
 497             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 498             break;
 499
 500         case DEINTERLACE_BLEND:
 501             RenderBlend( p_vout, pp_outpic[0], p_pic );
 502             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 503             break;
 504
 505         case DEINTERLACE_X:
 506             RenderX( p_vout, pp_outpic[0], p_pic );
 507             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 508             break;
 509     }
 510     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 511 }
 512
 513 /*****************************************************************************
 514  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
 515  *****************************************************************************/
 516 static void RenderDiscard( vout_thread_t *p_vout,
 517                            picture_t *p_outpic, picture_t *p_pic, int i_field )
 518 {
 519     int i_plane;
 520
 521     /* Copy image and skip lines */
 522     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 523     {
 524         uint8_t *p_in, *p_out_end, *p_out;
 525         int i_increment;
 526
 527         p_in = p_pic->p[i_plane].p_pixels
 528                    + i_field * p_pic->p[i_plane].i_pitch;
 529
 530         p_out = p_outpic->p[i_plane].p_pixels;
 531         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 532                              * p_outpic->p[i_plane].i_visible_lines;
 533
 534         switch( p_vout->render.i_chroma )
 535         {
 536         case VLC_FOURCC('I','4','2','0'):
 537         case VLC_FOURCC('I','Y','U','V'):
 538         case VLC_FOURCC('Y','V','1','2'):
 539
 540             for( ; p_out < p_out_end ; )
 541             {
 542                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
 543                                           p_pic->p[i_plane].i_pitch );
 544
 545                 p_out += p_pic->p[i_plane].i_pitch;
 546                 p_in += 2 * p_pic->p[i_plane].i_pitch;
 547             }
 548             break;
 549
 550         case VLC_FOURCC('I','4','2','2'):
 551
 552             i_increment = 2 * p_pic->p[i_plane].i_pitch;
 553
 554             if( i_plane == Y_PLANE )
 555             {
 556                 for( ; p_out < p_out_end ; )
 557                 {
 558                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 559                                               p_pic->p[i_plane].i_pitch );
 560                     p_out += p_pic->p[i_plane].i_pitch;
 561                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 562                                               p_pic->p[i_plane].i_pitch );
 563                     p_out += p_pic->p[i_plane].i_pitch;
 564                     p_in += i_increment;
 565                 }
 566             }
 567             else
 568             {
 569                 for( ; p_out < p_out_end ; )
 570                 {
 571                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 572                                               p_pic->p[i_plane].i_pitch );
 573                     p_out += p_pic->p[i_plane].i_pitch;
 574                     p_in += i_increment;
 575                 }
 576             }
 577             break;
 578
 579         default:
 580             break;
 581         }
 582     }
 583 }
 584
 585 /*****************************************************************************
 586  * RenderBob: renders a BOB picture - simple copy
 587  *****************************************************************************/
 588 static void RenderBob( vout_thread_t *p_vout,
 589                        picture_t *p_outpic, picture_t *p_pic, int i_field )
 590 {
 591     int i_plane;
 592
 593     /* Copy image and skip lines */
 594     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 595     {
 596         uint8_t *p_in, *p_out_end, *p_out;
 597
 598         p_in = p_pic->p[i_plane].p_pixels;
 599         p_out = p_outpic->p[i_plane].p_pixels;
 600         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 601                              * p_outpic->p[i_plane].i_visible_lines;
 602
 603         switch( p_vout->render.i_chroma )
 604         {
 605             case VLC_FOURCC('I','4','2','0'):
 606             case VLC_FOURCC('I','Y','U','V'):
 607             case VLC_FOURCC('Y','V','1','2'):
 608                 /* For BOTTOM field we need to add the first line */
 609                 if( i_field == 1 )
 610                 {
 611                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 612                                               p_pic->p[i_plane].i_pitch );
 613                     p_in += p_pic->p[i_plane].i_pitch;
 614                     p_out += p_pic->p[i_plane].i_pitch;
 615                 }
 616
 617                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 618
 619                 for( ; p_out < p_out_end ; )
 620                 {
 621                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 622                                               p_pic->p[i_plane].i_pitch );
 623
 624                     p_out += p_pic->p[i_plane].i_pitch;
 625
 626                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 627                                               p_pic->p[i_plane].i_pitch );
 628
 629                     p_in += 2 * p_pic->p[i_plane].i_pitch;
 630                     p_out += p_pic->p[i_plane].i_pitch;
 631                 }
 632
 633                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
 634                                           p_pic->p[i_plane].i_pitch );
 635
 636                 /* For TOP field we need to add the last line */
 637                 if( i_field == 0 )
 638                 {
 639                     p_in += p_pic->p[i_plane].i_pitch;
 640                     p_out += p_pic->p[i_plane].i_pitch;
 641                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 642                                               p_pic->p[i_plane].i_pitch );
 643                 }
 644                 break;
 645
 646             case VLC_FOURCC('I','4','2','2'):
 647                 /* For BOTTOM field we need to add the first line */
 648                 if( i_field == 1 )
 649                 {
 650                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 651                                               p_pic->p[i_plane].i_pitch );
 652                     p_in += p_pic->p[i_plane].i_pitch;
 653                     p_out += p_pic->p[i_plane].i_pitch;
 654                 }
 655
 656                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 657
 658                 if( i_plane == Y_PLANE )
 659                 {
 660                     for( ; p_out < p_out_end ; )
 661                     {
 662                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
 663                                                   p_pic->p[i_plane].i_pitch );
 664
 665                         p_out += p_pic->p[i_plane].i_pitch;
 666
 667                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
 668                                                   p_pic->p[i_plane].i_pitch );
 669
 670                         p_in += 2 * p_pic->p[i_plane].i_pitch;
 671                         p_out += p_pic->p[i_plane].i_pitch;
 672                     }
 673                 }
 674                 else
 675                 {
 676                     for( ; p_out < p_out_end ; )
 677                     {
 678                         p_vout->p_vlc->pf_memcpy( p_out, p_in,
 679                                                   p_pic->p[i_plane].i_pitch );
 680
 681                         p_out += p_pic->p[i_plane].i_pitch;
 682                         p_in += 2 * p_pic->p[i_plane].i_pitch;
 683                     }
 684                 }
 685
 686                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
 687                                           p_pic->p[i_plane].i_pitch );
 688
 689                 /* For TOP field we need to add the last line */
 690                 if( i_field == 0 )
 691                 {
 692                     p_in += p_pic->p[i_plane].i_pitch;
 693                     p_out += p_pic->p[i_plane].i_pitch;
 694                     p_vout->p_vlc->pf_memcpy( p_out, p_in,
 695                                               p_pic->p[i_plane].i_pitch );
 696                 }
 697                 break;
 698         }
 699     }
 700 }
 701
 702 #define Merge p_vout->p_sys->pf_merge
 703 #define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
 704
 705 /*****************************************************************************
 706  * RenderLinear: BOB with linear interpolation
 707  *****************************************************************************/
 708 static void RenderLinear( vout_thread_t *p_vout,
 709                           picture_t *p_outpic, picture_t *p_pic, int i_field )
 710 {
 711     int i_plane;
 712
 713     /* Copy image and skip lines */
 714     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 715     {
 716         uint8_t *p_in, *p_out_end, *p_out;
 717
 718         p_in = p_pic->p[i_plane].p_pixels;
 719         p_out = p_outpic->p[i_plane].p_pixels;
 720         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 721                              * p_outpic->p[i_plane].i_visible_lines;
 722
 723         /* For BOTTOM field we need to add the first line */
 724         if( i_field == 1 )
 725         {
 726             p_vout->p_vlc->pf_memcpy( p_out, p_in,
 727                                       p_pic->p[i_plane].i_pitch );
 728             p_in += p_pic->p[i_plane].i_pitch;
 729             p_out += p_pic->p[i_plane].i_pitch;
 730         }
 731
 732         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 733
 734         for( ; p_out < p_out_end ; )
 735         {
 736             p_vout->p_vlc->pf_memcpy( p_out, p_in,
 737                                       p_pic->p[i_plane].i_pitch );
 738
 739             p_out += p_pic->p[i_plane].i_pitch;
 740
 741             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
 742                    p_pic->p[i_plane].i_pitch );
 743
 744             p_in += 2 * p_pic->p[i_plane].i_pitch;
 745             p_out += p_pic->p[i_plane].i_pitch;
 746         }
 747
 748         p_vout->p_vlc->pf_memcpy( p_out, p_in,
 749                                   p_pic->p[i_plane].i_pitch );
 750
 751         /* For TOP field we need to add the last line */
 752         if( i_field == 0 )
 753         {
 754             p_in += p_pic->p[i_plane].i_pitch;
 755             p_out += p_pic->p[i_plane].i_pitch;
 756             p_vout->p_vlc->pf_memcpy( p_out, p_in,
 757                                       p_pic->p[i_plane].i_pitch );
 758         }
 759     }
 760     EndMerge();
 761 }
 762
 763 static void RenderMean( vout_thread_t *p_vout,
 764                         picture_t *p_outpic, picture_t *p_pic )
 765 {
 766     int i_plane;
 767
 768     /* Copy image and skip lines */
 769     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 770     {
 771         uint8_t *p_in, *p_out_end, *p_out;
 772
 773         p_in = p_pic->p[i_plane].p_pixels;
 774
 775         p_out = p_outpic->p[i_plane].p_pixels;
 776         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 777                              * p_outpic->p[i_plane].i_visible_lines;
 778
 779         /* All lines: mean value */
 780         for( ; p_out < p_out_end ; )
 781         {
 782             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 783                    p_pic->p[i_plane].i_pitch );
 784
 785             p_out += p_pic->p[i_plane].i_pitch;
 786             p_in += 2 * p_pic->p[i_plane].i_pitch;
 787         }
 788     }
 789     EndMerge();
 790 }
 791
 792 static void RenderBlend( vout_thread_t *p_vout,
 793                          picture_t *p_outpic, picture_t *p_pic )
 794 {
 795     int i_plane;
 796
 797     /* Copy image and skip lines */
 798     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 799     {
 800         uint8_t *p_in, *p_out_end, *p_out;
 801
 802         p_in = p_pic->p[i_plane].p_pixels;
 803
 804         p_out = p_outpic->p[i_plane].p_pixels;
 805         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 806                              * p_outpic->p[i_plane].i_visible_lines;
 807
 808         switch( p_vout->render.i_chroma )
 809         {
 810             case VLC_FOURCC('I','4','2','0'):
 811             case VLC_FOURCC('I','Y','U','V'):
 812             case VLC_FOURCC('Y','V','1','2'):
 813                 /* First line: simple copy */
 814                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
 815                                           p_pic->p[i_plane].i_pitch );
 816                 p_out += p_pic->p[i_plane].i_pitch;
 817
 818                 /* Remaining lines: mean value */
 819                 for( ; p_out < p_out_end ; )
 820                 {
 821                    Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 822                           p_pic->p[i_plane].i_pitch );
 823
 824                     p_out += p_pic->p[i_plane].i_pitch;
 825                     p_in += p_pic->p[i_plane].i_pitch;
 826                 }
 827                 break;
 828
 829             case VLC_FOURCC('I','4','2','2'):
 830                 /* First line: simple copy */
 831                 p_vout->p_vlc->pf_memcpy( p_out, p_in,
 832                                           p_pic->p[i_plane].i_pitch );
 833                 p_out += p_pic->p[i_plane].i_pitch;
 834
 835                 /* Remaining lines: mean value */
 836                 if( i_plane == Y_PLANE )
 837                 {
 838                     for( ; p_out < p_out_end ; )
 839                     {
 840                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 841                                p_pic->p[i_plane].i_pitch );
 842
 843                         p_out += p_pic->p[i_plane].i_pitch;
 844                         p_in += p_pic->p[i_plane].i_pitch;
 845                     }
 846                 }
 847
 848                 else
 849                 {
 850                     for( ; p_out < p_out_end ; )
 851                     {
 852                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 853                                p_pic->p[i_plane].i_pitch );
 854
 855                         p_out += p_pic->p[i_plane].i_pitch;
 856                         p_in += 2*p_pic->p[i_plane].i_pitch;
 857                     }
 858                 }
 859                 break;
 860         }
 861     }
 862     EndMerge();
 863 }
 864
 865 #undef Merge
 866
 867 static void MergeGeneric( void *_p_dest, const void *_p_s1,
 868                           const void *_p_s2, size_t i_bytes )
 869 {
 870     uint8_t* p_dest = (uint8_t*)_p_dest;
 871     const uint8_t *p_s1 = (const uint8_t *)_p_s1;
 872     const uint8_t *p_s2 = (const uint8_t *)_p_s2;
 873     uint8_t* p_end = p_dest + i_bytes - 8;
 874
 875     while( p_dest < p_end )
 876     {
 877         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 878         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 879         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 880         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 881         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 882         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 883         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 884         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 885     }
 886
 887     p_end += 8;
 888
 889     while( p_dest < p_end )
 890     {
 891         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 892     }
 893 }
 894
#if defined(CAN_COMPILE_MMXEXT)
/*****************************************************************************
 * MergeMMX: byte-wise average of two buffers, 8 bytes at a time
 *****************************************************************************
 * _p_dest: destination buffer, i_bytes long
 * _p_s1:   first source buffer, i_bytes long
 * _p_s2:   second source buffer, i_bytes long
 * i_bytes: number of bytes to produce
 *
 * The bulk of the line goes through `pavgb`; the trailing bytes (between 1
 * and 8 of them when i_bytes > 0) are averaged in plain C.
 * NOTE(review): per the Intel instruction reference `pavgb` rounds up
 * ((a+b+1)>>1) while the C tail rounds down -- confirm this is acceptable.
 * NOTE(review): p_end is formed as p_dest + i_bytes - 8, which points before
 * the buffer when i_bytes < 8.
 * The MMX state is not cleared here; see EndMMX().
 *****************************************************************************/
static void MergeMMX( void *_p_dest, const void *_p_s1, const void *_p_s2,
                      size_t i_bytes )
{
    uint8_t* p_dest = (uint8_t*)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    /* Stop the 8-byte loop while more than 8 bytes remain */
    uint8_t* p_end = p_dest + i_bytes - 8;
    while( p_dest < p_end )
    {
        /* mm1 = s2[0..7]; mm1 = avg(mm1, s1[0..7]); dest[0..7] = mm1 */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2) );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    /* Restore the real end of the line for the scalar tail */
    p_end += 8;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
 923
 924 #if defined(CAN_COMPILE_SSE)
 925 static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
 926                        size_t i_bytes )
 927 {
 928     uint8_t* p_dest = (uint8_t*)_p_dest;
 929     const uint8_t *p_s1 = (const uint8_t *)_p_s1;
 930     const uint8_t *p_s2 = (const uint8_t *)_p_s2;
 931     uint8_t* p_end;
 932     while( (int)p_s1 % 16 )
 933     {
 934         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 935     }
 936     p_end = p_dest + i_bytes - 16;
 937     while( p_dest < p_end )
 938     {
 939         __asm__  __volatile__( "movdqu %2,%%xmm1;"
 940                                "pavgb %1, %%xmm1;"
 941                                "movdqu %%xmm1, %0" :"=m" (*p_dest):
 942                                                  "m" (*p_s1),
 943                                                  "m" (*p_s2) );
 944         p_dest += 16;
 945         p_s1 += 16;
 946         p_s2 += 16;
 947     }
 948
 949     p_end += 16;
 950
 951     while( p_dest < p_end )
 952     {
 953         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 954     }
 955 }
 956 #endif
 957
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/*****************************************************************************
 * EndMMX: issue the x86 `emms` instruction
 *****************************************************************************
 * Per the Intel instruction reference, `emms` marks the x87 register stack
 * as empty again after MMX code has used the aliased MMX registers.
 * Presumably installed as the "end merge" hook paired with MergeMMX /
 * MergeSSE2 -- verify against the EndMerge macro defined earlier.
 *****************************************************************************/
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
#endif
 964
#ifdef CAN_COMPILE_C_ALTIVEC
/*****************************************************************************
 * MergeAltivec: byte-wise average of two buffers using AltiVec
 *****************************************************************************
 * _p_dest: destination buffer, i_bytes long
 * _p_s1:   first source buffer, i_bytes long
 * _p_s2:   second source buffer, i_bytes long
 * i_bytes: number of bytes to produce
 *
 * Layout of the work:
 *  1. scalar prologue until the destination is 16-byte aligned;
 *  2. vec_avg() on 16-byte chunks while at least 16 bytes remain, with a
 *     separate loop for sources that are not 16-byte aligned;
 *  3. scalar epilogue for the remaining bytes.
 *
 * NOTE(review): the prologue is not bounded by i_bytes, and p_end is formed
 * as p_dest + i_bytes - 15, which points before the buffer for
 * i_bytes < 15 -- callers presumably always pass full picture lines.
 *****************************************************************************/
static void MergeAltivec( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    uint8_t *p_s1   = (uint8_t *)_p_s1;
    uint8_t *p_s2   = (uint8_t *)_p_s2;
    /* The vector loops run while p_dest < p_end, i.e. >= 16 bytes remain */
    uint8_t *p_end  = p_dest + i_bytes - 15;

    /* Use C until the first 16-bytes aligned destination pixel */
    while( (int)p_dest & 0xF )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
    {
        /* Unaligned source: at least one of the two is off a 16-byte
         * boundary, so both are rebuilt from pairs of aligned loads */
        vector unsigned char s1v, s2v, destv;
        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
        vector unsigned char perm1v, perm2v;

        /* Permute vectors derived from each source address, plus the
         * first aligned block of each source */
        perm1v = vec_lvsl( 0, p_s1 );
        perm2v = vec_lvsl( 0, p_s2 );
        s1oldv = vec_ld( 0, p_s1 );
        s2oldv = vec_ld( 0, p_s2 );

        while( p_dest < p_end )
        {
            /* Load the next block and combine it with the previous one
             * through the permute vector to get the 16 source bytes */
            s1newv = vec_ld( 16, p_s1 );
            s2newv = vec_ld( 16, p_s2 );
            s1v    = vec_perm( s1oldv, s1newv, perm1v );
            s2v    = vec_perm( s2oldv, s2newv, perm2v );
            /* The block just loaded becomes the "old" half next time */
            s1oldv = s1newv;
            s2oldv = s2newv;
            destv  = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }
    else
    {
        /* Aligned source */
        vector unsigned char s1v, s2v, destv;

        while( p_dest < p_end )
        {
            s1v   = vec_ld( 0, p_s1 );
            s2v   = vec_ld( 0, p_s2 );
            destv = vec_avg( s1v, s2v );
            vec_st( destv, 0, p_dest );

            p_s1   += 16;
            p_s2   += 16;
            p_dest += 16;
        }
    }

    /* Restore the real end of the line for the scalar tail */
    p_end += 15;

    while( p_dest < p_end )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
#endif
1034
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis; it copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do an ME (motion estimation) between the 2
 *    fields; if there is a good match we use MC (motion compensation) to
 *    recreate the bottom field (with a small blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge-oriented
 *    interpolation.
 *****************************************************************************/
1047
/* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
 * XXX: It needs access to an 8x10 area.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection in smooth/uniform areas with noise doesn't work well,
 * but it's not really a problem because they don't have much detail anyway.
 */
/* ssd: square of a difference */
static inline int ssd( int a )
{
    const int i_square = a * a;

    return i_square;
}
1056 static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
1057 {
1058     int y, x;
1059     int ff, fr;
1060     int fc;
1061
1062     /* Detect interlacing */
1063     fc = 0;
1064     for( y = 0; y < 7; y += 2 )
1065     {
1066         ff = fr = 0;
1067         for( x = 0; x < 8; x++ )
1068         {
1069             fr += ssd(src[      x] - src[1*i_src+x]) +
1070                   ssd(src[i_src+x] - src[2*i_src+x]);
1071             ff += ssd(src[      x] - src[2*i_src+x]) +
1072                   ssd(src[i_src+x] - src[3*i_src+x]);
1073         }
1074         if( ff < 6*fr/8 && fr > 32 )
1075             fc++;
1076
1077         src += 2*i_src;
1078     }
1079
1080     return fc < 1 ? VLC_FALSE : VLC_TRUE;
1081 }
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8DetectMMXEXT: MMX interlacing detector for one 8-pixel wide block.
 *
 * src:   top-left pixel of the block
 * i_src: distance in bytes between two consecutive lines
 *
 * Same per-group test as XDeint8x8DetectC (field cost < 6/8 frame cost and
 * frame cost > 32), with the squared differences accumulated in mm5 (frame)
 * and mm6 (field). The comments below assume the mmx.h macros map to the
 * MMX instruction of the same name with the destination as last operand.
 *
 * Returns the number of groups found interlaced (0 means progressive).
 *
 * NOTE(review): the outer loop runs for y = 0,2,4,6,8, i.e. five groups
 * reading lines 0..11, whereas the C version runs four groups (lines 0..9).
 * Confirm whether `y < 9` is intended.
 */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{

    int y, x;
    int32_t ff, fr;
    int fc;

    /* Detect interlacing */
    fc = 0;
    /* mm7 = 0, used to widen bytes to 16-bit words */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 9; y += 2 )
    {
        ff = fr = 0;
        /* Clear the frame (mm5) and field (mm6) accumulators */
        pxor_r2r( mm5, mm5 );
        pxor_r2r( mm6, mm6 );
        for( x = 0; x < 8; x+=4 )
        {
            /* Load 4 pixels from each of the 4 lines of the group */
            movd_m2r( src[        x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );

            movq_r2r( mm0, mm4 );

            /* mm0 = l0 - l1, mm4 = l0 - l2 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );

            /* mm2 = l2 - l1, mm3 = l3 - l1 */
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );

            /* Square every difference, then accumulate:
             * frame += (l0-l1)^2 + (l2-l1)^2, field += (l0-l2)^2 + (l3-l1)^2 */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        }

        /* Fold the two 32-bit halves of mm5 into fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );

        /* Fold the two 32-bit halves of mm6 into ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );

        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }
    return fc;
}
#endif
1146
/* XDeint8x8Frame: apply a small blend between fields (1,6,1).
 * This won't destroy details, and helps if there is a bit of interlacing.
 * (It helps with panning to avoid flicker)
 * (Uses 8x9 pixels)
 */
#if 0
/* Compiled out. Single-source variant of the (1,6,1) blend: even output
 * lines are copied from src, odd output lines are
 * (above + 6*current + below + 4) >> 3 taken from three consecutive source
 * lines. Reads 9 source lines, writes 8 destination lines.
 */
static inline void XDeint8x8FrameC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < 8; y += 2 )
    {
        /* Even line: straight copy */
        memcpy( dst, src, 8 );
        dst += i_dst;

        /* Odd line: (1,6,1) blend, +4 rounds to nearest */
        for( x = 0; x < 8; x++ )
            dst[x] = (src[x] + 6*src[1*i_src+x] + src[2*i_src+x] + 4 ) >> 3;
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
/* XDeint8x8MergeC: rebuild an 8x8 block from two 8-pixel wide line sources
 * using the (1,6,1) blend.
 *
 * dst / i_dst:   destination block and its line pitch (8 lines are written)
 * src1 / i_src1: source of the even output lines; 5 lines are read
 * src2 / i_src2: source blended into the odd output lines; 4 lines are read
 *
 * For each of the 4 line pairs:
 *   even output line = current src1 line
 *   odd output line  = (src1 line + 6 * src2 line + next src1 line + 4) >> 3
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_odd = dst + i_dst;
        int i;

        /* Even line: straight copy */
        memcpy( dst, src1, 8 );

        /* Odd line: weighted blend, +4 rounds to nearest */
        for( i = 0; i < 8; i++ )
        {
            const int i_sum = src1[i] + 6*src2[i] + src1[i_src1+i] + 4;
            p_odd[i] = i_sum >> 3;
        }

        dst  = p_odd + i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1191
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8MergeMMXEXT: MMX counterpart of XDeint8x8MergeC.
 *
 * dst / i_dst:   destination block and its line pitch (8 lines are written)
 * src1 / i_src1: source of the even output lines
 * src2 / i_src2: source blended into the odd output lines
 *
 * Works on 4 pixels at a time: the even line is copied, the odd line is
 * (src1 + 6*src2 + next src1 + 4) >> 3 computed on 16-bit words. The
 * comments below assume the mmx.h macros map to the MMX instruction of the
 * same name with the destination as last operand.
 */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term (+4) replicated in four 16-bit lanes */
    static const uint64_t m_4 = I64C(0x0004000400040004);
    int y, x;

    /* Progressive */
    /* mm7 = 0, used to widen bytes to words and to pack back */
    pxor_r2r( mm7, mm7 );
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Even line: copy 4 pixels of src1 */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* mm1 = 2*src2, mm3 = 4*src2, so mm1 + mm3 = 6*src2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            /* mm0 = src1 + next src1 */
            paddw_r2r( mm2, mm0 );
            paddw_r2r( mm3, mm1 );
            paddw_m2r( m_4, mm1 );
            paddw_r2r( mm1, mm0 );
            /* Divide by 8 and pack the words back to bytes */
            psraw_i2r( 3, mm0 );
            packuswb_r2r( mm7, mm0 );
            /* Odd line: store the 4 blended pixels */
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}

#endif
1233
/* XDeint8x8Set: debugging helper, paints an 8x8 block with a constant.
 *
 * dst / i_dst: destination block and its line pitch
 * v:           byte value written to all 64 pixels
 */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int i_line = 0;

    while( i_line < 8 )
    {
        uint8_t *p_line = &dst[i_line*i_dst];

        memset( p_line, v, 8 );
        i_line++;
    }
}
1241
/* XDeint8x8FieldE: plain (1,0,1) deinterlacing, used for blocks that lack a
 * horizontal neighbour.
 * Even output lines are copied from the kept field; each odd output line is
 * the average of the kept-field lines directly above and below it.
 * Reads source lines 0, 2, 4, 6 and 8 (an 8x9 area), writes 8 lines.
 * TODO: a better one for the inner part.
 *
 * dst / i_dst: destination block and its line pitch
 * src / i_src: source block and its line pitch
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_below = &src[2*i_src];
        int i;

        /* Kept line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        /* Missing line: mean of the kept lines around it */
        for( i = 0; i < 8; i++ )
        {
            dst[i] = ( src[i] + p_below[i] ) >> 1;
        }
        dst += i_dst;

        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldEMMXEXT: MMX counterpart of XDeint8x8FieldEC.
 *
 * dst / i_dst: destination block and its line pitch (8 lines are written)
 * src / i_src: source block and its line pitch (lines 0,2,4,6,8 are read)
 *
 * Each kept line is copied 8 bytes at once; each missing line is the
 * `pavgb` of the kept lines above and below it.
 * NOTE(review): per the Intel instruction reference `pavgb` rounds up, while
 * the C version truncates -- results can differ by one.
 */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line: copy 8 pixels through mm0 */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Missing line: average with the kept line two lines below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
1287
/* XDeint8x8Field: edge-oriented interpolation of the missing field.
 * (Needs 4 pixels to the left and 5 to the right of the block, and one
 * extra kept line below it.)
 *
 * dst / i_dst: destination block and its line pitch (8 lines are written)
 * src / i_src: source block and its line pitch
 *
 * Kept lines are copied. For each pixel of a missing line, three 8-pixel
 * sums of absolute differences are taken between the kept line above and
 * the kept line below:
 *   c0: above shifted by -1 against below shifted by +1
 *   c1: no shift
 *   c2: above shifted by +1 against below shifted by -1
 * The pixel is interpolated along the first diagonal when c0 < c1 <= c2,
 * along the other one when c2 < c1 <= c0, and vertically otherwise.
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    int i_pair;

    for( i_pair = 0; i_pair < 4; i_pair++ )
    {
        uint8_t *p_up = src;
        uint8_t *p_down = &src[2*i_src];
        int i;

        /* Kept line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( i = 0; i < 8; i++ )
        {
            /* 8 pixels are used just to match the MMX version, but it's
             * overkill: 5 would be enough (less isn't good) */
            int c0 = 0, c1 = 0, c2 = 0;
            int k;

            for( k = -4; k < 4; k++ )
            {
                c0 += abs( p_up[i+k]   - p_down[i+k+2] );
                c1 += abs( p_up[i+k+1] - p_down[i+k+1] );
                c2 += abs( p_up[i+k+2] - p_down[i+k]   );
            }

            if( c0 < c1 && c1 <= c2 )
            {
                dst[i] = ( p_up[i-1] + p_down[i+1] ) >> 1;
            }
            else if( c2 < c1 && c1 <= c0 )
            {
                dst[i] = ( p_up[i+1] + p_down[i-1] ) >> 1;
            }
            else
            {
                dst[i] = ( p_up[i] + p_down[i] ) >> 1;
            }
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeint8x8FieldMMXEXT: edge-oriented interpolation, with the three
 * directional costs computed by `psadbw` instead of C loops.
 *
 * dst / i_dst: destination block and its line pitch (8 lines are written)
 * src / i_src: source block and its line pitch; pixels x-4 .. x+5 of the
 *              kept lines are read for every x in 0..7
 *
 * The cost pairing and the decision rule are the same as in
 * XDeint8x8FieldC; only the final interpolation is done in C.
 */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Kept line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            /* Kept line below the missing one */
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            /* Three 8-pixel windows of the upper line ... */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* ... each compared with a window of the lower line:
             * mm0 -> c2, mm1 -> c1, mm2 -> c0 */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
#endif
1376
#if 0
/* Compiled out. XDeint8x8Ssd: sum of squared differences between two 8x8
 * blocks.
 *
 * pix1 / i_pix1: first block and its line pitch
 * pix2 / i_pix2: second block and its line pitch
 * Returns the sum over the 64 pixel pairs of (pix1 - pix2)^2.
 */
static inline int XDeint8x8SsdC( uint8_t *pix1, int i_pix1,
                                 uint8_t *pix2, int i_pix2 )
{
    int y, x;
    int s = 0;

    for( y = 0; y < 8; y++ )
        for( x = 0; x < 8; x++ )
            s += ssd( pix1[y*i_pix1+x] - pix2[y*i_pix2+x] );
    return s;
}

#ifdef CAN_COMPILE_MMXEXT
/* MMX variant of XDeint8x8SsdC; mm6 accumulates two 32-bit partial sums
 * that are folded together after the loop. */
static inline int XDeint8x8SsdMMXEXT( uint8_t *pix1, int i_pix1,
                                      uint8_t *pix2, int i_pix2 )
{
    int y;
    int32_t s;

    /* mm7 = 0 (byte -> word widening), mm6 = 0 (accumulator) */
    pxor_r2r( mm7, mm7 );
    pxor_r2r( mm6, mm6 );

    for( y = 0; y < 8; y++ )
    {
        movq_m2r( pix1[0], mm0 );
        movq_m2r( pix2[0], mm1 );

        movq_r2r( mm0, mm2 );
        movq_r2r( mm1, mm3 );

        /* Low 4 pixels in mm0/mm1, high 4 pixels in mm2/mm3, as words */
        punpcklbw_r2r( mm7, mm0 );
        punpckhbw_r2r( mm7, mm2 );
        punpcklbw_r2r( mm7, mm1 );
        punpckhbw_r2r( mm7, mm3 );

        psubw_r2r( mm1, mm0 );
        psubw_r2r( mm3, mm2 );

        /* Square the differences and add adjacent pairs */
        pmaddwd_r2r( mm0, mm0 );
        pmaddwd_r2r( mm2, mm2 );

        paddd_r2r( mm2, mm0 );
        paddd_r2r( mm0, mm6 );

        pix1 += i_pix1;
        pix2 += i_pix2;
    }

    /* Fold the two 32-bit halves of the accumulator into s */
    movq_r2r( mm6, mm7 );
    psrlq_i2r( 32, mm7 );
    paddd_r2r( mm6, mm7 );
    movd_r2m( mm7, s );

    return s;
}
#endif
#endif
1435
1436 #if 0
1437 /* A little try with motion, but doesn't work better that pure intra (and slow) */
1438 #ifdef CAN_COMPILE_MMXEXT
/* XDeintMC:
 *  Bilinear MC QPel
 *  TODO: mmx version (easier in sse2)
 *
 * (Part of the compiled-out motion experiment.)
 * dst / i_dst:       destination block and its line pitch
 * src / i_src:       reference picture and its line pitch
 * mvx, mvy:          motion vector in quarter-pel units; the two low bits
 *                    select the fractional position, the rest the integer
 *                    offset
 * i_width, i_height: size of the block to produce
 */
static inline void XDeintMC( uint8_t *dst, int i_dst,
                             uint8_t *src, int i_src,
                             int mvx, int mvy,
                             int i_width, int i_height )
{
    /* Fractional part of the vector, 0..3 quarter pixels */
    const int d4x = mvx&0x03;
    const int d4y = mvy&0x03;

    /* Bilinear weights of the 4 neighbouring pixels; they sum to 16 */
    const int cA = (4-d4x)*(4-d4y);
    const int cB = d4x    *(4-d4y);
    const int cC = (4-d4x)*d4y;
    const int cD = d4x    *d4y;

    int y, x;
    uint8_t *srcp;


    /* Apply the integer part of the vector; srcp is the line below src */
    src  += (mvy >> 2) * i_src + (mvx >> 2);
    srcp = &src[i_src];

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            /* +8 rounds to nearest before the division by 16 */
            dst[x] = ( cA*src[x]  + cB*src[x+1] +
                       cC*srcp[x] + cD*srcp[x+1] + 8 ) >> 4;
        }
        dst  += i_dst;

        src   = srcp;
        srcp += i_src;
    }
}
/* XDeint8x4SadMMXEXT: sum of absolute differences between two blocks that
 * are 8 pixels wide and 4 lines high, using `psadbw`.
 * (Part of the compiled-out motion experiment.)
 *
 * pix1 / i_pix1: first block and its line pitch
 * pix2 / i_pix2: second block and its line pitch
 */
static int XDeint8x4SadMMXEXT( uint8_t *pix1, int i_pix1,
                               uint8_t *pix2, int i_pix2 )
{
    int32_t s;

    /* One SAD per line, in mm0..mm3 */
    movq_m2r( pix1[0*i_pix1], mm0 );
    movq_m2r( pix1[1*i_pix1], mm1 );

    psadbw_m2r( pix2[0*i_pix2], mm0 );
    psadbw_m2r( pix2[1*i_pix2], mm1 );

    movq_m2r( pix1[2*i_pix1], mm2 );
    movq_m2r( pix1[3*i_pix1], mm3 );
    psadbw_m2r( pix2[2*i_pix2], mm2 );
    psadbw_m2r( pix2[3*i_pix2], mm3 );

    /* Add the four per-line results together */
    paddd_r2r( mm1, mm0 );
    paddd_r2r( mm3, mm2 );
    paddd_r2r( mm2, mm0 );
    movd_r2m( mm0, s );

    return s;
}
1499
/* XDeint8x4TestQpel: cost of a quarter-pel motion vector candidate.
 * (Part of the compiled-out motion experiment.)
 *
 * src / i_src:    8x4 block to match
 * ref / i_stride: reference picture and its line pitch
 * mx, my:         candidate vector in quarter-pel units
 * xmax, ymax:     search range in full pixels
 *
 * Returns 255*255*255 when the vector is out of range, otherwise the SAD
 * between src and the motion-compensated reference block.
 */
static inline int XDeint8x4TestQpel( uint8_t *src, int i_src,
                                     uint8_t *ref, int i_stride,
                                     int mx, int my,
                                     int xmax, int ymax )
{
    /* Scratch 8x4 block receiving the interpolated reference */
    uint8_t buffer[8*4];

    if( abs(mx) >= 4*xmax || abs(my) >= 4*ymax )
        return 255*255*255;

    XDeintMC( buffer, 8, ref, i_stride, mx, my, 8, 4 );
    return XDeint8x4SadMMXEXT( src, i_src, buffer, 8 );
}
/* XDeint8x4TestInt: cost of a full-pixel motion vector candidate.
 * (Part of the compiled-out motion experiment.)
 *
 * src / i_src:    8x4 block to match
 * ref / i_stride: reference picture and its line pitch
 * mx, my:         candidate vector in full pixels
 * xmax, ymax:     search range in full pixels
 *
 * Returns 255*255*255 when the vector is out of range, otherwise the SAD
 * between src and the reference block displaced by (mx, my).
 */
static inline int XDeint8x4TestInt( uint8_t *src, int i_src,
                                    uint8_t *ref, int i_stride,
                                    int mx, int my,
                                    int xmax, int ymax )
{
    if( abs(mx) >= xmax || abs(my) >= ymax )
        return 255*255*255;

    return XDeint8x4SadMMXEXT( src, i_src, &ref[my*i_stride+mx], i_stride );
}
1523
/* XDeint8x8FieldMotion: intra interpolation followed by a small motion
 * search against the other field.
 * (Part of the compiled-out motion experiment.)
 *
 * dst / i_dst: destination block and its line pitch
 * src / i_src: source block and its line pitch
 * mpx, mpy:    in: predicted vector (full pixels); out: best full-pixel
 *              vector found for this block
 * xmax, ymax:  search range in full pixels, clamped to 4 internally
 *
 * The block is first rebuilt with XDeint8x8FieldMMXEXT. The interpolated
 * lines are then matched against the other field; when the final cost is
 * below 128 the block is rebuilt again by XDeint8x8MergeMMXEXT from the
 * kept field and the motion-compensated other field.
 */
static inline void XDeint8x8FieldMotion( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src,
                                         int *mpx, int *mpy,
                                         int xmax, int ymax )
{
    /* Search pattern: entries 0..3 are the axis neighbours, 4..7 the
     * diagonal ones */
    static const int dx[8] = { 0,  0, -1, 1, -1, -1,  1, 1 };
    static const int dy[8] = {-1,  1,  0, 0, -1,  1, -1, 1 };
    /* First line of the other field */
    uint8_t *next = &src[i_src];
    const int i_src2 = 2*i_src;
    int mvx, mvy;
    int mvs, s;
    int i_step;

    /* First interpolated line of the destination */
    uint8_t *rec = &dst[i_dst];

    /* We construct with intra method the missing field */
    XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );

    /* Now we will try to find a match with ME with the other field */

    /* ME: A small/partial EPZS
     * We search only for small MV (with high motion intra will be perfect */
    if( xmax > 4 ) xmax = 4;
    if( ymax > 4 ) ymax = 4;

    /* Init with NULL Mv */
    mvx = mvy = 0;
    mvs = XDeint8x4SadMMXEXT( rec, i_src2, next, i_src2 );

    /* Try predicted Mv */
    if( (s=XDeint8x4TestInt( rec, i_src2, next, i_src2, *mpx, *mpy, xmax, ymax)) < mvs )
    {
        mvs = s;
        mvx = *mpx;
        mvy = *mpy;
    }
    /* Search integer pel (small mv) */
    for( i_step = 0; i_step < 4; i_step++ )
    {
        /* c == 4 means "no neighbour improved the cost" */
        int c = 4;
        int s;
        int i;

        for( i = 0; i < 4; i++ )
        {
            s = XDeint8x4TestInt( rec, i_src2,
                                  next, i_src2, mvx+dx[i], mvy+dy[i],
                                  xmax, ymax );
            if( s < mvs )
            {
                mvs = s;
                c = i;
            }
        }
        if( c == 4 )
            break;

        mvx += dx[c];
        mvy += dy[c];
    }
    /* Hand the full-pixel result back to the caller */
    *mpx = mvx;
    *mpy = mvy;

    /* Switch to quarter-pel units */
    mvx <<= 2;
    mvy <<= 2;

    if( mvs > 4 && mvs < 256 )
    {
        /* Search Qpel */
        /* XXX: for now only HPEL (too slow) */
        for( i_step = 0; i_step < 4; i_step++ )
        {
            /* c == 8 means "no neighbour improved the cost" */
            int c = 8;
            int s;
            int i;

            for( i = 0; i < 8; i++ )
            {
                s = XDeint8x4TestQpel( rec, i_src2, next, i_src2,
                                       mvx+dx[i], mvy+dy[i],
                                       xmax, ymax );
                if( s < mvs )
                {
                    mvs = s;
                    c = i;
                }
            }
            if( c == 8 )
                break;

            mvx += dx[c];
            mvy += dy[c];
        }
    }

    if( mvs < 128 )
    {
        /* Good match: rebuild the block from the compensated field */
        uint8_t buffer[8*4];
        XDeintMC( buffer, 8, next, i_src2, mvx, mvy, 8, 4 );
        XDeint8x8MergeMMXEXT( dst, i_dst, src, 2*i_src, buffer, 8 );

        //XDeint8x8Set( dst, i_dst, 0 );
    }
}
1628 #endif
1629 #endif
1630
#if 0
/* Kernel interpolation (1,-5,20,20,-5,1)
 * Loses a bit more detail and adds more aliasing than edge interpolation,
 * but avoids more artifacts
 * (Compiled out.)
 */
/* clip1: clamp an integer to the 0..255 range */
static inline uint8_t clip1( int a )
{
    if( a <= 0 )
        return 0;
    else if( a >= 255 )
        return 255;
    else
        return a;
}
/* XDeint8x8Field (kernel variant): kept lines are copied, missing lines are
 * filtered vertically from six kept lines (two above the current kept line,
 * the current one, and three below). */
static inline void XDeint8x8Field( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Distance between two lines of the same field */
        const int i_src2 = i_src*2;

        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            int pix;

            pix =   1*(src[-2*i_src2+x]+src[3*i_src2+x]) +
                   -5*(src[-1*i_src2+x]+src[2*i_src2+x])
                  +20*(src[ 0*i_src2+x]+src[1*i_src2+x]);

            /* The taps sum to 32: round, divide and clamp */
            dst[x] = clip1( ( pix + 16 ) >> 5 );
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}

#endif
1675
1676 /* NxN arbitray size (and then only use pixel in the NxN block)
1677  */
1678 static inline int XDeintNxNDetect( uint8_t *src, int i_src,
1679                                    int i_height, int i_width )
1680 {
1681     int y, x;
1682     int ff, fr;
1683     int fc;
1684
1685
1686     /* Detect interlacing */
1687     /* FIXME way too simple, need to be more like XDeint8x8Detect */
1688     ff = fr = 0;
1689     fc = 0;
1690     for( y = 0; y < i_height - 2; y += 2 )
1691     {
1692         const uint8_t *s = &src[y*i_src];
1693         for( x = 0; x < i_width; x++ )
1694         {
1695             fr += ssd(s[      x] - s[1*i_src+x]);
1696             ff += ssd(s[      x] - s[2*i_src+x]);
1697         }
1698         if( ff < fr && fr > i_width / 2 )
1699             fc++;
1700     }
1701
1702     return fc < 2 ? VLC_FALSE : VLC_TRUE;
1703 }
1704
/* XDeintNxNFrame: progressive treatment of a block of arbitrary size.
 *
 * dst / i_dst:       destination block and its line pitch
 * src / i_src:       source block and its line pitch
 * i_width, i_height: block size in pixels / lines
 *
 * Even lines are copied. Odd lines get a (1,2,1) blend of the three source
 * lines centred on them, except for the last pair, where the line below is
 * outside the block and a plain average of the two lines is used instead.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int i_line;

    for( i_line = 0; i_line < i_height; i_line += 2 )
    {
        const int b_last_pair = !( i_line < i_height - 2 );
        int i;

        /* Even line */
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd line */
        for( i = 0; i < i_width; i++ )
        {
            if( b_last_pair )
            {
                /* Blend last line */
                dst[i] = ( src[i] + src[1*i_src+i] ) >> 1;
            }
            else
            {
                dst[i] = ( src[i] + 2*src[1*i_src+i]
                                  + src[2*i_src+i] + 2 ) >> 2;
            }
        }
        dst += i_dst;

        src += 2*i_src;
    }
}
1732
/* XDeintNxNField: interlaced treatment of a block of arbitrary size.
 *
 * dst / i_dst:       destination block and its line pitch
 * src / i_src:       source block and its line pitch
 * i_width, i_height: block size in pixels / lines
 *
 * Even lines are copied. Odd lines are the average of the even lines above
 * and below them, except for the last pair, where there is no even line
 * below inside the block and the two lines of the pair are averaged.
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int i_line;

    for( i_line = 0; i_line < i_height; i_line += 2 )
    {
        /* Line averaged with the current one: two lines below in the
         * general case, the next line for the last pair */
        const int i_other = ( i_line < i_height - 2 ) ? 2*i_src : i_src;
        int i;

        /* Even line */
        memcpy( dst, src, i_width );
        dst += i_dst;

        /* Odd line */
        for( i = 0; i < i_width; i++ )
        {
            dst[i] = ( src[i] + src[i_other+i] ) >> 1;
        }
        dst += i_dst;

        src += 2*i_src;
    }
}
1760
/* XDeintNxN: deinterlace a block of arbitrary size.
 *
 * dst / i_dst:       destination block and its line pitch
 * src / i_src:       source block and its line pitch
 * i_width, i_height: block size in pixels / lines
 *
 * Runs XDeintNxNDetect on the block and rebuilds it with XDeintNxNField
 * when it is found interlaced, with XDeintNxNFrame otherwise.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect takes (i_height, i_width), in that order, unlike the
     * Field/Frame helpers: pass the arguments accordingly so that the
     * detector scans i_height lines of i_width pixels and stays inside the
     * block. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1769
1770
/* median: middle value of three integers.
 * Tracks the smallest and largest of the three, then subtracts both from
 * the sum so that only the middle one remains. */
static inline int median( int a, int b, int c )
{
    int i_lo, i_hi;

    /* Order a and b */
    if( b < a )
    {
        i_lo = b;
        i_hi = a;
    }
    else
    {
        i_lo = a;
        i_hi = b;
    }

    /* Widen the range with c if needed */
    if( c < i_lo )
    {
        i_lo = c;
    }
    else if( c > i_hi )
    {
        i_hi = c;
    }

    return a + b + c - i_lo - i_hi;
}
1786
1787
/* XDeintBand8x8C: deinterlace one 8-line band of a plane, block by block.
 *
 * dst / i_dst: first line of the band in the destination, and its pitch
 * src / i_src: first line of the band in the source, and its pitch
 * i_mbx:       number of complete 8-pixel wide blocks in the band
 * i_modx:      width of the trailing partial block, 0 if there is none
 *
 * Each complete block goes through XDeint8x8DetectC:
 *  - progressive: (1,6,1) blend of the two fields (XDeint8x8MergeC);
 *  - interlaced, first or last block of the band: XDeint8x8FieldEC, which
 *    does not read pixels outside the block horizontally;
 *  - interlaced, inner block: edge-oriented XDeint8x8FieldC.
 * The partial block, if any, is handled by XDeintNxN.
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    int i_mb;

    for( i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive: even lines are field 0, odd lines are field 1 */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        if( i_mb > 0 && i_mb < i_mbx - 1 )
        {
            XDeint8x8FieldC( dst, i_dst, src, i_src );
        }
        else
        {
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        }
    }

    if( i_modx )
    {
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* XDeintBand8x8MMXEXT:
 *  Process one band of 8 lines with the MMXEXT routines.
 *
 *  Same walk as the C version: i_mbx blocks of 8x8 pixels, each one sent
 *  to the "field" routine (E variant for the first and last block) when
 *  XDeint8x8DetectMMXEXT() returns non-zero and to the merge routine
 *  otherwise. The i_modx leftover columns are handed to XDeintNxN().
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;
    int i_mb;

    for( i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Even lines start at src, odd lines one pitch further */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src, 2*i_src,
                                  src + i_src, 2*i_src );
            continue;
        }

        if( i_mb == 0 || i_mb == i_last )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    /* Columns that do not fill a whole 8 pixels wide block */
    if( i_modx != 0 )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
1853
1854 static void RenderX( vout_thread_t *p_vout,
1855                      picture_t *p_outpic, picture_t *p_pic )
1856 {
1857     vout_sys_t *p_sys = p_vout->p_sys;
1858     int i_plane;
1859
1860     /* Copy image and skip lines */
1861     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1862     {
1863         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1864         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1865
1866         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1867         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1868
1869         const int i_dst = p_outpic->p[i_plane].i_pitch;
1870         const int i_src = p_pic->p[i_plane].i_pitch;
1871
1872         int y, x;
1873
1874         for( y = 0; y < i_mby; y++ )
1875         {
1876             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1877             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1878
1879 #ifdef CAN_COMPILE_MMXEXT
1880             if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMXEXT )
1881                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1882             else
1883 #endif
1884                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1885         }
1886
1887         /* Last line (C only)*/
1888         if( i_mody )
1889         {
1890             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1891             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1892
1893             for( x = 0; x < i_mbx; x++ )
1894             {
1895                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1896
1897                 dst += 8;
1898                 src += 8;
1899             }
1900
1901             if( i_modx )
1902                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1903         }
1904     }
1905
1906 #ifdef CAN_COMPILE_MMXEXT
1907     if( p_vout->p_libvlc->i_cpu & CPU_CAPABILITY_MMXEXT )
1908         emms();
1909 #endif
1910 }
1911
1912 /*****************************************************************************
1913  * SendEvents: forward mouse and keyboard events to the parent p_vout
1914  *****************************************************************************/
1915 static int SendEvents( vlc_object_t *p_this, char const *psz_var,
1916                        vlc_value_t oldval, vlc_value_t newval, void *_p_vout )
1917 {
1918     vout_thread_t *p_vout = (vout_thread_t *)_p_vout;
1919     vlc_value_t sentval = newval;
1920
1921     if( !strcmp( psz_var, "mouse-y" ) )
1922     {
1923         switch( p_vout->p_sys->i_mode )
1924         {
1925             case DEINTERLACE_MEAN:
1926             case DEINTERLACE_DISCARD:
1927                 sentval.i_int *= 2;
1928                 break;
1929         }
1930     }
1931
1932     var_Set( p_vout, psz_var, sentval );
1933
1934     return VLC_SUCCESS;
1935 }
1936
1937 /*****************************************************************************
1938  * FilterCallback: called when changing the deinterlace method on the fly.
1939  *****************************************************************************/
1940 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1941                            vlc_value_t oldval, vlc_value_t newval,
1942                            void *p_data )
1943 {
1944     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1945     int i_old_mode = p_vout->p_sys->i_mode;
1946
1947     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1948
1949     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
1950
1951     SetFilterMethod( p_vout, newval.psz_string );
1952
1953     switch( p_vout->render.i_chroma )
1954     {
1955     case VLC_FOURCC('I','4','2','2'):
1956         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1957         return VLC_SUCCESS;
1958         break;
1959
1960     case VLC_FOURCC('I','4','2','0'):
1961     case VLC_FOURCC('I','Y','U','V'):
1962     case VLC_FOURCC('Y','V','1','2'):
1963         switch( p_vout->p_sys->i_mode )
1964         {
1965         case DEINTERLACE_MEAN:
1966         case DEINTERLACE_DISCARD:
1967             if( ( i_old_mode == DEINTERLACE_MEAN )
1968                 || ( i_old_mode == DEINTERLACE_DISCARD ) )
1969             {
1970                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1971                 return VLC_SUCCESS;
1972             }
1973             break;
1974
1975         case DEINTERLACE_BOB:
1976         case DEINTERLACE_BLEND:
1977         case DEINTERLACE_LINEAR:
1978             if( ( i_old_mode == DEINTERLACE_BOB )
1979                 || ( i_old_mode == DEINTERLACE_BLEND )
1980                 || ( i_old_mode == DEINTERLACE_LINEAR ) )
1981             {
1982                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
1983                 return VLC_SUCCESS;
1984             }
1985             break;
1986         }
1987         break;
1988
1989     default:
1990         break;
1991     }
1992
1993     /* We need to kill the old vout */
1994
1995     DEL_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
1996
1997     vlc_object_detach( p_vout->p_sys->p_vout );
1998     vout_Destroy( p_vout->p_sys->p_vout );
1999
2000     /* Try to open a new video output */
2001     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
2002
2003     if( p_vout->p_sys->p_vout == NULL )
2004     {
2005         /* Everything failed */
2006         msg_Err( p_vout, "cannot open vout, aborting" );
2007
2008         vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2009         return VLC_EGENERIC;
2010     }
2011
2012     ADD_CALLBACKS( p_vout->p_sys->p_vout, SendEvents );
2013
2014     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
2015     return VLC_SUCCESS;
2016 }
2017
2018 /*****************************************************************************
2019  * SendEventsToChild: forward events to the child/children vout
2020  *****************************************************************************/
2021 static int SendEventsToChild( vlc_object_t *p_this, char const *psz_var,
2022                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
2023 {
2024     vout_thread_t *p_vout = (vout_thread_t *)p_this;
2025     var_Set( p_vout->p_sys->p_vout, psz_var, newval );
2026     return VLC_SUCCESS;
2027 }