git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace.c

   1 /*****************************************************************************
   2  * deinterlace.c : deinterlacer plugin for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2000-2009 the VideoLAN team
   5  * $Id$
   6  *
   7  * Author: Sam Hocevar <sam@zoy.org>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  22  *****************************************************************************/
  23
  24 /*****************************************************************************
  25  * Preamble
  26  *****************************************************************************/
  27
  28 #ifdef HAVE_CONFIG_H
  29 # include "config.h"
  30 #endif
  31
  32 #include <errno.h>
  33 #include <assert.h>
  34
  35 #ifdef HAVE_ALTIVEC_H
  36 #   include <altivec.h>
  37 #endif
  38
  39 #include <vlc_common.h>
  40 #include <vlc_plugin.h>
  41 #include <vlc_vout.h>
  42 #include <vlc_sout.h>
  43 #include <vlc_filter.h>
  44 #include <vlc_cpu.h>
  45
  46 #ifdef CAN_COMPILE_MMXEXT
  47 #   include "mmx.h"
  48 #endif
  49
  50 #include "filter_common.h"
  51
/* Identifiers of the available deinterlacing algorithms.
 * One of these is stored in vout_sys_t::i_mode by SetFilterMethod() and
 * Render() dispatches on it to pick the matching Render*() routine. */
#define DEINTERLACE_DISCARD 1
#define DEINTERLACE_MEAN    2
#define DEINTERLACE_BLEND   3
#define DEINTERLACE_BOB     4
#define DEINTERLACE_LINEAR  5
#define DEINTERLACE_X       6
#define DEINTERLACE_YADIF   7
#define DEINTERLACE_YADIF2X 8
  60
/*****************************************************************************
 * Local prototypes
 *****************************************************************************/
/* Module entry points for the "video filter" capability */
static int  Create    ( vlc_object_t * );
static void Destroy   ( vlc_object_t * );

/* Video output thread callbacks installed by Create() */
static int  Init      ( vout_thread_t * );
static void End       ( vout_thread_t * );
static void Render    ( vout_thread_t *, picture_t * );

/* Variable callback forwarding mouse events from the child vout */
static int  MouseEvent( vlc_object_t *p_this, char const *psz_var,
                        vlc_value_t oldval, vlc_value_t newval, void *p_data );

/* One rendering routine per deinterlacing algorithm */
static void RenderDiscard( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderBob    ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderMean   ( vout_thread_t *, picture_t *, picture_t * );
static void RenderBlend  ( vout_thread_t *, picture_t *, picture_t * );
static void RenderLinear ( vout_thread_t *, picture_t *, picture_t *, int );
static void RenderX      ( picture_t *, picture_t * );
static void RenderYadif  ( vout_thread_t *, picture_t *, picture_t *, int, int );

/* Line merging routines; Create() selects one of them according to the CPU
 * capabilities and stores it in vout_sys_t::pf_merge */
static void MergeGeneric ( void *, const void *, const void *, size_t );
#if defined(CAN_COMPILE_C_ALTIVEC)
static void MergeAltivec ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_MMXEXT)
static void MergeMMXEXT  ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void Merge3DNow   ( void *, const void *, const void *, size_t );
#endif
#if defined(CAN_COMPILE_SSE)
static void MergeSSE2    ( void *, const void *, const void *, size_t );
#endif
/* Optional epilogues stored in vout_sys_t::pf_end_merge */
#if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
static void EndMMX       ( void );
#endif
#if defined(CAN_COMPILE_3DNOW)
static void End3DNow     ( void );
#endif
#if defined __ARM_NEON__
static void MergeNEON (void *, const void *, const void *, size_t);
#endif

static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method );
static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout );

/* Module entry points for the "video filter2" submodule */
static int OpenFilter( vlc_object_t *p_this );
static void CloseFilter( vlc_object_t *p_this );

/*****************************************************************************
 * Callback prototypes
 *****************************************************************************/
/* Registered on the "filter-deinterlace-mode" variable by Init() */
static int FilterCallback( vlc_object_t *, char const *,
                           vlc_value_t, vlc_value_t, void * );
 116
/*****************************************************************************
 * Module descriptor
 *****************************************************************************
 * Two entry points are declared:
 *  - the main module, capability "video filter", driven by Create/Destroy
 *    and configured through "filter-deinterlace-mode";
 *  - a submodule, capability "video filter2", driven by
 *    OpenFilter/CloseFilter and configured through
 *    FILTER_CFG_PREFIX "mode" (i.e. "sout-deinterlace-mode").
 *****************************************************************************/
#define MODE_TEXT N_("Deinterlace mode")
#define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")

#define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
#define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")

#define FILTER_CFG_PREFIX "sout-deinterlace-"

/* Parallel arrays handed together to change_string_list(): mode_list holds
 * the option values, mode_list_text the label at the same index. */
static const char *const mode_list[] = {
    "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
static const char *const mode_list_text[] = {
    N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };

vlc_module_begin ()
    set_description( N_("Deinterlacing video filter") )
    set_shortname( N_("Deinterlace" ))
    set_capability( "video filter", 0 )
    set_category( CAT_VIDEO )
    set_subcategory( SUBCAT_VIDEO_VFILTER )

    set_section( N_("Display"),NULL)
    /* "discard" is presumably the default value -- confirm against the
     * add_string() macro definition */
    add_string( "filter-deinterlace-mode", "discard", NULL, MODE_TEXT,
                MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
        change_safe ()

    add_shortcut( "deinterlace" )
    set_callbacks( Create, Destroy )

    add_submodule ()
    set_capability( "video filter2", 0 )
    set_section( N_("Streaming"),NULL)
    /* "blend" is presumably the default value for the streaming variant */
    add_string( FILTER_CFG_PREFIX "mode", "blend", NULL, SOUT_MODE_TEXT,
                SOUT_MODE_LONGTEXT, false )
        change_string_list( mode_list, mode_list_text, 0 )
    add_shortcut( "deinterlace" )
    set_callbacks( OpenFilter, CloseFilter )
vlc_module_end ()
 158
/* NULL-terminated list of option names.
 * NOTE(review): presumably combined with FILTER_CFG_PREFIX when the
 * "video filter2" submodule parses its configuration -- the use site is not
 * visible in this part of the file. */
static const char *const ppsz_filter_options[] = {
    "mode", NULL
};
 162
/*****************************************************************************
 * vout_sys_t: Deinterlace video output method descriptor
 *****************************************************************************
 * This structure is part of the video output thread descriptor.
 * It describes the Deinterlace specific properties of an output thread.
 *****************************************************************************/
#define HISTORY_SIZE (3)
struct vout_sys_t
{
    int        i_mode;        /* Deinterlace mode (one of DEINTERLACE_*) */
    bool b_double_rate; /* Shall we double the framerate? */
    bool b_half_height; /* Shall we divide the height by 2? */

    mtime_t    last_date;     /* Date of the previous input picture; used by
                               * Render() to date the second output picture
                               * in double rate modes (0 = none seen yet) */
    mtime_t    next_date;

    vout_thread_t *p_vout;    /* Child ("real") video output */

    vlc_mutex_t filter_lock;  /* Held by Render() while a picture is processed */

    /* Line merging routine and its optional epilogue (may be NULL),
     * selected in Create() according to the CPU capabilities */
    void (*pf_merge) ( void *, const void *, const void *, size_t );
    void (*pf_end_merge) ( void );

    /* Yadif */
    picture_t *pp_history[HISTORY_SIZE];
};
 189
 190 /*****************************************************************************
 191  * Control: control facility for the vout (forwards to child vout)
 192  *****************************************************************************/
 193 static int Control( vout_thread_t *p_vout, int i_query, va_list args )
 194 {
 195     return vout_vaControl( p_vout->p_sys->p_vout, i_query, args );
 196 }
 197
 198 /*****************************************************************************
 199  * Create: allocates Deinterlace video thread output method
 200  *****************************************************************************
 201  * This function allocates and initializes a Deinterlace vout method.
 202  *****************************************************************************/
 203 static int Create( vlc_object_t *p_this )
 204 {
 205     vout_thread_t *p_vout = (vout_thread_t *)p_this;
 206     vout_sys_t *p_sys;
 207     char *psz_mode;
 208
 209     /* Allocate structure */
 210     p_sys = p_vout->p_sys = malloc( sizeof( vout_sys_t ) );
 211     if( p_vout->p_sys == NULL )
 212         return VLC_ENOMEM;
 213
 214     p_vout->pf_init = Init;
 215     p_vout->pf_end = End;
 216     p_vout->pf_manage = NULL;
 217     p_vout->pf_render = Render;
 218     p_vout->pf_display = NULL;
 219     p_vout->pf_control = Control;
 220
 221     p_sys->i_mode = DEINTERLACE_DISCARD;
 222     p_sys->b_double_rate = false;
 223     p_sys->b_half_height = true;
 224     p_sys->last_date = 0;
 225     p_sys->p_vout = 0;
 226     vlc_mutex_init( &p_sys->filter_lock );
 227
 228 #if defined(CAN_COMPILE_C_ALTIVEC)
 229     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
 230     {
 231         p_sys->pf_merge = MergeAltivec;
 232         p_sys->pf_end_merge = NULL;
 233     }
 234     else
 235 #endif
 236 #if defined(CAN_COMPILE_SSE)
 237     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
 238     {
 239         p_sys->pf_merge = MergeSSE2;
 240         p_sys->pf_end_merge = EndMMX;
 241     }
 242     else
 243 #endif
 244 #if defined(CAN_COMPILE_MMXEXT)
 245     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
 246     {
 247         p_sys->pf_merge = MergeMMXEXT;
 248         p_sys->pf_end_merge = EndMMX;
 249     }
 250     else
 251 #endif
 252 #if defined(CAN_COMPILE_3DNOW)
 253     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
 254     {
 255         p_sys->pf_merge = Merge3DNow;
 256         p_sys->pf_end_merge = End3DNow;
 257     }
 258     else
 259 #endif
 260 #if defined __ARM_NEON__
 261     if( vlc_CPU() & CPU_CAPABILITY_NEON )
 262     {
 263         p_sys->pf_merge = MergeNEON;
 264         p_sys->pf_end_merge = NULL;
 265     }
 266     else
 267 #endif
 268     {
 269         p_sys->pf_merge = MergeGeneric;
 270         p_sys->pf_end_merge = NULL;
 271     }
 272
 273     /* Look what method was requested */
 274     psz_mode = var_CreateGetString( p_vout, "filter-deinterlace-mode" );
 275
 276     if( !psz_mode )
 277     {
 278         msg_Err( p_vout, "configuration variable filter-deinterlace-mode empty" );
 279         msg_Err( p_vout, "no deinterlace mode provided, using \"discard\"" );
 280
 281         psz_mode = strdup( "discard" );
 282     }
 283
 284     SetFilterMethod( p_vout, psz_mode );
 285
 286     free( psz_mode );
 287
 288     return VLC_SUCCESS;
 289 }
 290
 291 /*****************************************************************************
 292  * SetFilterMethod: setup the deinterlace method to use.
 293  *****************************************************************************/
 294 static void SetFilterMethod( vout_thread_t *p_vout, const char *psz_method )
 295 {
 296     vout_sys_t *p_sys = p_vout->p_sys;
 297     if( !strcmp( psz_method, "mean" ) )
 298     {
 299         p_sys->i_mode = DEINTERLACE_MEAN;
 300         p_sys->b_double_rate = false;
 301         p_sys->b_half_height = true;
 302     }
 303     else if( !strcmp( psz_method, "blend" )
 304              || !strcmp( psz_method, "average" )
 305              || !strcmp( psz_method, "combine-fields" ) )
 306     {
 307         p_sys->i_mode = DEINTERLACE_BLEND;
 308         p_sys->b_double_rate = false;
 309         p_sys->b_half_height = false;
 310     }
 311     else if( !strcmp( psz_method, "bob" )
 312              || !strcmp( psz_method, "progressive-scan" ) )
 313     {
 314         p_sys->i_mode = DEINTERLACE_BOB;
 315         p_sys->b_double_rate = true;
 316         p_sys->b_half_height = false;
 317     }
 318     else if( !strcmp( psz_method, "linear" ) )
 319     {
 320         p_sys->i_mode = DEINTERLACE_LINEAR;
 321         p_sys->b_double_rate = true;
 322         p_sys->b_half_height = false;
 323     }
 324     else if( !strcmp( psz_method, "x" ) )
 325     {
 326         p_sys->i_mode = DEINTERLACE_X;
 327         p_sys->b_double_rate = false;
 328         p_sys->b_half_height = false;
 329     }
 330     else if( !strcmp( psz_method, "yadif" ) )
 331     {
 332         p_sys->i_mode = DEINTERLACE_YADIF;
 333         p_sys->b_double_rate = false;
 334         p_sys->b_half_height = false;
 335     }
 336     else if( !strcmp( psz_method, "yadif2x" ) )
 337     {
 338         p_sys->i_mode = DEINTERLACE_YADIF2X;
 339         p_sys->b_double_rate = true;
 340         p_sys->b_half_height = false;
 341     }
 342     else
 343     {
 344         const bool b_i422 = p_vout->render.i_chroma == VLC_CODEC_I422 ||
 345                             p_vout->render.i_chroma == VLC_CODEC_J422;
 346         if( strcmp( psz_method, "discard" ) )
 347             msg_Err( p_vout, "no valid deinterlace mode provided, "
 348                      "using \"discard\"" );
 349
 350         p_sys->i_mode = DEINTERLACE_DISCARD;
 351         p_sys->b_double_rate = false;
 352         p_sys->b_half_height = !b_i422;
 353     }
 354
 355     msg_Dbg( p_vout, "using %s deinterlace method", psz_method );
 356 }
 357
 358 static void GetOutputFormat( vout_thread_t *p_vout,
 359                              video_format_t *p_dst, const video_format_t *p_src )
 360 {
 361     *p_dst = *p_src;
 362
 363     if( p_vout->p_sys->b_half_height )
 364     {
 365         p_dst->i_height /= 2;
 366         p_dst->i_visible_height /= 2;
 367         p_dst->i_y_offset /= 2;
 368         p_dst->i_sar_den *= 2;
 369     }
 370
 371     if( p_src->i_chroma == VLC_CODEC_I422 ||
 372         p_src->i_chroma == VLC_CODEC_J422 )
 373     {
 374         switch( p_vout->p_sys->i_mode )
 375         {
 376         case DEINTERLACE_MEAN:
 377         case DEINTERLACE_LINEAR:
 378         case DEINTERLACE_X:
 379         case DEINTERLACE_YADIF:
 380         case DEINTERLACE_YADIF2X:
 381             p_dst->i_chroma = p_src->i_chroma;
 382             break;
 383         default:
 384             p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
 385                                                                   VLC_CODEC_J420;
 386             break;
 387         }
 388     }
 389 }
 390
 391 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
 392 {
 393     return i_chroma == VLC_CODEC_I420 ||
 394            i_chroma == VLC_CODEC_J420 ||
 395            i_chroma == VLC_CODEC_YV12 ||
 396            i_chroma == VLC_CODEC_I422 ||
 397            i_chroma == VLC_CODEC_J422;
 398 }
 399
 400 /*****************************************************************************
 401  * Init: initialize Deinterlace video thread output method
 402  *****************************************************************************/
 403 static int Init( vout_thread_t *p_vout )
 404 {
 405     I_OUTPUTPICTURES = 0;
 406
 407     if( !IsChromaSupported( p_vout->render.i_chroma ) )
 408         return VLC_EGENERIC; /* unknown chroma */
 409
 410     /* Initialize the output structure, full of directbuffers since we want
 411      * the decoder to output directly to our structures. */
 412     p_vout->output.i_chroma = p_vout->render.i_chroma;
 413     p_vout->output.i_width  = p_vout->render.i_width;
 414     p_vout->output.i_height = p_vout->render.i_height;
 415     p_vout->output.i_aspect = p_vout->render.i_aspect;
 416     p_vout->fmt_out = p_vout->fmt_in;
 417
 418     /* Try to open the real video output */
 419     p_vout->p_sys->p_vout = SpawnRealVout( p_vout );
 420
 421     if( p_vout->p_sys->p_vout == NULL )
 422     {
 423         /* Everything failed */
 424         msg_Err( p_vout, "cannot open vout, aborting" );
 425
 426         return VLC_EGENERIC;
 427     }
 428
 429     for( int i = 0; i < HISTORY_SIZE; i++ )
 430         p_vout->p_sys->pp_history[i] = NULL;
 431
 432     vout_filter_AllocateDirectBuffers( p_vout, VOUT_MAX_PICTURES );
 433
 434     vout_filter_AddChild( p_vout, p_vout->p_sys->p_vout, MouseEvent );
 435
 436     var_AddCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
 437
 438     return VLC_SUCCESS;
 439 }
 440
 441 /*****************************************************************************
 442  * SpawnRealVout: spawn the real video output.
 443  *****************************************************************************/
 444 static vout_thread_t *SpawnRealVout( vout_thread_t *p_vout )
 445 {
 446     msg_Dbg( p_vout, "spawning the real video output" );
 447
 448     video_format_t fmt;
 449     GetOutputFormat( p_vout, &fmt, &p_vout->fmt_out );
 450
 451     return vout_Create( p_vout, &fmt );
 452 }
 453
 454 /*****************************************************************************
 455  * End: terminate Deinterlace video thread output method
 456  *****************************************************************************/
 457 static void End( vout_thread_t *p_vout )
 458 {
 459     vout_sys_t *p_sys = p_vout->p_sys;
 460
 461     var_DelCallback( p_vout, "filter-deinterlace-mode", FilterCallback, NULL );
 462
 463     for( int i = 0; i < HISTORY_SIZE; i++ )
 464     {
 465         if( p_sys->pp_history[i] )
 466             picture_Release( p_sys->pp_history[i] );
 467     }
 468
 469     if( p_sys->p_vout )
 470     {
 471         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
 472         vout_CloseAndRelease( p_sys->p_vout );
 473     }
 474
 475     vout_filter_ReleaseDirectBuffers( p_vout );
 476 }
 477
 478 /*****************************************************************************
 479  * Destroy: destroy Deinterlace video thread output method
 480  *****************************************************************************
 481  * Terminate an output method created by DeinterlaceCreateOutputMethod
 482  *****************************************************************************/
 483 static void Destroy( vlc_object_t *p_this )
 484 {
 485     vout_thread_t *p_vout = (vout_thread_t *)p_this;
 486     vlc_mutex_destroy( &p_vout->p_sys->filter_lock );
 487     free( p_vout->p_sys );
 488 }
 489
 490 /**
 491  * Forward mouse event with proper conversion.
 492  */
 493 static int MouseEvent( vlc_object_t *p_this, char const *psz_var,
 494                        vlc_value_t oldval, vlc_value_t newval, void *p_data )
 495 {
 496     vout_thread_t *p_vout = p_data;
 497     VLC_UNUSED(p_this); VLC_UNUSED(oldval);
 498
 499     if( !strcmp( psz_var, "mouse-y" ) && p_vout->p_sys->b_half_height )
 500         newval.i_int *= 2;
 501
 502     return var_Set( p_vout, psz_var, newval );
 503 }
 504
 505 /*****************************************************************************
 506  * Render: displays previously rendered output
 507  *****************************************************************************
 508  * This function send the currently rendered image to Deinterlace image,
 509  * waits until it is displayed and switch the two rendering buffers, preparing
 510  * next frame.
 511  *****************************************************************************/
 512 static void Render ( vout_thread_t *p_vout, picture_t *p_pic )
 513 {
 514     vout_sys_t *p_sys = p_vout->p_sys;
 515     picture_t *pp_outpic[2];
 516
 517     /* FIXME are they needed ? */
 518     p_vout->fmt_out.i_x_offset = p_vout->fmt_in.i_x_offset;
 519     p_vout->fmt_out.i_y_offset = p_vout->fmt_in.i_y_offset;
 520     p_vout->fmt_out.i_visible_width = p_vout->fmt_in.i_visible_width;
 521     p_vout->fmt_out.i_visible_height = p_vout->fmt_in.i_visible_height;
 522
 523     /* FIXME p_sys->p_vout->* should NOT be changed FIXME */
 524     p_sys->p_vout->fmt_in.i_x_offset = p_vout->fmt_out.i_x_offset;
 525     p_sys->p_vout->fmt_in.i_y_offset = p_vout->fmt_out.i_y_offset;
 526     p_sys->p_vout->fmt_in.i_visible_width = p_vout->fmt_out.i_visible_width;
 527     p_sys->p_vout->fmt_in.i_visible_height = p_vout->fmt_in.i_visible_height;
 528     if( p_vout->p_sys->b_half_height )
 529     {
 530         p_sys->p_vout->fmt_in.i_y_offset /= 2;
 531         p_sys->p_vout->fmt_in.i_visible_height /= 2;
 532     }
 533
 534     if( p_vout->i_changes & VOUT_ASPECT_CHANGE )
 535     {
 536         p_vout->i_changes &= ~VOUT_ASPECT_CHANGE;
 537
 538         p_vout->fmt_out.i_aspect = p_vout->fmt_in.i_aspect;
 539         p_vout->fmt_out.i_sar_num = p_vout->fmt_in.i_sar_num;
 540         p_vout->fmt_out.i_sar_den = p_vout->fmt_in.i_sar_den;
 541
 542         video_format_t fmt = p_vout->fmt_out;
 543         if( p_vout->p_sys->b_half_height )
 544         {
 545             fmt.i_height /= 2; fmt.i_visible_height /= 2; fmt.i_y_offset /= 2;
 546             fmt.i_sar_den *= 2;
 547         }
 548
 549         p_sys->p_vout = vout_Request( p_vout, p_sys->p_vout, &fmt );
 550     }
 551     if( !p_sys->p_vout )
 552         return;
 553
 554     pp_outpic[0] = pp_outpic[1] = NULL;
 555
 556     vlc_mutex_lock( &p_vout->p_sys->filter_lock );
 557
 558     /* Get a new picture */
 559     while( ( pp_outpic[0] = vout_CreatePicture( p_vout->p_sys->p_vout,
 560                                                 0, 0, 0 ) )
 561               == NULL )
 562     {
 563         if( !vlc_object_alive( p_vout ) || p_vout->b_error )
 564         {
 565             vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 566             return;
 567         }
 568         msleep( VOUT_OUTMEM_SLEEP );
 569     }
 570
 571     pp_outpic[0]->date = p_pic->date;
 572
 573     /* If we are using double rate, get an additional new picture */
 574     if( p_vout->p_sys->b_double_rate )
 575     {
 576         while( ( pp_outpic[1] = vout_CreatePicture( p_vout->p_sys->p_vout,
 577                                                  0, 0, 0 ) )
 578                   == NULL )
 579         {
 580             if( !vlc_object_alive( p_vout ) || p_vout->b_error )
 581             {
 582                 vout_DestroyPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 583                 vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 584                 return;
 585             }
 586             msleep( VOUT_OUTMEM_SLEEP );
 587         }
 588
 589         /* 20ms is a bit arbitrary, but it's only for the first image we get */
 590         if( !p_vout->p_sys->last_date )
 591             pp_outpic[1]->date = p_pic->date + 20000;
 592         else
 593             pp_outpic[1]->date = (3 * p_pic->date - p_vout->p_sys->last_date) / 2;
 594         p_vout->p_sys->last_date = p_pic->date;
 595     }
 596
 597     switch( p_vout->p_sys->i_mode )
 598     {
 599         case DEINTERLACE_DISCARD:
 600             RenderDiscard( p_vout, pp_outpic[0], p_pic, 0 );
 601             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 602             break;
 603
 604         case DEINTERLACE_BOB:
 605             RenderBob( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
 606             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 607             RenderBob( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
 608             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
 609             break;
 610
 611         case DEINTERLACE_LINEAR:
 612             RenderLinear( p_vout, pp_outpic[0], p_pic, p_pic->b_top_field_first ? 0 : 1 );
 613             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 614             RenderLinear( p_vout, pp_outpic[1], p_pic, p_pic->b_top_field_first ? 1 : 0 );
 615             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
 616             break;
 617
 618         case DEINTERLACE_MEAN:
 619             RenderMean( p_vout, pp_outpic[0], p_pic );
 620             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 621             break;
 622
 623         case DEINTERLACE_BLEND:
 624             RenderBlend( p_vout, pp_outpic[0], p_pic );
 625             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 626             break;
 627
 628         case DEINTERLACE_X:
 629             RenderX( pp_outpic[0], p_pic );
 630             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 631             break;
 632
 633         case DEINTERLACE_YADIF:
 634             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, 0 );
 635             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 636             break;
 637
 638         case DEINTERLACE_YADIF2X:
 639             RenderYadif( p_vout, pp_outpic[0], p_pic, 0, p_pic->b_top_field_first ? 0 : 1 );
 640             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[0] );
 641             RenderYadif( p_vout, pp_outpic[1], p_pic, 1, p_pic->b_top_field_first ? 1 : 0 );
 642             vout_DisplayPicture( p_vout->p_sys->p_vout, pp_outpic[1] );
 643             break;
 644     }
 645     vlc_mutex_unlock( &p_vout->p_sys->filter_lock );
 646 }
 647
 648 /*****************************************************************************
 649  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
 650  *****************************************************************************/
 651 static void RenderDiscard( vout_thread_t *p_vout,
 652                            picture_t *p_outpic, picture_t *p_pic, int i_field )
 653 {
 654     int i_plane;
 655
 656     /* Copy image and skip lines */
 657     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 658     {
 659         uint8_t *p_in, *p_out_end, *p_out;
 660         int i_increment;
 661
 662         p_in = p_pic->p[i_plane].p_pixels
 663                    + i_field * p_pic->p[i_plane].i_pitch;
 664
 665         p_out = p_outpic->p[i_plane].p_pixels;
 666         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 667                              * p_outpic->p[i_plane].i_visible_lines;
 668
 669         switch( p_vout->render.i_chroma )
 670         {
 671         case VLC_CODEC_I420:
 672         case VLC_CODEC_J420:
 673         case VLC_CODEC_YV12:
 674
 675             for( ; p_out < p_out_end ; )
 676             {
 677                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 678
 679                 p_out += p_outpic->p[i_plane].i_pitch;
 680                 p_in += 2 * p_pic->p[i_plane].i_pitch;
 681             }
 682             break;
 683
 684         case VLC_CODEC_I422:
 685         case VLC_CODEC_J422:
 686
 687             i_increment = 2 * p_pic->p[i_plane].i_pitch;
 688
 689             if( i_plane == Y_PLANE )
 690             {
 691                 for( ; p_out < p_out_end ; )
 692                 {
 693                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 694                     p_out += p_outpic->p[i_plane].i_pitch;
 695                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 696                     p_out += p_outpic->p[i_plane].i_pitch;
 697                     p_in += i_increment;
 698                 }
 699             }
 700             else
 701             {
 702                 for( ; p_out < p_out_end ; )
 703                 {
 704                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 705                     p_out += p_outpic->p[i_plane].i_pitch;
 706                     p_in += i_increment;
 707                 }
 708             }
 709             break;
 710
 711         default:
 712             break;
 713         }
 714     }
 715 }
 716
 717 /*****************************************************************************
 718  * RenderBob: renders a BOB picture - simple copy
 719  *****************************************************************************/
 720 static void RenderBob( vout_thread_t *p_vout,
 721                        picture_t *p_outpic, picture_t *p_pic, int i_field )
 722 {
 723     int i_plane;
 724
 725     /* Copy image and skip lines */
 726     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 727     {
 728         uint8_t *p_in, *p_out_end, *p_out;
 729
 730         p_in = p_pic->p[i_plane].p_pixels;
 731         p_out = p_outpic->p[i_plane].p_pixels;
 732         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 733                              * p_outpic->p[i_plane].i_visible_lines;
 734
 735         switch( p_vout->render.i_chroma )
 736         {
 737             case VLC_CODEC_I420:
 738             case VLC_CODEC_J420:
 739             case VLC_CODEC_YV12:
 740                 /* For BOTTOM field we need to add the first line */
 741                 if( i_field == 1 )
 742                 {
 743                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 744                     p_in += p_pic->p[i_plane].i_pitch;
 745                     p_out += p_outpic->p[i_plane].i_pitch;
 746                 }
 747
 748                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 749
 750                 for( ; p_out < p_out_end ; )
 751                 {
 752                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 753
 754                     p_out += p_outpic->p[i_plane].i_pitch;
 755
 756                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 757
 758                     p_in += 2 * p_pic->p[i_plane].i_pitch;
 759                     p_out += p_outpic->p[i_plane].i_pitch;
 760                 }
 761
 762                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 763
 764                 /* For TOP field we need to add the last line */
 765                 if( i_field == 0 )
 766                 {
 767                     p_in += p_pic->p[i_plane].i_pitch;
 768                     p_out += p_outpic->p[i_plane].i_pitch;
 769                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 770                 }
 771                 break;
 772
 773             case VLC_CODEC_I422:
 774             case VLC_CODEC_J422:
 775                 /* For BOTTOM field we need to add the first line */
 776                 if( i_field == 1 )
 777                 {
 778                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 779                     p_in += p_pic->p[i_plane].i_pitch;
 780                     p_out += p_outpic->p[i_plane].i_pitch;
 781                 }
 782
 783                 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 784
 785                 if( i_plane == Y_PLANE )
 786                 {
 787                     for( ; p_out < p_out_end ; )
 788                     {
 789                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 790
 791                         p_out += p_outpic->p[i_plane].i_pitch;
 792
 793                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 794
 795                         p_in += 2 * p_pic->p[i_plane].i_pitch;
 796                         p_out += p_outpic->p[i_plane].i_pitch;
 797                     }
 798                 }
 799                 else
 800                 {
 801                     for( ; p_out < p_out_end ; )
 802                     {
 803                         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 804
 805                         p_out += p_outpic->p[i_plane].i_pitch;
 806                         p_in += 2 * p_pic->p[i_plane].i_pitch;
 807                     }
 808                 }
 809
 810                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 811
 812                 /* For TOP field we need to add the last line */
 813                 if( i_field == 0 )
 814                 {
 815                     p_in += p_pic->p[i_plane].i_pitch;
 816                     p_out += p_outpic->p[i_plane].i_pitch;
 817                     vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 818                 }
 819                 break;
 820         }
 821     }
 822 }
 823
/* Shorthand for the merge routine stored in p_vout->p_sys. Both macros expect
 * a variable named p_vout to be in scope where they are used. */
#define Merge p_vout->p_sys->pf_merge
/* Calls the optional end-of-merge hook only when it is set.
 * NOTE: this expands to a bare `if`, so use it as a standalone statement only
 * (never directly in front of an `else`). */
#define EndMerge if(p_vout->p_sys->pf_end_merge) p_vout->p_sys->pf_end_merge
 826
 827 /*****************************************************************************
 828  * RenderLinear: BOB with linear interpolation
 829  *****************************************************************************/
 830 static void RenderLinear( vout_thread_t *p_vout,
 831                           picture_t *p_outpic, picture_t *p_pic, int i_field )
 832 {
 833     int i_plane;
 834
 835     /* Copy image and skip lines */
 836     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 837     {
 838         uint8_t *p_in, *p_out_end, *p_out;
 839
 840         p_in = p_pic->p[i_plane].p_pixels;
 841         p_out = p_outpic->p[i_plane].p_pixels;
 842         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 843                              * p_outpic->p[i_plane].i_visible_lines;
 844
 845         /* For BOTTOM field we need to add the first line */
 846         if( i_field == 1 )
 847         {
 848             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 849             p_in += p_pic->p[i_plane].i_pitch;
 850             p_out += p_outpic->p[i_plane].i_pitch;
 851         }
 852
 853         p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
 854
 855         for( ; p_out < p_out_end ; )
 856         {
 857             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 858
 859             p_out += p_outpic->p[i_plane].i_pitch;
 860
 861             Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
 862                    p_pic->p[i_plane].i_pitch );
 863
 864             p_in += 2 * p_pic->p[i_plane].i_pitch;
 865             p_out += p_outpic->p[i_plane].i_pitch;
 866         }
 867
 868         vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 869
 870         /* For TOP field we need to add the last line */
 871         if( i_field == 0 )
 872         {
 873             p_in += p_pic->p[i_plane].i_pitch;
 874             p_out += p_outpic->p[i_plane].i_pitch;
 875             vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 876         }
 877     }
 878     EndMerge();
 879 }
 880
 881 static void RenderMean( vout_thread_t *p_vout,
 882                         picture_t *p_outpic, picture_t *p_pic )
 883 {
 884     int i_plane;
 885
 886     /* Copy image and skip lines */
 887     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 888     {
 889         uint8_t *p_in, *p_out_end, *p_out;
 890
 891         p_in = p_pic->p[i_plane].p_pixels;
 892
 893         p_out = p_outpic->p[i_plane].p_pixels;
 894         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 895                              * p_outpic->p[i_plane].i_visible_lines;
 896
 897         /* All lines: mean value */
 898         for( ; p_out < p_out_end ; )
 899         {
 900             Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 901                    p_pic->p[i_plane].i_pitch );
 902
 903             p_out += p_outpic->p[i_plane].i_pitch;
 904             p_in += 2 * p_pic->p[i_plane].i_pitch;
 905         }
 906     }
 907     EndMerge();
 908 }
 909
 910 static void RenderBlend( vout_thread_t *p_vout,
 911                          picture_t *p_outpic, picture_t *p_pic )
 912 {
 913     int i_plane;
 914
 915     /* Copy image and skip lines */
 916     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
 917     {
 918         uint8_t *p_in, *p_out_end, *p_out;
 919
 920         p_in = p_pic->p[i_plane].p_pixels;
 921
 922         p_out = p_outpic->p[i_plane].p_pixels;
 923         p_out_end = p_out + p_outpic->p[i_plane].i_pitch
 924                              * p_outpic->p[i_plane].i_visible_lines;
 925
 926         switch( p_vout->render.i_chroma )
 927         {
 928             case VLC_CODEC_I420:
 929             case VLC_CODEC_J420:
 930             case VLC_CODEC_YV12:
 931                 /* First line: simple copy */
 932                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 933                 p_out += p_outpic->p[i_plane].i_pitch;
 934
 935                 /* Remaining lines: mean value */
 936                 for( ; p_out < p_out_end ; )
 937                 {
 938                     Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 939                            p_pic->p[i_plane].i_pitch );
 940
 941                     p_out += p_outpic->p[i_plane].i_pitch;
 942                     p_in += p_pic->p[i_plane].i_pitch;
 943                 }
 944                 break;
 945
 946             case VLC_CODEC_I422:
 947             case VLC_CODEC_J422:
 948                 /* First line: simple copy */
 949                 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
 950                 p_out += p_outpic->p[i_plane].i_pitch;
 951
 952                 /* Remaining lines: mean value */
 953                 if( i_plane == Y_PLANE )
 954                 {
 955                     for( ; p_out < p_out_end ; )
 956                     {
 957                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 958                                p_pic->p[i_plane].i_pitch );
 959
 960                         p_out += p_outpic->p[i_plane].i_pitch;
 961                         p_in += p_pic->p[i_plane].i_pitch;
 962                     }
 963                 }
 964
 965                 else
 966                 {
 967                     for( ; p_out < p_out_end ; )
 968                     {
 969                         Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
 970                                p_pic->p[i_plane].i_pitch );
 971
 972                         p_out += p_outpic->p[i_plane].i_pitch;
 973                         p_in += 2*p_pic->p[i_plane].i_pitch;
 974                     }
 975                 }
 976                 break;
 977         }
 978     }
 979     EndMerge();
 980 }
 981
 982 #undef Merge
 983
 984 static void MergeGeneric( void *_p_dest, const void *_p_s1,
 985                           const void *_p_s2, size_t i_bytes )
 986 {
 987     uint8_t* p_dest = (uint8_t*)_p_dest;
 988     const uint8_t *p_s1 = (const uint8_t *)_p_s1;
 989     const uint8_t *p_s2 = (const uint8_t *)_p_s2;
 990     uint8_t* p_end = p_dest + i_bytes - 8;
 991
 992     while( p_dest < p_end )
 993     {
 994         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 995         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 996         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 997         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 998         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
 999         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1000         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1001         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1002     }
1003
1004     p_end += 8;
1005
1006     while( p_dest < p_end )
1007     {
1008         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1009     }
1010 }
1011
1012 #if defined(CAN_COMPILE_MMXEXT)
/* MMXEXT merge: averages the bytes of _p_s1 and _p_s2 into _p_dest, 8 bytes
 * at a time with pavgb, then finishes the remaining bytes in C.
 *
 *  _p_dest  destination, i_bytes bytes are written
 *  _p_s1    first source, i_bytes bytes are read
 *  _p_s2    second source, i_bytes bytes are read
 *  i_bytes  number of bytes to process; any value, including 0, is valid
 *
 * The MMX state is left dirty; the caller has to issue emms afterwards. */
static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                         size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* The vector loop only runs while strictly more than 8 bytes remain, so
     * the last 1..8 bytes always go through the C tail below. Counting the
     * remaining bytes (instead of comparing against p_dest + i_bytes - 8)
     * avoids forming a pointer in front of the buffer when i_bytes < 8. */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        /* The asm touches 8 bytes of each buffer while the "m" operands only
         * describe a single byte, hence the "memory" clobber. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1039 #endif
1040
1041 #if defined(CAN_COMPILE_3DNOW)
/* 3DNow! merge: averages the bytes of _p_s1 and _p_s2 into _p_dest, 8 bytes
 * at a time with pavgusb, then finishes the remaining bytes in C.
 *
 *  _p_dest  destination, i_bytes bytes are written
 *  _p_s1    first source, i_bytes bytes are read
 *  _p_s2    second source, i_bytes bytes are read
 *  i_bytes  number of bytes to process; any value, including 0, is valid
 *
 * The MMX state is left dirty; the caller has to issue femms/emms
 * afterwards. */
static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                        size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* The vector loop only runs while strictly more than 8 bytes remain, so
     * the last 1..8 bytes always go through the C tail below. Counting the
     * remaining bytes (instead of comparing against p_dest + i_bytes - 8)
     * avoids forming a pointer in front of the buffer when i_bytes < 8. */
    for( ; i_bytes > 8; i_bytes -= 8 )
    {
        /* The asm touches 8 bytes of each buffer while the "m" operands only
         * describe a single byte, hence the "memory" clobber. */
        __asm__  __volatile__( "movq %2,%%mm1;"
                               "pavgusb %1, %%mm1;"
                               "movq %%mm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "memory" );
        p_dest += 8;
        p_s1 += 8;
        p_s2 += 8;
    }

    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1068 #endif
1069
1070 #if defined(CAN_COMPILE_SSE)
/* SSE2 merge: averages the bytes of _p_s1 and _p_s2 into _p_dest, 16 bytes at
 * a time with pavgb, with C loops for the unaligned head and for the tail.
 *
 *  _p_dest  destination, exactly i_bytes bytes are written
 *  _p_s1    first source, i_bytes bytes are read
 *  _p_s2    second source, i_bytes bytes are read
 *  i_bytes  number of bytes to process; any value, including 0, is valid
 *
 * Every byte consumed by the alignment head is deducted from i_bytes, so the
 * total never exceeds what the caller asked for (the head used to come on top
 * of i_bytes, overrunning all three buffers by up to 15 bytes). */
static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                       size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    /* Head: pavgb below reads *p_s1 directly as a 128-bit memory operand,
     * which has to be 16-byte aligned, so advance in C until p_s1 is aligned
     * (or until the data runs out). */
    for( ; i_bytes > 0 && ( (uintptr_t)p_s1 % 16 ); i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }

    /* Bulk: runs while strictly more than 16 bytes remain, so the last
     * 1..16 bytes go through the C tail. */
    for( ; i_bytes > 16; i_bytes -= 16 )
    {
        /* The asm touches 16 bytes of each buffer while the "m" operands
         * only describe a single byte, hence the "memory" clobber. */
        __asm__  __volatile__( "movdqu %2,%%xmm1;"
                               "pavgb %1, %%xmm1;"
                               "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                 "m" (*p_s1),
                                                 "m" (*p_s2)
                                               : "memory" );
        p_dest += 16;
        p_s1 += 16;
        p_s2 += 16;
    }

    /* Tail */
    for( ; i_bytes > 0; i_bytes-- )
    {
        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
    }
}
1102 #endif
1103
1104 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* Executes the x86 `emms` instruction, which empties the MMX state once the
 * MMX/SSE merge code is done. Takes no input and returns nothing. */
static void EndMMX( void )
{
    __asm__ __volatile__( "emms" :: );
}
1109 #endif
1110
1111 #if defined(CAN_COMPILE_3DNOW)
/* Executes the 3DNow! `femms` instruction (the 3DNow! variant of `emms`) to
 * leave the MMX state once the 3DNow! merge code is done. Takes no input and
 * returns nothing. */
static void End3DNow( void )
{
    __asm__ __volatile__( "femms" :: );
}
1116 #endif
1117
1118 #ifdef CAN_COMPILE_C_ALTIVEC
1119 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1120                           const void *_p_s2, size_t i_bytes )
1121 {
1122     uint8_t *p_dest = (uint8_t *)_p_dest;
1123     uint8_t *p_s1   = (uint8_t *)_p_s1;
1124     uint8_t *p_s2   = (uint8_t *)_p_s2;
1125     uint8_t *p_end  = p_dest + i_bytes - 15;
1126
1127     /* Use C until the first 16-bytes aligned destination pixel */
1128     while( (uintptr_t)p_dest & 0xF )
1129     {
1130         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1131     }
1132
1133     if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1134     {
1135         /* Unaligned source */
1136         vector unsigned char s1v, s2v, destv;
1137         vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1138         vector unsigned char perm1v, perm2v;
1139
1140         perm1v = vec_lvsl( 0, p_s1 );
1141         perm2v = vec_lvsl( 0, p_s2 );
1142         s1oldv = vec_ld( 0, p_s1 );
1143         s2oldv = vec_ld( 0, p_s2 );
1144
1145         while( p_dest < p_end )
1146         {
1147             s1newv = vec_ld( 16, p_s1 );
1148             s2newv = vec_ld( 16, p_s2 );
1149             s1v    = vec_perm( s1oldv, s1newv, perm1v );
1150             s2v    = vec_perm( s2oldv, s2newv, perm2v );
1151             s1oldv = s1newv;
1152             s2oldv = s2newv;
1153             destv  = vec_avg( s1v, s2v );
1154             vec_st( destv, 0, p_dest );
1155
1156             p_s1   += 16;
1157             p_s2   += 16;
1158             p_dest += 16;
1159         }
1160     }
1161     else
1162     {
1163         /* Aligned source */
1164         vector unsigned char s1v, s2v, destv;
1165
1166         while( p_dest < p_end )
1167         {
1168             s1v   = vec_ld( 0, p_s1 );
1169             s2v   = vec_ld( 0, p_s2 );
1170             destv = vec_avg( s1v, s2v );
1171             vec_st( destv, 0, p_dest );
1172
1173             p_s1   += 16;
1174             p_s2   += 16;
1175             p_dest += 16;
1176         }
1177     }
1178
1179     p_end += 15;
1180
1181     while( p_dest < p_end )
1182     {
1183         *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
1184     }
1185 }
1186 #endif
1187
1188 #ifdef __ARM_NEON__
1189 static void MergeNEON (void *restrict out, const void *in1,
1190                        const void *in2, size_t n)
1191 {
1192     uint8_t *outp = out;
1193     const uint8_t *in1p = in1;
1194     const uint8_t *in2p = in2;
1195     size_t mis = ((uintptr_t)outp) & 15;
1196
1197     if (mis)
1198     {
1199         MergeGeneric (outp, in1p, in2p, mis);
1200         outp += mis;
1201         in1p += mis;
1202         in2p += mis;
1203         n -= mis;
1204     }
1205
1206     uint8_t *end = outp + (n & ~15);
1207
1208     if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
1209         while (outp < end)
1210             asm volatile (
1211                 "vld1.u8  {q0-q1}, [%[in1]]!\n"
1212                 "vld1.u8  {q2-q3}, [%[in2]]!\n"
1213                 "vhadd.u8 q4, q0, q2\n"
1214                 "vld1.u8  {q6-q7}, [%[in1]]!\n"
1215                 "vhadd.u8 q5, q1, q3\n"
1216                 "vld1.u8  {q8-q9}, [%[in2]]!\n"
1217                 "vhadd.u8 q10, q6, q8\n"
1218                 "vhadd.u8 q11, q7, q9\n"
1219                 "vst1.u8  {q4-q5}, [%[out],:128]!\n"
1220                 "vst1.u8  {q10-q11}, [%[out],:128]!\n"
1221                 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1222                 :
1223                 : "q0", "q1", "q2", "memory");
1224     else
1225          while (outp < end)
1226             asm volatile (
1227                 "vld1.u8  {q0-q1}, [%[in1],:128]!\n"
1228                 "vld1.u8  {q2-q3}, [%[in2],:128]!\n"
1229                 "vhadd.u8 q4, q0, q2\n"
1230                 "vld1.u8  {q6-q7}, [%[in1],:128]!\n"
1231                 "vhadd.u8 q5, q1, q3\n"
1232                 "vld1.u8  {q8-q9}, [%[in2],:128]!\n"
1233                 "vhadd.u8 q10, q6, q8\n"
1234                 "vhadd.u8 q11, q7, q9\n"
1235                 "vst1.u8  {q4-q5}, [%[out],:128]!\n"
1236                 "vst1.u8  {q10-q11}, [%[out],:128]!\n"
1237                 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1238                 :
1239                 : "q0", "q1", "q2", "memory");
1240     n &= 15;
1241     if (n)
1242         MergeGeneric (outp, in1p, in2p, n);
1243 }
1244 #endif
1245
1246 /*****************************************************************************
/*****************************************************************************
 * RenderX: This algorithm works on an 8x8 block basis; it copies the top
 * field and applies a process to recreate the bottom field:
 *  If an 8x8 block is classified as:
 *   - progressive: it applies a small blend (1,6,1)
 *   - interlaced:
 *    * in the MMX version: we do a ME between the 2 fields, if there is a
 *    good match we use MC to recreate the bottom field (with a small
 *    blend (1,6,1) )
 *    * otherwise: it recreates the bottom field by an edge oriented
 *    interpolation.
 *****************************************************************************/
1258
/* XDeint8x8Detect: detect if an 8x8 block is interlaced.
 * XXX: It needs to access 8x10 pixels.
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: detection on smooth/uniform areas with noise doesn't work well,
 * but it's not really a problem because they don't have much detail anyway
 */
/* Returns the square of a (one term of a sum of squared differences). */
static inline int ssd( int a )
{
    const int i_square = a * a;
    return i_square;
}
/* Returns 1 when the 8-pixel-wide block at src looks interlaced, 0 otherwise.
 * src is the top-left pixel and i_src the distance between two lines.
 * Four line pairs are examined, each time looking at four consecutive lines,
 * so lines 0..9 of the block are read. */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int i_hits = 0;

    for( int i_pair = 0; i_pair < 4; i_pair++, src += 2*i_src )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = p_l0 + i_src;
        const uint8_t *p_l2 = p_l1 + i_src;
        const uint8_t *p_l3 = p_l2 + i_src;

        int i_frame = 0;    /* squared differences between adjacent lines */
        int i_field = 0;    /* squared differences between lines two apart */

        for( int x = 0; x < 8; x++ )
        {
            i_frame += ssd( p_l0[x] - p_l1[x] ) + ssd( p_l1[x] - p_l2[x] );
            i_field += ssd( p_l0[x] - p_l2[x] ) + ssd( p_l1[x] - p_l3[x] );
        }

        /* Lines of the same parity match clearly better than neighbouring
         * lines, and the neighbour difference is not negligible */
        if( i_field < 6*i_frame/8 && i_frame > 32 )
            i_hits++;
    }

    /* One matching line pair is enough */
    return i_hits >= 1;
}
1293 #ifdef CAN_COMPILE_MMXEXT
1294 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
1295 {
1296
1297     int y, x;
1298     int32_t ff, fr;
1299     int fc;
1300
1301     /* Detect interlacing */
1302     fc = 0;
1303     pxor_r2r( mm7, mm7 );
1304     for( y = 0; y < 9; y += 2 )
1305     {
1306         ff = fr = 0;
1307         pxor_r2r( mm5, mm5 );
1308         pxor_r2r( mm6, mm6 );
1309         for( x = 0; x < 8; x+=4 )
1310         {
1311             movd_m2r( src[        x], mm0 );
1312             movd_m2r( src[1*i_src+x], mm1 );
1313             movd_m2r( src[2*i_src+x], mm2 );
1314             movd_m2r( src[3*i_src+x], mm3 );
1315
1316             punpcklbw_r2r( mm7, mm0 );
1317             punpcklbw_r2r( mm7, mm1 );
1318             punpcklbw_r2r( mm7, mm2 );
1319             punpcklbw_r2r( mm7, mm3 );
1320
1321             movq_r2r( mm0, mm4 );
1322
1323             psubw_r2r( mm1, mm0 );
1324             psubw_r2r( mm2, mm4 );
1325
1326             psubw_r2r( mm1, mm2 );
1327             psubw_r2r( mm1, mm3 );
1328
1329             pmaddwd_r2r( mm0, mm0 );
1330             pmaddwd_r2r( mm4, mm4 );
1331             pmaddwd_r2r( mm2, mm2 );
1332             pmaddwd_r2r( mm3, mm3 );
1333             paddd_r2r( mm0, mm2 );
1334             paddd_r2r( mm4, mm3 );
1335             paddd_r2r( mm2, mm5 );
1336             paddd_r2r( mm3, mm6 );
1337         }
1338
1339         movq_r2r( mm5, mm0 );
1340         psrlq_i2r( 32, mm0 );
1341         paddd_r2r( mm0, mm5 );
1342         movd_r2m( mm5, fr );
1343
1344         movq_r2r( mm6, mm0 );
1345         psrlq_i2r( 32, mm0 );
1346         paddd_r2r( mm0, mm6 );
1347         movd_r2m( mm6, ff );
1348
1349         if( ff < 6*fr/8 && fr > 32 )
1350             fc++;
1351
1352         src += 2*i_src;
1353     }
1354     return fc;
1355 }
1356 #endif
1357
/* Progressive 8x8 block: writes 8 output lines of 8 pixels.
 * Every even output line is a copy of a src1 line; every odd output line is
 * the (1,6,1)/8 blend of that src1 line, the matching src2 line and the next
 * src1 line.
 *
 *  dst, i_dst     output block and its line distance
 *  src1, i_src1   first set of lines (5 lines are read) and their distance
 *  src2, i_src2   second set of lines (4 lines are read) and their distance */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_above = src1;
        const uint8_t *p_below = src1 + i_src1;
        uint8_t *p_blend = dst + i_dst;

        /* Even line: plain copy */
        memcpy( dst, p_above, 8 );

        /* Odd line: weighted blend with rounding */
        for( int x = 0; x < 8; x++ )
            p_blend[x] = ( p_above[x] + 6*src2[x] + p_below[x] + 4 ) >> 3;

        dst = p_blend + i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1378
1379 #ifdef CAN_COMPILE_MMXEXT
/* MMX version of the progressive 8x8 merge, 4 pixels at a time.
 * Every even output line is a copy of a src1 line; every odd output line is
 * (src1[x] + 6*src2[x] + src1[i_src1+x] + 4) >> 3.
 *
 *  dst, i_dst     output block and its line distance
 *  src1, i_src1   first set of lines and their distance
 *  src2, i_src2   second set of lines and their distance
 *
 * Uses the register macros of "mmx.h".
 * NOTE(review): the per-line comments assume those macros take their operands
 * as (source, destination) - confirm against mmx.h. */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
{
    /* Rounding term: the value 4 in each of the four 16-bit lanes */
    static const uint64_t m_4 = INT64_C(0x0004000400040004);
    int y, x;

    /* Progressive */
    pxor_r2r( mm7, mm7 );                   /* mm7 = 0 for byte->word unpack */
    for( y = 0; y < 8; y += 2 )
    {
        for( x = 0; x < 8; x +=4 )
        {
            /* Even line: copy 4 pixels of src1 */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );

            movd_m2r( src2[x], mm1 );
            movd_m2r( src1[i_src1+x], mm2 );

            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            paddw_r2r( mm1, mm1 );          /* 2*src2 */
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );          /* 4*src2 */
            paddw_r2r( mm2, mm0 );          /* src1 + next src1 line */
            paddw_r2r( mm3, mm1 );          /* 6*src2 */
            paddw_m2r( m_4, mm1 );          /* + 4 */
            paddw_r2r( mm1, mm0 );
            psraw_i2r( 3, mm0 );            /* >> 3 */
            packuswb_r2r( mm7, mm0 );
            /* Odd line: store the 4 blended pixels */
            movd_r2m( mm0, dst[i_dst+x] );
        }
        dst += 2*i_dst;
        src1 += i_src1;
        src2 += i_src2;
    }
}
1418
1419 #endif
1420
1421 /* For debug */
/* Debug helper: fills the 8x8 block at dst (line distance i_dst) with v. */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int i_line = 0;

    while( i_line < 8 )
    {
        memset( dst + i_line * i_dst, v, 8 );
        i_line++;
    }
}
1428
/* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for blocks that miss a
 * neighbour
 * (Uses 8x9 pixels)
 * TODO: a better one for the inner part.
 */
/* Interlaced 8x8 block without horizontal neighbours: even output lines are
 * copies of source lines 0, 2, 4, 6; each odd output line is the truncating
 * mean of the source lines two apart that surround it (source lines 0..8 are
 * read, odd source lines are ignored).
 *
 *  dst, i_dst   output block and its line distance
 *  src, i_src   input block and its line distance */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_next = src + 2*i_src;
        uint8_t *p_mean = dst + i_dst;

        memcpy( dst, src, 8 );

        for( int x = 0; x < 8; x++ )
            p_mean[x] = ( src[x] + p_next[x] ) >> 1;

        dst = p_mean + i_dst;
        src += 2*i_src;
    }
}
1451 #ifdef CAN_COMPILE_MMXEXT
/* MMX version of XDeint8x8FieldEC: even output lines are copies of source
 * lines 0, 2, 4, 6 and each odd output line is the average of the two source
 * lines (two apart) that surround it.
 *
 *  dst, i_dst   output block and its line distance
 *  src, i_src   input block and its line distance
 *
 * Uses the register macros of "mmx.h".
 * NOTE(review): pavgb presumably rounds up where the C version truncates, so
 * the two versions may differ by one on odd sums - confirm. */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
{
    int y;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        /* Even line: copy 8 pixels */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        dst += i_dst;

        /* Odd line: average with the source line two below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );

        movq_r2m( mm0, dst[0] );

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1473 #endif
1474
1475 /* XDeint8x8Field: Edge oriented interpolation
1476  * (Need -4 and +5 pixels H, +1 line)
1477  */
/* Interlaced 8x8 block, edge oriented interpolation.
 * Even output lines are copies of source lines 0, 2, 4, 6. Each pixel of an
 * odd output line is the mean of one pixel from the kept line above and one
 * from the kept line below, taken along the direction (left diagonal,
 * vertical, right diagonal) selected by comparing three 8-pixel sums of
 * absolute differences.
 *
 *  dst, i_dst   output block and its line distance
 *  src, i_src   input block and its line distance; pixels from 4 to the left
 *               up to 5 past the right edge of the block are read */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    for( int i_pair = 0; i_pair < 4; i_pair++ )
    {
        const uint8_t *p_top = src;
        const uint8_t *p_bot = src + 2*i_src;

        memcpy( dst, p_top, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
        {
            /* 8 pixels are used just to match the MMX version; 5 would be
             * enough (less isn't good) */
            int i_sad_l = 0;    /* top shifted 2 to the left of bottom */
            int i_sad_c = 0;    /* top straight above bottom */
            int i_sad_r = 0;    /* top shifted 2 to the right of bottom */

            for( int k = 0; k < 8; k++ )
            {
                i_sad_l += abs( p_top[x-4+k] - p_bot[x-2+k] );
                i_sad_c += abs( p_top[x-3+k] - p_bot[x-3+k] );
                i_sad_r += abs( p_top[x-2+k] - p_bot[x-4+k] );
            }

            int i_top = x;
            int i_bot = x;
            if( i_sad_l < i_sad_c && i_sad_c <= i_sad_r )
            {
                i_top = x - 1;
                i_bot = x + 1;
            }
            else if( i_sad_r < i_sad_c && i_sad_c <= i_sad_l )
            {
                i_top = x + 1;
                i_bot = x - 1;
            }

            dst[x] = ( p_top[i_top] + p_bot[i_bot] ) >> 1;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1521 #ifdef CAN_COMPILE_MMXEXT
/* MMX version of XDeint8x8FieldC (edge oriented interpolation).
 * Even output lines are copies of source lines 0, 2, 4, 6. For each pixel of
 * an odd output line, three 8-pixel sums of absolute differences between the
 * kept line above (src) and the kept line below (src2) select whether the
 * pixel is interpolated along a diagonal or vertically.
 *
 *  dst, i_dst   output block and its line distance
 *  src, i_src   input block and its line distance; pixels from 4 to the left
 *               up to 5 past the right edge of the block are read
 *
 * Uses the register macros of "mmx.h". */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];
            int32_t c0, c1, c2;

            /* 8 pixels of the upper line at three horizontal offsets */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );

            /* Sums of absolute differences against the lower line:
             * mm0: src[x-2..] vs src2[x-4..]  -> c2
             * mm1: src[x-3..] vs src2[x-3..]  -> c1
             * mm2: src[x-4..] vs src2[x-2..]  -> c0 */
            psadbw_m2r( src2[x-4], mm0 );
            psadbw_m2r( src2[x-3], mm1 );
            psadbw_m2r( src2[x-2], mm2 );

            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );

            /* Same decision as in the C version */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1562 #endif
1563
/* NxN arbitrary size (and then only use pixels in the NxN block)
 */
/* Returns 1 when the i_width x i_height block at src looks interlaced, 0
 * otherwise. Note the parameter order: height first, then width.
 *
 * FIXME way too simple, need to be more like XDeint8x8Detect: the two sums
 * are never reset, so every test is made on the totals accumulated so far. */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    int i_frame = 0;    /* squared differences between adjacent lines */
    int i_field = 0;    /* squared differences between lines two apart */
    int i_hits = 0;

    for( int y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *p_cur = &src[y*i_src];
        const uint8_t *p_adjacent = p_cur + i_src;
        const uint8_t *p_same_parity = p_adjacent + i_src;

        for( int x = 0; x < i_width; x++ )
        {
            i_frame += ssd( p_cur[x] - p_adjacent[x] );
            i_field += ssd( p_cur[x] - p_same_parity[x] );
        }

        if( i_field < i_frame && i_frame > i_width / 2 )
            i_hits++;
    }

    /* At least two matching line pairs are required */
    return i_hits >= 2;
}
1592
/* Progressive block of arbitrary size: even output lines are copies of the
 * even source lines; each odd output line is the (1,2,1)/4 blend of the three
 * source lines around it, except for the last line pair, where only the two
 * available source lines are averaged.
 *
 *  dst, i_dst         output block and its line distance
 *  src, i_src         input block and its line distance
 *  i_width, i_height  block size in pixels / lines */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        const uint8_t *p_l0 = src;
        const uint8_t *p_l1 = src + i_src;

        memcpy( dst, p_l0, i_width );
        dst += i_dst;

        if( y >= i_height - 2 )
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                dst[x] = ( p_l0[x] + p_l1[x] ) >> 1;
        }
        else
        {
            const uint8_t *p_l2 = p_l1 + i_src;

            for( int x = 0; x < i_width; x++ )
                dst[x] = ( p_l0[x] + 2*p_l1[x] + p_l2[x] + 2 ) >> 2;
        }

        dst += i_dst;
        src += 2*i_src;
    }
}
1620
/* Interlaced block of arbitrary size: even output lines are copies of the
 * even source lines; each odd output line is the mean of the even source
 * lines above and below it, except for the last line pair, where the current
 * and the directly following source line are averaged.
 *
 *  dst, i_dst         output block and its line distance
 *  src, i_src         input block and its line distance
 *  i_width, i_height  block size in pixels / lines */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        /* Second operand of the mean: two lines down, or only one line down
         * for the last line pair (Blend last line) */
        const int b_last = !( y < i_height - 2 );
        const uint8_t *p_cur = src;
        const uint8_t *p_other = b_last ? src + i_src : src + 2*i_src;

        memcpy( dst, p_cur, i_width );
        dst += i_dst;

        for( int x = 0; x < i_width; x++ )
            dst[x] = ( p_cur[x] + p_other[x] ) >> 1;

        dst += i_dst;
        src += 2*i_src;
    }
}
1648
/* Deinterlaces one block of arbitrary size: the block is classified with
 * XDeintNxNDetect() and then rebuilt with XDeintNxNField() (interlaced) or
 * XDeintNxNFrame() (progressive).
 *
 *  dst, i_dst         output block and its line distance
 *  src, i_src         input block and its line distance
 *  i_width, i_height  block size in pixels / lines */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height BEFORE the width, unlike the two
     * render helpers. Passing (width, height) made the detector scan a
     * transposed block, reading lines and columns outside the real one. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
1657
1658
/* Returns the median of a, b and c: the smallest and the largest of the three
 * are tracked and subtracted from the sum. */
static inline int median( int a, int b, int c )
{
    int i_lo, i_hi;

    /* Order the first two values */
    if( b < a )
    {
        i_lo = b;
        i_hi = a;
    }
    else
    {
        i_lo = a;
        i_hi = b;
    }

    /* Let the third one extend the range if it lies outside of it */
    if( c < i_lo )
        i_lo = c;
    else if( c > i_hi )
        i_hi = c;

    return a + b + c - i_lo - i_hi;
}
1674
1675
1676 /* XDeintBand8x8:
1677  */
/* Deinterlaces one band of 8 lines with the C helpers.
 *
 *  dst, i_dst   output band and its line distance
 *  src, i_src   input band and its line distance
 *  i_mbx        number of complete 8-pixel-wide blocks in the band
 *  i_modx       width of the trailing partial block, 0 when there is none */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Progressive block: even source lines are copied, odd source
             * lines are blended in */
            XDeint8x8MergeC( dst, i_dst,
                             src, 2*i_src,
                             src + i_src, 2*i_src );
            continue;
        }

        /* Interlaced block: the first and the last block of the band use
         * the variant that does not read pixels beside the block */
        const int b_edge = ( i_mb == 0 ) || ( i_mb == i_mbx - 1 );
        if( b_edge )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    /* Remaining columns, narrower than a block */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1708 #ifdef CAN_COMPILE_MMXEXT
/* Deinterlaces one band of 8 lines with the MMX helpers (the trailing partial
 * block still goes through the C-only XDeintNxN()).
 *
 *  dst, i_dst   output band and its line distance
 *  src, i_src   input band and its line distance
 *  i_mbx        number of complete 8-pixel-wide blocks in the band
 *  i_modx       width of the trailing partial block, 0 when there is none */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Progressive block: even source lines are copied, odd source
             * lines are blended in */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  src, 2*i_src,
                                  src + i_src, 2*i_src );
            continue;
        }

        /* Interlaced block: the first and the last block of the band use
         * the variant that does not read pixels beside the block */
        const int b_edge = ( i_mb == 0 ) || ( i_mb == i_mbx - 1 );
        if( b_edge )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    /* Remaining columns, narrower than a block */
    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
1740 #endif
1741
1742 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1743 {
1744     int i_plane;
1745
1746     /* Copy image and skip lines */
1747     for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
1748     {
1749         const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1750         const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
1751
1752         const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1753         const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1754
1755         const int i_dst = p_outpic->p[i_plane].i_pitch;
1756         const int i_src = p_pic->p[i_plane].i_pitch;
1757
1758         int y, x;
1759
1760         for( y = 0; y < i_mby; y++ )
1761         {
1762             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1763             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1764
1765 #ifdef CAN_COMPILE_MMXEXT
1766             if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1767                 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1768             else
1769 #endif
1770                 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1771         }
1772
1773         /* Last line (C only)*/
1774         if( i_mody )
1775         {
1776             uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1777             uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1778
1779             for( x = 0; x < i_mbx; x++ )
1780             {
1781                 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
1782
1783                 dst += 8;
1784                 src += 8;
1785             }
1786
1787             if( i_modx )
1788                 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
1789         }
1790     }
1791
1792 #ifdef CAN_COMPILE_MMXEXT
1793     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1794         emms();
1795 #endif
1796 }
1797
1798 /*****************************************************************************
1799  * Yadif (Yet Another DeInterlacing Filter).
1800  *****************************************************************************/
1801 /* */
/* Minimal configuration block handed to the yadif line filters. */
struct vf_priv_s {
    /*
     * 0: Output 1 frame for each frame.
     * 1: Output 1 frame for each field.
     * 2: Like 0 but skips spatial interlacing check.
     * 3: Like 1 but skips spatial interlacing check.
     *
     * In vlc, only the 0x02 bit has a meaning, as we handle the 0x01 part
     * ourselves.
     */
    int mode;
};
1813
/* NOTE(review): intptr_t was picked without certainty that it is the right
 * underlying type for x86_reg -- confirm against what yadif.h expects. */
typedef intptr_t x86_reg;

/* Small arithmetic helpers, defined just ahead of yadif.h (presumably because
 * that header relies on them -- confirm). FFABS expands its argument more
 * than once, so it must not be given an expression with side effects. */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b)      __MAX(a,b)
#define FFMAX3(a,b,c)   FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b)      __MIN(a,b)
#define FFMIN3(a,b,c)   FFMIN(FFMIN(a,b),c)

/* yadif.h comes from vf_yadif.c of the mplayer project */
#include "yadif.h"
1825
1826 static void RenderYadif( vout_thread_t *p_vout, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1827 {
1828     vout_sys_t *p_sys = p_vout->p_sys;
1829
1830     /* */
1831     assert( i_order == 0 || i_order == 1 );
1832     assert( i_field == 0 || i_field == 1 );
1833
1834     if( i_order == 0 )
1835     {
1836         /* Duplicate the picture
1837          * TODO when the vout rework is finished, picture_Hold() might be enough
1838          * but becarefull, the pitches must match */
1839         picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1840         if( p_dup )
1841             picture_Copy( p_dup, p_src );
1842
1843         /* Slide the history */
1844         if( p_sys->pp_history[0] )
1845             picture_Release( p_sys->pp_history[0]  );
1846         for( int i = 1; i < HISTORY_SIZE; i++ )
1847             p_sys->pp_history[i-1] = p_sys->pp_history[i];
1848         p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1849     }
1850
1851     /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1852     picture_t *p_prev = p_sys->pp_history[0];
1853     picture_t *p_cur  = p_sys->pp_history[1];
1854     picture_t *p_next = p_sys->pp_history[2];
1855
1856     /* Filter if we have all the pictures we need */
1857     if( p_prev && p_cur && p_next )
1858     {
1859         /* */
1860         void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1861 #if defined(HAVE_YADIF_SSE2)
1862         if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1863             filter = yadif_filter_line_mmx2;
1864         else
1865 #endif
1866             filter = yadif_filter_line_c;
1867
1868         for( int n = 0; n < p_dst->i_planes; n++ )
1869         {
1870             const plane_t *prevp = &p_prev->p[n];
1871             const plane_t *curp  = &p_cur->p[n];
1872             const plane_t *nextp = &p_next->p[n];
1873             plane_t *dstp        = &p_dst->p[n];
1874
1875             for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
1876             {
1877                 if( (y % 2) == i_field )
1878                 {
1879                     vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1880                                 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1881                 }
1882                 else
1883                 {
1884                     struct vf_priv_s cfg;
1885                     /* Spatial checks only when enough data */
1886                     cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1887
1888                     assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1889                     filter( &cfg,
1890                             &dstp->p_pixels[y * dstp->i_pitch],
1891                             &prevp->p_pixels[y * prevp->i_pitch],
1892                             &curp->p_pixels[y * curp->i_pitch],
1893                             &nextp->p_pixels[y * nextp->i_pitch],
1894                             dstp->i_visible_pitch,
1895                             curp->i_pitch,
1896                             (i_field ^ (i_order == i_field)) & 1 );
1897                 }
1898
1899                 /* We duplicate the first and last lines */
1900                 if( y == 1 )
1901                     vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1902                 else if( y == dstp->i_visible_lines - 2 )
1903                     vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1904             }
1905         }
1906
1907         /* */
1908         p_dst->date = (p_next->date - p_cur->date) * i_order / 2 + p_cur->date;
1909     }
1910     else
1911     {
1912         /* Fallback to something simple
1913          * XXX it is wrong when we have 2 pictures, we should not output a picture */
1914         RenderX( p_dst, p_src );
1915     }
1916 }
1917
1918 /*****************************************************************************
1919  * FilterCallback: called when changing the deinterlace method on the fly.
1920  *****************************************************************************/
1921 static int FilterCallback( vlc_object_t *p_this, char const *psz_cmd,
1922                            vlc_value_t oldval, vlc_value_t newval,
1923                            void *p_data )
1924 {
1925     VLC_UNUSED(psz_cmd); VLC_UNUSED(p_data); VLC_UNUSED(oldval);
1926     vout_thread_t * p_vout = (vout_thread_t *)p_this;
1927     vout_sys_t *p_sys = p_vout->p_sys;
1928
1929     msg_Dbg( p_vout, "using %s deinterlace mode", newval.psz_string );
1930
1931     vlc_mutex_lock( &p_sys->filter_lock );
1932     const bool b_old_half_height = p_sys->b_half_height;
1933
1934     SetFilterMethod( p_vout, newval.psz_string );
1935
1936     if( !b_old_half_height == !p_sys->b_half_height )
1937     {
1938         vlc_mutex_unlock( &p_sys->filter_lock );
1939         return VLC_SUCCESS;
1940     }
1941
1942     /* We need to kill the old vout */
1943     if( p_sys->p_vout )
1944     {
1945         vout_filter_DelChild( p_vout, p_sys->p_vout, MouseEvent );
1946         vout_CloseAndRelease( p_sys->p_vout );
1947     }
1948
1949     /* Try to open a new video output */
1950     p_sys->p_vout = SpawnRealVout( p_vout );
1951
1952     if( p_sys->p_vout == NULL )
1953     {
1954         /* Everything failed */
1955         msg_Err( p_vout, "cannot open vout, aborting" );
1956
1957         vlc_mutex_unlock( &p_sys->filter_lock );
1958         return VLC_EGENERIC;
1959     }
1960
1961     vout_filter_AddChild( p_vout, p_sys->p_vout, MouseEvent );
1962
1963     vlc_mutex_unlock( &p_sys->filter_lock );
1964     return VLC_SUCCESS;
1965 }
1966
1967 /*****************************************************************************
1968  * video filter2 functions
1969  *****************************************************************************/
1970 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1971 {
1972     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
1973     picture_t *p_pic_dst;
1974
1975     /* Request output picture */
1976     p_pic_dst = filter_NewPicture( p_filter );
1977     if( p_pic_dst == NULL )
1978     {
1979         picture_Release( p_pic );
1980         return NULL;
1981     }
1982
1983     switch( p_vout->p_sys->i_mode )
1984     {
1985         case DEINTERLACE_DISCARD:
1986             RenderDiscard( p_vout, p_pic_dst, p_pic, 0 );
1987             break;
1988
1989         case DEINTERLACE_BOB:
1990 #if 0
1991             RenderBob( p_vout, pp_outpic[0], p_pic, 0 );
1992             RenderBob( p_vout, pp_outpic[1], p_pic, 1 );
1993             break;
1994 #endif
1995
1996         case DEINTERLACE_LINEAR:
1997 #if 0
1998             RenderLinear( p_vout, pp_outpic[0], p_pic, 0 );
1999             RenderLinear( p_vout, pp_outpic[1], p_pic, 1 );
2000 #endif
2001             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2002             picture_Release( p_pic_dst );
2003             picture_Release( p_pic );
2004             return NULL;
2005
2006         case DEINTERLACE_MEAN:
2007             RenderMean( p_vout, p_pic_dst, p_pic );
2008             break;
2009
2010         case DEINTERLACE_BLEND:
2011             RenderBlend( p_vout, p_pic_dst, p_pic );
2012             break;
2013
2014         case DEINTERLACE_X:
2015             RenderX( p_pic_dst, p_pic );
2016             break;
2017
2018         case DEINTERLACE_YADIF:
2019             msg_Err( p_vout, "delaying frames is not supported yet" );
2020             picture_Release( p_pic_dst );
2021             picture_Release( p_pic );
2022             return NULL;
2023
2024         case DEINTERLACE_YADIF2X:
2025             msg_Err( p_vout, "doubling the frame rate is not supported yet" );
2026             picture_Release( p_pic_dst );
2027             picture_Release( p_pic );
2028             return NULL;
2029     }
2030
2031     picture_CopyProperties( p_pic_dst, p_pic );
2032     p_pic_dst->b_progressive = true;
2033
2034     picture_Release( p_pic );
2035     return p_pic_dst;
2036 }
2037
2038 /*****************************************************************************
2039  * OpenFilter:
2040  *****************************************************************************/
2041 static int OpenFilter( vlc_object_t *p_this )
2042 {
2043     filter_t *p_filter = (filter_t*)p_this;
2044     vout_thread_t *p_vout;
2045     vlc_value_t val;
2046
2047     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
2048         return VLC_EGENERIC;
2049
2050     /* Impossible to use VLC_OBJECT_VOUT here because it would be used
2051      * by spu filters */
2052     p_vout = vlc_object_create( p_filter, sizeof(vout_thread_t) );
2053     vlc_object_attach( p_vout, p_filter );
2054     p_filter->p_sys = (filter_sys_t *)p_vout;
2055     p_vout->render.i_chroma = p_filter->fmt_in.video.i_chroma;
2056
2057     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2058                    p_filter->p_cfg );
2059     var_Get( p_filter, FILTER_CFG_PREFIX "mode", &val );
2060
2061     var_Create( p_filter, "filter-deinterlace-mode", VLC_VAR_STRING );
2062     var_Set( p_filter, "filter-deinterlace-mode", val );
2063     free( val.psz_string );
2064
2065     if( Create( VLC_OBJECT(p_vout) ) != VLC_SUCCESS )
2066     {
2067         vlc_object_detach( p_vout );
2068         vlc_object_release( p_vout );
2069         return VLC_EGENERIC;
2070     }
2071
2072     video_format_t fmt;
2073     GetOutputFormat( p_vout, &fmt, &p_filter->fmt_in.video );
2074     if( !p_filter->b_allow_fmt_out_change &&
2075         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2076           fmt.i_height != p_filter->fmt_in.video.i_height ) )
2077     {
2078         CloseFilter( VLC_OBJECT(p_filter) );
2079         return VLC_EGENERIC;
2080     }
2081     p_filter->fmt_out.video = fmt;
2082     p_filter->fmt_out.i_codec = fmt.i_chroma;
2083     p_filter->pf_video_filter = Deinterlace;
2084
2085     msg_Dbg( p_filter, "deinterlacing" );
2086
2087     return VLC_SUCCESS;
2088 }
2089
2090 /*****************************************************************************
2091  * CloseFilter: clean up the filter
2092  *****************************************************************************/
2093 static void CloseFilter( vlc_object_t *p_this )
2094 {
2095     filter_t *p_filter = (filter_t*)p_this;
2096     vout_thread_t *p_vout = (vout_thread_t *)p_filter->p_sys;
2097
2098     Destroy( VLC_OBJECT(p_vout) );
2099     vlc_object_detach( p_vout );
2100     vlc_object_release( p_vout );
2101 }
2102