1 /*****************************************************************************
2 * deinterlace.c : deinterlacer plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2000-2009 the VideoLAN team
7 * Author: Sam Hocevar <sam@zoy.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_filter.h>
43 #ifdef CAN_COMPILE_MMXEXT
47 #define DEINTERLACE_DISCARD 1
48 #define DEINTERLACE_MEAN 2
49 #define DEINTERLACE_BLEND 3
50 #define DEINTERLACE_BOB 4
51 #define DEINTERLACE_LINEAR 5
52 #define DEINTERLACE_X 6
53 #define DEINTERLACE_YADIF 7
54 #define DEINTERLACE_YADIF2X 8
56 /*****************************************************************************
58 *****************************************************************************/
59 static int Open ( vlc_object_t * );
60 static void Close( vlc_object_t * );
62 #define MODE_TEXT N_("Deinterlace mode")
63 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
65 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
66 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
68 #define FILTER_CFG_PREFIX "sout-deinterlace-"
70 static const char *const mode_list[] = {
71 "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
72 static const char *const mode_list_text[] = {
73 N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };
76 set_description( N_("Deinterlacing video filter") )
77 set_shortname( N_("Deinterlace" ))
78 set_capability( "video filter2", 0 )
79 set_category( CAT_VIDEO )
80 set_subcategory( SUBCAT_VIDEO_VFILTER )
82 add_string( FILTER_CFG_PREFIX "mode", "blend", SOUT_MODE_TEXT,
83 SOUT_MODE_LONGTEXT, false )
84 change_string_list( mode_list, mode_list_text, 0 )
86 add_shortcut( "deinterlace" )
87 set_callbacks( Open, Close )
91 /*****************************************************************************
93 *****************************************************************************/
94 static void RenderDiscard( filter_t *, picture_t *, picture_t *, int );
95 static void RenderBob ( filter_t *, picture_t *, picture_t *, int );
96 static void RenderMean ( filter_t *, picture_t *, picture_t * );
97 static void RenderBlend ( filter_t *, picture_t *, picture_t * );
98 static void RenderLinear ( filter_t *, picture_t *, picture_t *, int );
99 static void RenderX ( picture_t *, picture_t * );
100 static int RenderYadif ( filter_t *, picture_t *, picture_t *, int, int );
102 static void MergeGeneric ( void *, const void *, const void *, size_t );
103 #if defined(CAN_COMPILE_C_ALTIVEC)
104 static void MergeAltivec ( void *, const void *, const void *, size_t );
106 #if defined(CAN_COMPILE_MMXEXT)
107 static void MergeMMXEXT ( void *, const void *, const void *, size_t );
109 #if defined(CAN_COMPILE_3DNOW)
110 static void Merge3DNow ( void *, const void *, const void *, size_t );
112 #if defined(CAN_COMPILE_SSE)
113 static void MergeSSE2 ( void *, const void *, const void *, size_t );
115 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
116 static void EndMMX ( void );
118 #if defined(CAN_COMPILE_3DNOW)
119 static void End3DNow ( void );
121 #if defined __ARM_NEON__
122 static void MergeNEON (void *, const void *, const void *, size_t);
125 static const char *const ppsz_filter_options[] = {
129 /* Used for framerate doublers */
130 #define METADATA_SIZE (3)
132 mtime_t pi_date[METADATA_SIZE];
133 int pi_nb_fields[METADATA_SIZE];
134 bool pb_top_field_first[METADATA_SIZE];
135 } metadata_history_t;
137 #define HISTORY_SIZE (3)
138 #define CUSTOM_PTS -1
141 int i_mode; /* Deinterlace mode */
142 bool b_double_rate; /* Shall we double the framerate? */
143 bool b_half_height; /* Shall be divide the height by 2 */
145 void (*pf_merge) ( void *, const void *, const void *, size_t );
146 void (*pf_end_merge) ( void );
148 /* Metadata history (PTS, nb_fields, TFF). Used for framerate doublers. */
149 metadata_history_t meta;
151 /* Output frame timing / framerate doubler control (see below) */
155 picture_t *pp_history[HISTORY_SIZE];
158 /* NOTE on i_frame_offset:
160 This value indicates the offset between input and output frames in the currently active deinterlace algorithm.
161 See the rationale below for why this is needed and how it is used.
163 Valid range: 0 <= i_frame_offset < METADATA_SIZE, or i_frame_offset = CUSTOM_PTS.
164 The special value CUSTOM_PTS is only allowed if b_double_rate is false.
166 If CUSTOM_PTS is used, the algorithm must compute the outgoing PTSs itself,
167 and additionally, read the TFF/BFF information itself (if it needs it)
168 from the incoming frames.
171 0 = output frame corresponds to the current input frame
172 (no frame offset; default if not set),
173 1 = output frame corresponds to the previous input frame
174 (e.g. Yadif and Yadif2x work like this),
177 If necessary, i_frame_offset should be updated by the active deinterlace algorithm
178 to indicate the correct delay for the *next* input frame. It does not matter at which i_order
179 the algorithm updates this information, but the new value will only take effect upon the
180 next call to Deinterlace() (i.e. at the next incoming frame).
182 The first-ever frame that arrives to the filter after Open() is always handled as having
183 i_frame_offset = 0. For the second and all subsequent frames, each algorithm is responsible
    for setting the offset correctly. (The default is 0, so if that is correct, there's no need
    to do anything.)
187 This solution guarantees that i_frame_offset:
188 1) is up to date at the start of each frame,
189 2) does not change (as far as Deinterlace() is concerned) during a frame, and
190 3) does not need a special API for setting the value at the start of each input frame,
191 before the algorithm starts rendering the (first) output frame for that input frame.
193 The deinterlace algorithm is allowed to behave differently for different input frames.
194 This is especially important for startup, when full history (as defined by each algorithm)
195 is not yet available. During the first-ever input frame, it is clear that it is the
196 only possible source for information, so i_frame_offset = 0 is necessarily correct.
197 After that, what to do is up to each algorithm.
199 Having the correct offset at the start of each input frame is critically important in order to:
200 1) Allocate the correct number of output frames for framerate doublers, and to
201 2) Pass correct TFF/BFF information to the algorithm.
203 These points are important for proper soft field repeat support. This feature is used in some
204 streams originating from film. In soft NTSC telecine, the number of fields alternates as 3,2,3,2,...
205 and the video field dominance flips every two frames (after every "3"). Also, some streams
206 request an occasional field repeat (nb_fields = 3), after which the video field dominance flips.
207 To render such streams correctly, the nb_fields and TFF/BFF information must be taken from
208 the specific input frame that the algorithm intends to render.
210 Additionally, the output PTS is automatically computed by Deinterlace() from i_frame_offset and i_order.
212 It is possible to use the special value CUSTOM_PTS to indicate that the algorithm computes
213 the output PTSs itself. In this case, Deinterlace() will pass them through. This special value
214 is not valid for framerate doublers, as by definition they are field renderers, so they need to
215 use the original field timings to work correctly. Basically, this special value is only intended
    for algorithms that need to perform nontrivial framerate conversions (such as IVTC).
*/
220 /*****************************************************************************
221 * SetFilterMethod: setup the deinterlace method to use.
222 *****************************************************************************/
223 static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fourcc_t i_chroma )
225 filter_sys_t *p_sys = p_filter->p_sys;
230 if( !strcmp( psz_method, "mean" ) )
232 p_sys->i_mode = DEINTERLACE_MEAN;
233 p_sys->b_double_rate = false;
234 p_sys->b_half_height = true;
236 else if( !strcmp( psz_method, "bob" )
237 || !strcmp( psz_method, "progressive-scan" ) )
239 p_sys->i_mode = DEINTERLACE_BOB;
240 p_sys->b_double_rate = true;
241 p_sys->b_half_height = false;
243 else if( !strcmp( psz_method, "linear" ) )
245 p_sys->i_mode = DEINTERLACE_LINEAR;
246 p_sys->b_double_rate = true;
247 p_sys->b_half_height = false;
249 else if( !strcmp( psz_method, "x" ) )
251 p_sys->i_mode = DEINTERLACE_X;
252 p_sys->b_double_rate = false;
253 p_sys->b_half_height = false;
255 else if( !strcmp( psz_method, "yadif" ) )
257 p_sys->i_mode = DEINTERLACE_YADIF;
258 p_sys->b_double_rate = false;
259 p_sys->b_half_height = false;
261 else if( !strcmp( psz_method, "yadif2x" ) )
263 p_sys->i_mode = DEINTERLACE_YADIF2X;
264 p_sys->b_double_rate = true;
265 p_sys->b_half_height = false;
267 else if( !strcmp( psz_method, "discard" ) )
269 const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
270 i_chroma == VLC_CODEC_J422;
272 p_sys->i_mode = DEINTERLACE_DISCARD;
273 p_sys->b_double_rate = false;
274 p_sys->b_half_height = !b_i422;
278 if( strcmp( psz_method, "blend" ) )
280 "no valid deinterlace mode provided, using \"blend\"" );
282 p_sys->i_mode = DEINTERLACE_BLEND;
283 p_sys->b_double_rate = false;
284 p_sys->b_half_height = false;
287 p_sys->i_frame_offset = 0; /* reset to default when method changes */
289 msg_Dbg( p_filter, "using %s deinterlace method", psz_method );
292 static void GetOutputFormat( filter_t *p_filter,
293 video_format_t *p_dst, const video_format_t *p_src )
295 filter_sys_t *p_sys = p_filter->p_sys;
298 if( p_sys->b_half_height )
300 p_dst->i_height /= 2;
301 p_dst->i_visible_height /= 2;
302 p_dst->i_y_offset /= 2;
303 p_dst->i_sar_den *= 2;
306 if( p_src->i_chroma == VLC_CODEC_I422 ||
307 p_src->i_chroma == VLC_CODEC_J422 )
309 switch( p_sys->i_mode )
311 case DEINTERLACE_MEAN:
312 case DEINTERLACE_LINEAR:
314 case DEINTERLACE_YADIF:
315 case DEINTERLACE_YADIF2X:
316 p_dst->i_chroma = p_src->i_chroma;
319 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
326 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
328 return i_chroma == VLC_CODEC_I420 ||
329 i_chroma == VLC_CODEC_J420 ||
330 i_chroma == VLC_CODEC_YV12 ||
331 i_chroma == VLC_CODEC_I422 ||
332 i_chroma == VLC_CODEC_J422;
335 /*****************************************************************************
336 * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
337 *****************************************************************************/
338 static void RenderDiscard( filter_t *p_filter,
339 picture_t *p_outpic, picture_t *p_pic, int i_field )
343 /* Copy image and skip lines */
344 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
346 uint8_t *p_in, *p_out_end, *p_out;
349 p_in = p_pic->p[i_plane].p_pixels
350 + i_field * p_pic->p[i_plane].i_pitch;
352 p_out = p_outpic->p[i_plane].p_pixels;
353 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
354 * p_outpic->p[i_plane].i_visible_lines;
356 switch( p_filter->fmt_in.video.i_chroma )
362 for( ; p_out < p_out_end ; )
364 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
366 p_out += p_outpic->p[i_plane].i_pitch;
367 p_in += 2 * p_pic->p[i_plane].i_pitch;
374 i_increment = 2 * p_pic->p[i_plane].i_pitch;
376 if( i_plane == Y_PLANE )
378 for( ; p_out < p_out_end ; )
380 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
381 p_out += p_outpic->p[i_plane].i_pitch;
382 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
383 p_out += p_outpic->p[i_plane].i_pitch;
389 for( ; p_out < p_out_end ; )
391 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
392 p_out += p_outpic->p[i_plane].i_pitch;
404 /*****************************************************************************
405 * RenderBob: renders a BOB picture - simple copy
406 *****************************************************************************/
407 static void RenderBob( filter_t *p_filter,
408 picture_t *p_outpic, picture_t *p_pic, int i_field )
412 /* Copy image and skip lines */
413 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
415 uint8_t *p_in, *p_out_end, *p_out;
417 p_in = p_pic->p[i_plane].p_pixels;
418 p_out = p_outpic->p[i_plane].p_pixels;
419 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
420 * p_outpic->p[i_plane].i_visible_lines;
422 switch( p_filter->fmt_in.video.i_chroma )
427 /* For BOTTOM field we need to add the first line */
430 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
431 p_in += p_pic->p[i_plane].i_pitch;
432 p_out += p_outpic->p[i_plane].i_pitch;
435 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
437 for( ; p_out < p_out_end ; )
439 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
441 p_out += p_outpic->p[i_plane].i_pitch;
443 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
445 p_in += 2 * p_pic->p[i_plane].i_pitch;
446 p_out += p_outpic->p[i_plane].i_pitch;
449 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
451 /* For TOP field we need to add the last line */
454 p_in += p_pic->p[i_plane].i_pitch;
455 p_out += p_outpic->p[i_plane].i_pitch;
456 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
462 /* For BOTTOM field we need to add the first line */
465 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
466 p_in += p_pic->p[i_plane].i_pitch;
467 p_out += p_outpic->p[i_plane].i_pitch;
470 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
472 if( i_plane == Y_PLANE )
474 for( ; p_out < p_out_end ; )
476 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
478 p_out += p_outpic->p[i_plane].i_pitch;
480 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
482 p_in += 2 * p_pic->p[i_plane].i_pitch;
483 p_out += p_outpic->p[i_plane].i_pitch;
488 for( ; p_out < p_out_end ; )
490 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
492 p_out += p_outpic->p[i_plane].i_pitch;
493 p_in += 2 * p_pic->p[i_plane].i_pitch;
497 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
499 /* For TOP field we need to add the last line */
502 p_in += p_pic->p[i_plane].i_pitch;
503 p_out += p_outpic->p[i_plane].i_pitch;
504 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
511 #define Merge p_filter->p_sys->pf_merge
512 #define EndMerge if(p_filter->p_sys->pf_end_merge) p_filter->p_sys->pf_end_merge
514 /*****************************************************************************
515 * RenderLinear: BOB with linear interpolation
516 *****************************************************************************/
517 static void RenderLinear( filter_t *p_filter,
518 picture_t *p_outpic, picture_t *p_pic, int i_field )
522 /* Copy image and skip lines */
523 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
525 uint8_t *p_in, *p_out_end, *p_out;
527 p_in = p_pic->p[i_plane].p_pixels;
528 p_out = p_outpic->p[i_plane].p_pixels;
529 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
530 * p_outpic->p[i_plane].i_visible_lines;
532 /* For BOTTOM field we need to add the first line */
535 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
536 p_in += p_pic->p[i_plane].i_pitch;
537 p_out += p_outpic->p[i_plane].i_pitch;
540 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
542 for( ; p_out < p_out_end ; )
544 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
546 p_out += p_outpic->p[i_plane].i_pitch;
548 Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
549 p_pic->p[i_plane].i_pitch );
551 p_in += 2 * p_pic->p[i_plane].i_pitch;
552 p_out += p_outpic->p[i_plane].i_pitch;
555 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
557 /* For TOP field we need to add the last line */
560 p_in += p_pic->p[i_plane].i_pitch;
561 p_out += p_outpic->p[i_plane].i_pitch;
562 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
568 static void RenderMean( filter_t *p_filter,
569 picture_t *p_outpic, picture_t *p_pic )
573 /* Copy image and skip lines */
574 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
576 uint8_t *p_in, *p_out_end, *p_out;
578 p_in = p_pic->p[i_plane].p_pixels;
580 p_out = p_outpic->p[i_plane].p_pixels;
581 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
582 * p_outpic->p[i_plane].i_visible_lines;
584 /* All lines: mean value */
585 for( ; p_out < p_out_end ; )
587 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
588 p_pic->p[i_plane].i_pitch );
590 p_out += p_outpic->p[i_plane].i_pitch;
591 p_in += 2 * p_pic->p[i_plane].i_pitch;
597 static void RenderBlend( filter_t *p_filter,
598 picture_t *p_outpic, picture_t *p_pic )
602 /* Copy image and skip lines */
603 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
605 uint8_t *p_in, *p_out_end, *p_out;
607 p_in = p_pic->p[i_plane].p_pixels;
609 p_out = p_outpic->p[i_plane].p_pixels;
610 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
611 * p_outpic->p[i_plane].i_visible_lines;
613 switch( p_filter->fmt_in.video.i_chroma )
618 /* First line: simple copy */
619 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
620 p_out += p_outpic->p[i_plane].i_pitch;
622 /* Remaining lines: mean value */
623 for( ; p_out < p_out_end ; )
625 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
626 p_pic->p[i_plane].i_pitch );
628 p_out += p_outpic->p[i_plane].i_pitch;
629 p_in += p_pic->p[i_plane].i_pitch;
635 /* First line: simple copy */
636 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
637 p_out += p_outpic->p[i_plane].i_pitch;
639 /* Remaining lines: mean value */
640 if( i_plane == Y_PLANE )
642 for( ; p_out < p_out_end ; )
644 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
645 p_pic->p[i_plane].i_pitch );
647 p_out += p_outpic->p[i_plane].i_pitch;
648 p_in += p_pic->p[i_plane].i_pitch;
654 for( ; p_out < p_out_end ; )
656 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
657 p_pic->p[i_plane].i_pitch );
659 p_out += p_outpic->p[i_plane].i_pitch;
660 p_in += 2*p_pic->p[i_plane].i_pitch;
/* MergeGeneric: portable C merge, writes the per-byte average of two buffers.
 *
 *   _p_dest - destination, at least i_bytes long
 *   _p_s1   - first source, at least i_bytes long
 *   _p_s2   - second source, at least i_bytes long
 *   i_bytes - number of bytes to process; 0 is allowed and writes nothing
 *
 * Each output byte is (s1 + s2) >> 1, i.e. the average rounded down.
 *
 * The loop is driven by an index instead of an end pointer computed as
 * "p_dest + i_bytes - 8": for i_bytes < 8 that expression forms a pointer
 * before the start of the buffer, which is undefined behaviour. Unrolling is
 * left to the compiler.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
    size_t i;

    for( i = 0; i < i_bytes; i++ )
        p_dest[i] = (uint8_t)( ( (unsigned)p_s1[i] + (unsigned)p_s2[i] ) >> 1 );
}
699 #if defined(CAN_COMPILE_MMXEXT)
700 static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
703 uint8_t* p_dest = (uint8_t*)_p_dest;
704 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
705 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
706 uint8_t* p_end = p_dest + i_bytes - 8;
707 while( p_dest < p_end )
709 __asm__ __volatile__( "movq %2,%%mm1;"
711 "movq %%mm1, %0" :"=m" (*p_dest):
721 while( p_dest < p_end )
723 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
728 #if defined(CAN_COMPILE_3DNOW)
729 static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
732 uint8_t* p_dest = (uint8_t*)_p_dest;
733 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
734 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
735 uint8_t* p_end = p_dest + i_bytes - 8;
736 while( p_dest < p_end )
738 __asm__ __volatile__( "movq %2,%%mm1;"
740 "movq %%mm1, %0" :"=m" (*p_dest):
750 while( p_dest < p_end )
752 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
757 #if defined(CAN_COMPILE_SSE)
758 static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
761 uint8_t* p_dest = (uint8_t*)_p_dest;
762 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
763 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
765 while( (uintptr_t)p_s1 % 16 )
767 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
769 p_end = p_dest + i_bytes - 16;
770 while( p_dest < p_end )
772 __asm__ __volatile__( "movdqu %2,%%xmm1;"
774 "movdqu %%xmm1, %0" :"=m" (*p_dest):
784 while( p_dest < p_end )
786 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
791 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
792 static void EndMMX( void )
794 __asm__ __volatile__( "emms" :: );
798 #if defined(CAN_COMPILE_3DNOW)
799 static void End3DNow( void )
801 __asm__ __volatile__( "femms" :: );
805 #ifdef CAN_COMPILE_C_ALTIVEC
806 static void MergeAltivec( void *_p_dest, const void *_p_s1,
807 const void *_p_s2, size_t i_bytes )
809 uint8_t *p_dest = (uint8_t *)_p_dest;
810 uint8_t *p_s1 = (uint8_t *)_p_s1;
811 uint8_t *p_s2 = (uint8_t *)_p_s2;
812 uint8_t *p_end = p_dest + i_bytes - 15;
814 /* Use C until the first 16-bytes aligned destination pixel */
815 while( (uintptr_t)p_dest & 0xF )
817 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
820 if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
822 /* Unaligned source */
823 vector unsigned char s1v, s2v, destv;
824 vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
825 vector unsigned char perm1v, perm2v;
827 perm1v = vec_lvsl( 0, p_s1 );
828 perm2v = vec_lvsl( 0, p_s2 );
829 s1oldv = vec_ld( 0, p_s1 );
830 s2oldv = vec_ld( 0, p_s2 );
832 while( p_dest < p_end )
834 s1newv = vec_ld( 16, p_s1 );
835 s2newv = vec_ld( 16, p_s2 );
836 s1v = vec_perm( s1oldv, s1newv, perm1v );
837 s2v = vec_perm( s2oldv, s2newv, perm2v );
840 destv = vec_avg( s1v, s2v );
841 vec_st( destv, 0, p_dest );
851 vector unsigned char s1v, s2v, destv;
853 while( p_dest < p_end )
855 s1v = vec_ld( 0, p_s1 );
856 s2v = vec_ld( 0, p_s2 );
857 destv = vec_avg( s1v, s2v );
858 vec_st( destv, 0, p_dest );
868 while( p_dest < p_end )
870 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
876 static void MergeNEON (void *restrict out, const void *in1,
877 const void *in2, size_t n)
880 const uint8_t *in1p = in1;
881 const uint8_t *in2p = in2;
882 size_t mis = ((uintptr_t)outp) & 15;
886 MergeGeneric (outp, in1p, in2p, mis);
893 uint8_t *end = outp + (n & ~15);
895 if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
898 "vld1.u8 {q0-q1}, [%[in1]]!\n"
899 "vld1.u8 {q2-q3}, [%[in2]]!\n"
900 "vhadd.u8 q4, q0, q2\n"
901 "vld1.u8 {q6-q7}, [%[in1]]!\n"
902 "vhadd.u8 q5, q1, q3\n"
903 "vld1.u8 {q8-q9}, [%[in2]]!\n"
904 "vhadd.u8 q10, q6, q8\n"
905 "vhadd.u8 q11, q7, q9\n"
906 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
907 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
908 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
910 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
911 "q8", "q9", "q10", "q11", "memory");
915 "vld1.u8 {q0-q1}, [%[in1],:128]!\n"
916 "vld1.u8 {q2-q3}, [%[in2],:128]!\n"
917 "vhadd.u8 q4, q0, q2\n"
918 "vld1.u8 {q6-q7}, [%[in1],:128]!\n"
919 "vhadd.u8 q5, q1, q3\n"
920 "vld1.u8 {q8-q9}, [%[in2],:128]!\n"
921 "vhadd.u8 q10, q6, q8\n"
922 "vhadd.u8 q11, q7, q9\n"
923 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
924 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
925 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
927 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
928 "q8", "q9", "q10", "q11", "memory");
931 MergeGeneric (outp, in1p, in2p, n);
935 /*****************************************************************************
936 * RenderX: This algo works on a 8x8 block basic, it copies the top field
937 * and apply a process to recreate the bottom field :
938 * If a 8x8 block is classified as :
939 * - progressive: it applies a small blend (1,6,1)
941 * * in the MMX version: we do a ME between the 2 fields, if there is a
942 * good match we use MC to recreate the bottom field (with a small
944 * * otherwise: it recreates the bottom field by an edge oriented
946 *****************************************************************************/
/* XDeint8x8Detect: detect whether an 8x8 block is interlaced.
 * XXX: it needs to access an 8x10 area.
 * More than 8 lines are used to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9).
 * XXX: detection does not work well on smooth/uniform areas with noise,
 * but that is not really a problem because they hold little detail anyway.
 */

/* ssd: squared difference helper, returns a*a. */
static inline int ssd( int a ) { return a*a; }

/* XDeint8x8DetectC: C implementation of the detector.
 *
 *   src   - top-left pixel of the block; lines 0..9 and columns 0..7 are read
 *   i_src - distance in bytes between two consecutive lines
 *
 * Returns 1 when at least one group of four lines looks interlaced,
 * 0 otherwise.
 */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
{
    int fc = 0; /* number of line groups classified as interlaced */
    int y, x;

    /* Detect interlacing */
    for( y = 0; y < 7; y += 2 )
    {
        int fr = 0; /* squared differences between adjacent lines */
        int ff = 0; /* squared differences between lines two apart */

        for( x = 0; x < 8; x++ )
        {
            fr += ssd(src[      x] - src[1*i_src+x]) +
                  ssd(src[i_src+x] - src[2*i_src+x]);
            ff += ssd(src[      x] - src[2*i_src+x]) +
                  ssd(src[i_src+x] - src[3*i_src+x]);
        }

        /* Interlaced when lines two apart match clearly better than
         * adjacent lines, and the adjacent-line difference is not noise. */
        if( ff < 6*fr/8 && fr > 32 )
            fc++;

        src += 2*i_src;
    }

    return fc >= 1;
}
982 #ifdef CAN_COMPILE_MMXEXT
983 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
990 /* Detect interlacing */
992 pxor_r2r( mm7, mm7 );
993 for( y = 0; y < 9; y += 2 )
996 pxor_r2r( mm5, mm5 );
997 pxor_r2r( mm6, mm6 );
998 for( x = 0; x < 8; x+=4 )
1000 movd_m2r( src[ x], mm0 );
1001 movd_m2r( src[1*i_src+x], mm1 );
1002 movd_m2r( src[2*i_src+x], mm2 );
1003 movd_m2r( src[3*i_src+x], mm3 );
1005 punpcklbw_r2r( mm7, mm0 );
1006 punpcklbw_r2r( mm7, mm1 );
1007 punpcklbw_r2r( mm7, mm2 );
1008 punpcklbw_r2r( mm7, mm3 );
1010 movq_r2r( mm0, mm4 );
1012 psubw_r2r( mm1, mm0 );
1013 psubw_r2r( mm2, mm4 );
1015 psubw_r2r( mm1, mm2 );
1016 psubw_r2r( mm1, mm3 );
1018 pmaddwd_r2r( mm0, mm0 );
1019 pmaddwd_r2r( mm4, mm4 );
1020 pmaddwd_r2r( mm2, mm2 );
1021 pmaddwd_r2r( mm3, mm3 );
1022 paddd_r2r( mm0, mm2 );
1023 paddd_r2r( mm4, mm3 );
1024 paddd_r2r( mm2, mm5 );
1025 paddd_r2r( mm3, mm6 );
1028 movq_r2r( mm5, mm0 );
1029 psrlq_i2r( 32, mm0 );
1030 paddd_r2r( mm0, mm5 );
1031 movd_r2m( mm5, fr );
1033 movq_r2r( mm6, mm0 );
1034 psrlq_i2r( 32, mm0 );
1035 paddd_r2r( mm0, mm6 );
1036 movd_r2m( mm6, ff );
1038 if( ff < 6*fr/8 && fr > 32 )
/* XDeint8x8MergeC: build an 8x8 progressive block from two line sets.
 *
 *   dst, i_dst   - output block and its line pitch
 *   src1, i_src1 - lines copied verbatim to the even output lines; one line
 *                  beyond the fourth (src1 + 4*i_src1) is also read
 *   src2, i_src2 - lines blended into the odd output lines
 *
 * Every odd output line is the (1,6,1)/8 blend of the src1 line above, the
 * src2 line and the src1 line below, rounded to nearest.
 */
static inline void XDeint8x8MergeC( uint8_t *dst,  int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src1, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
            dst[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4 ) >> 3;
        dst += i_dst;

        src1 += i_src1;
        src2 += i_src2;
    }
}
1068 #ifdef CAN_COMPILE_MMXEXT
1069 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
1070 uint8_t *src1, int i_src1,
1071 uint8_t *src2, int i_src2 )
1073 static const uint64_t m_4 = INT64_C(0x0004000400040004);
1077 pxor_r2r( mm7, mm7 );
1078 for( y = 0; y < 8; y += 2 )
1080 for( x = 0; x < 8; x +=4 )
1082 movd_m2r( src1[x], mm0 );
1083 movd_r2m( mm0, dst[x] );
1085 movd_m2r( src2[x], mm1 );
1086 movd_m2r( src1[i_src1+x], mm2 );
1088 punpcklbw_r2r( mm7, mm0 );
1089 punpcklbw_r2r( mm7, mm1 );
1090 punpcklbw_r2r( mm7, mm2 );
1091 paddw_r2r( mm1, mm1 );
1092 movq_r2r( mm1, mm3 );
1093 paddw_r2r( mm3, mm3 );
1094 paddw_r2r( mm2, mm0 );
1095 paddw_r2r( mm3, mm1 );
1096 paddw_m2r( m_4, mm1 );
1097 paddw_r2r( mm1, mm0 );
1098 psraw_i2r( 3, mm0 );
1099 packuswb_r2r( mm7, mm0 );
1100 movd_r2m( mm0, dst[i_dst+x] );
/* XDeint8x8Set: fill an 8x8 block with the constant value v.
 *
 *   dst   - top-left pixel of the block
 *   i_dst - distance in bytes between two consecutive lines
 *   v     - value written to all 64 pixels
 */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    int y;

    for( y = 0; y < 8; y++ )
        memset( &dst[y*i_dst], v, 8 );
}
/* XDeint8x8FieldE: simple (1,0,1) deinterlacing for blocks that miss a
 * neighbour (the caller uses it for the first and last block of a band).
 * Reads 8x9 pixels: lines 0, 2, 4, 6 and 8 of the source block.
 * TODO: a better one for the inner part.
 *
 *   dst, i_dst - output block and its line pitch
 *   src, i_src - source block and its line pitch
 *
 * Even output lines are copies of the even source lines; each odd output
 * line is the average (rounded down) of the even source lines around it.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
            dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1140 #ifdef CAN_COMPILE_MMXEXT
1141 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
1142 uint8_t *src, int i_src )
1147 for( y = 0; y < 8; y += 2 )
1149 movq_m2r( src[0], mm0 );
1150 movq_r2m( mm0, dst[0] );
1153 movq_m2r( src[2*i_src], mm1 );
1154 pavgb_r2r( mm1, mm0 );
1156 movq_r2m( mm0, dst[0] );
/* XDeint8x8Field: Edge oriented interpolation
 * (Need -4 and +5 pixels H, +1 line)
 */

/* XDeintSad8: sum of absolute differences over 8 consecutive pixels. */
static inline int XDeintSad8( const uint8_t *a, const uint8_t *b )
{
    int i, sum = 0;

    for( i = 0; i < 8; i++ )
        sum += abs( a[i] - b[i] );
    return sum;
}

/* XDeint8x8FieldC: C implementation.
 *
 *   dst, i_dst - output block and its line pitch
 *   src, i_src - source block and its line pitch; columns -4..+12 of lines
 *                0, 2, 4, 6 and 8 are read
 *
 * Even output lines are copies of the even source lines. Each pixel of an
 * odd output line is the average of one pixel from the line above and one
 * from the line below, taken along the direction (one pixel to either side,
 * or straight down) whose 8-pixel window matches best.
 */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
{
    int y, x;

    /* Interlaced */
    for( y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( x = 0; x < 8; x++ )
        {
            uint8_t *src2 = &src[2*i_src];
            /* 8-pixel windows are used to match the MMX version; 5 would be
             * enough (less is not good).
             * c0: lower line shifted 2 pixels to the left,
             * c1: lines aligned,
             * c2: lower line shifted 2 pixels to the right. */
            const int c0 = XDeintSad8( &src[x-4], &src2[x-2] );
            const int c1 = XDeintSad8( &src[x-3], &src2[x-3] );
            const int c2 = XDeintSad8( &src[x-2], &src2[x-4] );

            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
        }

        dst += 1*i_dst;
        src += 2*i_src;
    }
}
1210 #ifdef CAN_COMPILE_MMXEXT
1211 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
1212 uint8_t *src, int i_src )
1217 for( y = 0; y < 8; y += 2 )
1219 memcpy( dst, src, 8 );
1222 for( x = 0; x < 8; x++ )
1224 uint8_t *src2 = &src[2*i_src];
1227 movq_m2r( src[x-2], mm0 );
1228 movq_m2r( src[x-3], mm1 );
1229 movq_m2r( src[x-4], mm2 );
1231 psadbw_m2r( src2[x-4], mm0 );
1232 psadbw_m2r( src2[x-3], mm1 );
1233 psadbw_m2r( src2[x-2], mm2 );
1235 movd_r2m( mm0, c2 );
1236 movd_r2m( mm1, c1 );
1237 movd_r2m( mm2, c0 );
1239 if( c0 < c1 && c1 <= c2 )
1240 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1241 else if( c2 < c1 && c1 <= c0 )
1242 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1244 dst[x] = (src[x+0] + src2[x+0]) >> 1;
/* NxN arbitrary size (and then only use pixels in the NxN block)
 */

/* XDeintNxNDetect: detect whether an i_width x i_height block is interlaced.
 *
 *   src      - top-left pixel of the block
 *   i_src    - distance in bytes between two consecutive lines
 *   i_height - number of lines in the block (note: height comes BEFORE width)
 *   i_width  - number of columns in the block
 *
 * Returns 1 when at least two line pairs look interlaced, 0 otherwise.
 * The difference sums accumulate over the whole block; they are not reset
 * for each line pair.
 */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
{
    int y, x;
    int ff = 0; /* squared differences between lines two apart */
    int fr = 0; /* squared differences between adjacent lines */
    int fc = 0; /* number of line pairs classified as interlaced */

    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    for( y = 0; y < i_height - 2; y += 2 )
    {
        const uint8_t *s = &src[y*i_src];
        for( x = 0; x < i_width; x++ )
        {
            fr += ssd(s[      x] - s[1*i_src+x]);
            ff += ssd(s[      x] - s[2*i_src+x]);
        }
        if( ff < fr && fr > i_width / 2 )
            fc++;
    }

    return fc >= 2;
}
/* XDeintNxNFrame: progressive rendering of an i_width x i_height block.
 *
 *   dst, i_dst - output block and its line pitch
 *   src, i_src - source block and its line pitch
 *   i_width    - number of columns to process
 *   i_height   - number of lines to process (handled two at a time)
 *
 * Even output lines are copies of the source lines. Odd output lines are a
 * (1,2,1)/4 blend of the three surrounding source lines, except the last one,
 * which is the plain average of the final two source lines so that nothing
 * below the block is read.
 */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    int y, x;

    /* Progressive */
    for( y = 0; y < i_height; y += 2 )
    {
        memcpy( dst, src, i_width );
        dst += i_dst;

        if( y < i_height - 2 )
        {
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        }
        else
        {
            /* Blend last line */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
        }
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
/**
 * Render an interlaced-looking block of arbitrary size.
 *
 * Even rows are copied from the source. Each odd row is rebuilt as the
 * average of the even source rows above and below it (same field); the last
 * odd row, which has no even row below it, is the average of the two
 * remaining source rows.
 *
 * When i_height is odd, the final (even) row is only copied: nothing is read
 * from or written to row i_height, which lies outside the block.
 *
 * \param dst      output block
 * \param i_dst    output pitch in bytes
 * \param src      input block
 * \param i_src    input pitch in bytes
 * \param i_width  block width in pixels
 * \param i_height block height in rows
 */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
{
    for( int y = 0; y < i_height; y += 2 )
    {
        /* Even row: straight copy */
        memcpy( dst, src, i_width );

        /* Odd block height: there is no row y + 1 to produce */
        if( y + 1 >= i_height )
            break;

        uint8_t *p_odd = dst + i_dst;

        if( y < i_height - 2 )
        {
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = (src[x] + src[2*i_src+x] ) >> 1;
        }
        else
        {
            /* Blend last line */
            for( int x = 0; x < i_width; x++ )
                p_odd[x] = (src[x] + src[i_src+x]) >> 1;
        }

        dst += 2*i_dst;
        src += 2*i_src;
    }
}
/**
 * Deinterlace a block of arbitrary size: run the interlacing detector and
 * dispatch to the field or frame renderer accordingly.
 *
 * \param dst      output block
 * \param i_dst    output pitch in bytes
 * \param src      input block
 * \param i_src    input pitch in bytes
 * \param i_width  block width in pixels
 * \param i_height block height in rows
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    /* XDeintNxNDetect() takes the height BEFORE the width, unlike the two
     * renderers below; passing them in (width, height) order made it scan
     * the wrong rectangle for every non-square block. */
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
/**
 * Return the median (middle value) of three integers.
 *
 * Implemented with comparisons only, so it is well defined for the whole
 * range of int (summing the three values could overflow).
 */
static inline int median( int a, int b, int c )
{
    const int i_low  = a < b ? a : b;
    const int i_high = a < b ? b : a;

    /* c below both -> the smaller of a/b is the middle value */
    if( c <= i_low )
        return i_low;
    /* c above both -> the larger of a/b is the middle value */
    if( c >= i_high )
        return i_high;
    return c;
}
/**
 * Deinterlace one 8-line band with the plain C block routines.
 *
 * The band is walked in 8x8 blocks. Blocks reported as interlaced by the
 * detector are rebuilt by the field interpolator (the edge variant for the
 * first and last block of the band); the others are merged from their two
 * fields. A trailing partial block of i_modx columns, if any, goes through
 * the generic NxN path.
 *
 * \param dst    first output line of the band
 * \param i_dst  output pitch in bytes
 * \param src    first input line of the band
 * \param i_src  input pitch in bytes
 * \param i_mbx  number of complete 8-pixel wide blocks
 * \param i_modx width of the trailing partial block (0 if none)
 */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;

    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectC( src, i_src ) )
        {
            /* Not interlaced: merge the two fields */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced: the edge variant is used on the band borders */
        if( i_mb == 0 || i_mb == i_last )
            XDeint8x8FieldEC( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldC( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#ifdef CAN_COMPILE_MMXEXT
/**
 * Deinterlace one 8-line band with the MMXEXT block routines.
 *
 * The band is walked in 8x8 blocks. Blocks reported as interlaced by the
 * detector are rebuilt by the field interpolator (the edge variant for the
 * first and last block of the band); the others are merged from their two
 * fields. A trailing partial block of i_modx columns, if any, goes through
 * the generic (C) NxN path.
 *
 * \param dst    first output line of the band
 * \param i_dst  output pitch in bytes
 * \param src    first input line of the band
 * \param i_src  input pitch in bytes
 * \param i_mbx  number of complete 8-pixel wide blocks
 * \param i_modx width of the trailing partial block (0 if none)
 */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
{
    const int i_last = i_mbx - 1;

    for( int i_mb = 0; i_mb < i_mbx; i_mb++, dst += 8, src += 8 )
    {
        if( !XDeint8x8DetectMMXEXT( src, i_src ) )
        {
            /* Not interlaced: merge the two fields */
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
            continue;
        }

        /* Interlaced: the edge variant is used on the band borders */
        if( i_mb == 0 || i_mb == i_last )
            XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
    }

    if( i_modx )
        XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
}
#endif
/*
 * "X" deinterlacer: renders p_pic into p_outpic plane by plane.
 *
 * Each plane is cut into bands of 8 lines. All bands but the last are handed
 * to XDeintBand8x8MMXEXT() (when compiled in and the CPU supports it) or to
 * XDeintBand8x8C(). The last band is always processed with the generic
 * XDeintNxN() routine.
 */
1431 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1435 /* Process every plane */
1436 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
/* i_mby: number of 8-line bands, not counting the last one.
 * i_mbx: number of complete 8-pixel wide blocks per band. */
1438 const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1439 const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
/* i_mody: height of the last band (1..8 lines for a non-empty plane).
 * i_modx: width of the trailing partial block (0..7 pixels). */
1441 const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1442 const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
/* Pitches of the output and input planes. */
1444 const int i_dst = p_outpic->p[i_plane].i_pitch;
1445 const int i_src = p_pic->p[i_plane].i_pitch;
/* Regular bands of 8 lines. */
1449 for( y = 0; y < i_mby; y++ )
1451 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1452 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
1454 #ifdef CAN_COMPILE_MMXEXT
1455 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1456 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1459 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1462 /* Last band (C only) */
/* y equals i_mby here, so dst/src point at the first line of the last band. */
1465 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1466 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
/* Complete 8-pixel wide blocks of the last band. */
1468 for( x = 0; x < i_mbx; x++ )
1470 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
/* Trailing partial block of the last band. */
1477 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
/* NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably it resets the MMX state — confirm. */
1481 #ifdef CAN_COMPILE_MMXEXT
1482 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1487 /*****************************************************************************
1488 * Yadif (Yet Another DeInterlacing Filter).
1489 *****************************************************************************/
1493 * 0: Output 1 frame for each frame.
1494 * 1: Output 1 frame for each field.
1495 * 2: Like 0 but skips spatial interlacing check.
1496 * 3: Like 1 but skips spatial interlacing check.
1498 * In VLC, only & 0x02 has meaning, as we do the & 0x01 ourselves.
/* Type and helper macros defined ahead of the imported Yadif code. */
1503 /* I am unsure it is the right one */
1504 typedef intptr_t x86_reg;
/* Absolute value; the argument is evaluated more than once. */
1506 #define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
/* Maximum of two / three values, mapped onto __MAX. */
1507 #define FFMAX(a,b) __MAX(a,b)
1508 #define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
/* Minimum of two / three values, mapped onto __MIN. */
1509 #define FFMIN(a,b) __MIN(a,b)
1510 #define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
1512 /* yadif.h comes from vf_yadif.c of mplayer project */
/*
 * Render one output picture with the Yadif algorithm.
 *
 * p_filter - filter instance; its p_sys holds the picture history
 * p_dst    - output picture
 * p_src    - newest input picture; a copy of it is pushed into the history
 *            when i_order is 0 (see below)
 * i_order  - 0, 1 or 2 (2 = soft field repeat), see the assert below
 * i_field  - 0 or 1; lines whose parity (y % 2) equals i_field are copied
 *            from the current picture, the others are interpolated
 *
 * Three history pictures (previous, current, next) are needed for filtering.
 * With all three available, every plane is filtered line by line and
 * p_sys->i_frame_offset is set to 1. With only the newest picture available,
 * RenderX() is used instead. In the remaining case i_frame_offset is set to 1
 * and VLC_EGENERIC is returned.
 * NOTE(review): the return statements of the first two cases are not visible
 * in this excerpt; presumably they return VLC_SUCCESS — confirm.
 */
1515 static int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1517 filter_sys_t *p_sys = p_filter->p_sys;
1520 assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
1521 assert( i_field == 0 || i_field == 1 );
1525 /* Duplicate the picture
1526 * TODO when the vout rework is finished, picture_Hold() might be enough
1527 * but be careful, the pitches must match */
1528 picture_t *p_dup = picture_NewFromFormat( &p_src->format );
1530 picture_Copy( p_dup, p_src );
1532 /* Slide the history */
1533 if( p_sys->pp_history[0] )
1534 picture_Release( p_sys->pp_history[0] );
1535 for( int i = 1; i < HISTORY_SIZE; i++ )
1536 p_sys->pp_history[i-1] = p_sys->pp_history[i];
1537 p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1540 /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1541 picture_t *p_prev = p_sys->pp_history[0];
1542 picture_t *p_cur = p_sys->pp_history[1];
1543 picture_t *p_next = p_sys->pp_history[2];
1545 /* Account for soft field repeat.
1547 The "parity" parameter affects the algorithm like this (from yadif.h):
1548 uint8_t *prev2= parity ? prev : cur ;
1549 uint8_t *next2= parity ? cur : next;
1551 The original parity expression that was used here is:
1552 (i_field ^ (i_order == i_field)) & 1
1555 i_field = 0, i_order = 0 => 1
1556 i_field = 1, i_order = 1 => 0
1557 i_field = 1, i_order = 0 => 1
1558 i_field = 0, i_order = 1 => 0
1560 => equivalent with e.g. (1 - i_order) or (i_order + 1) % 2
1562 Thus, in a normal two-field frame,
1563 parity 1 = first field (i_order == 0)
1564 parity 0 = second field (i_order == 1)
1566 Now, with three fields, where the third is a copy of the first,
1567 i_order = 0 => parity 1 (as usual)
1568 i_order = 1 => due to the repeat, prev = cur, but also next = cur.
1569 Because in such a case there is no motion (otherwise field repeat makes no sense),
1570 we don't actually need to invoke Yadif's filter(). Thus, set "parity" to 2,
1571 and use this to bypass the filter.
1572 i_order = 2 => parity 0 (as usual)
1575 if( p_cur && p_cur->i_nb_fields > 2 )
1576 yadif_parity = (i_order + 1) % 3; /* 1, *2*, 0; where 2 is a special value meaning "bypass filter". */
1578 yadif_parity = (i_order + 1) % 2; /* 1, 0 */
1580 /* Filter if we have all the pictures we need */
1581 if( p_prev && p_cur && p_next )
/* Line filter implementation: the SSE2-gated assembly version when
 * available, the C version otherwise. */
1584 void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1585 #if defined(HAVE_YADIF_SSE2)
1586 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1587 filter = yadif_filter_line_mmx2;
1590 filter = yadif_filter_line_c;
1592 for( int n = 0; n < p_dst->i_planes; n++ )
1594 const plane_t *prevp = &p_prev->p[n];
1595 const plane_t *curp = &p_cur->p[n];
1596 const plane_t *nextp = &p_next->p[n];
1597 plane_t *dstp = &p_dst->p[n];
/* The first and last visible lines are not processed by this loop; they are
 * filled by the duplication step further down. */
1599 for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
/* Lines of the kept field, and every line in bypass mode (parity 2), are
 * copied straight from the current picture. */
1601 if( (y % 2) == i_field || yadif_parity == 2 )
1603 vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1604 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1608 struct vf_priv_s cfg;
1609 /* Spatial checks only when enough data */
1610 cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
1612 assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1614 &dstp->p_pixels[y * dstp->i_pitch],
1615 &prevp->p_pixels[y * prevp->i_pitch],
1616 &curp->p_pixels[y * curp->i_pitch],
1617 &nextp->p_pixels[y * nextp->i_pitch],
1618 dstp->i_visible_pitch,
1623 /* We duplicate the first and last lines */
1625 vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1626 else if( y == dstp->i_visible_lines - 2 )
1627 vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1631 p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame, too */
/* Only the newest picture is available: fall back to the X renderer. */
1635 else if( !p_prev && !p_cur && p_next )
1637 /* NOTE: For the first frame, we use the default frame offset
1638 as set by Open() or SetFilterMethod(). It is always 0. */
1640 /* FIXME not good as it does not use i_order/i_field */
1641 RenderX( p_dst, p_next );
/* Remaining case: nothing is rendered and the caller gets an error. */
1646 p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame */
1648 return VLC_EGENERIC;
1652 /*****************************************************************************
1653 * video filter2 functions
1654 *****************************************************************************/
/*
 * Deinterlace(): video filter callback (installed as pf_video_filter by
 * Open()).
 *
 * Takes one input picture and produces up to DEINTERLACE_DST_SIZE output
 * pictures: one in the normal case, one per field for framerate doublers.
 * Additional output pictures are chained through p_next.
 *
 * Steps, in order:
 *  - request the first output picture and copy the input properties to it,
 *  - slide the metadata history and append the input picture's metadata,
 *  - pick the field order / field count of the outgoing frame according to
 *    p_sys->i_frame_offset,
 *  - for framerate doublers, estimate the duration of one field and allocate
 *    the extra output pictures,
 *  - call the renderer selected by p_sys->i_mode,
 *  - set the output timestamps (unless the algorithm requested CUSTOM_PTS)
 *    and mark the outputs as progressive.
 *
 * The input picture is released on every visible path.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably p_dst[0] is returned on success and NULL otherwise — confirm.
 */
1655 #define DEINTERLACE_DST_SIZE 3
1656 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1658 filter_sys_t *p_sys = p_filter->p_sys;
1659 picture_t *p_dst[DEINTERLACE_DST_SIZE];
1661 /* Request output picture */
1662 p_dst[0] = filter_NewPicture( p_filter );
1663 if( p_dst[0] == NULL )
1665 picture_Release( p_pic );
1668 picture_CopyProperties( p_dst[0], p_pic );
1670 /* Any unused p_dst pointers must be NULL, because they are used to check how many output frames we have. */
1671 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1674 /* Slide the metadata history. */
1675 for( int i = 1; i < METADATA_SIZE; i++ )
1677 p_sys->meta.pi_date[i-1] = p_sys->meta.pi_date[i];
1678 p_sys->meta.pi_nb_fields[i-1] = p_sys->meta.pi_nb_fields[i];
1679 p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
1681 /* The last element corresponds to the current input frame. */
1682 p_sys->meta.pi_date[METADATA_SIZE-1] = p_pic->date;
1683 p_sys->meta.pi_nb_fields[METADATA_SIZE-1] = p_pic->i_nb_fields;
1684 p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;
1686 /* Remember the frame offset that we should use for this frame.
1687 The value in p_sys will be updated to reflect the correct value
1688 for the *next* frame when we call the renderer. */
1689 int i_frame_offset = p_sys->i_frame_offset;
1690 int i_meta_idx = (METADATA_SIZE-1) - i_frame_offset;
1692 /* These correspond to the current *outgoing* frame. */
1693 bool b_top_field_first;
1695 if( i_frame_offset != CUSTOM_PTS )
1697 /* Pick the correct values from the history. */
1698 b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
1699 i_nb_fields = p_sys->meta.pi_nb_fields[i_meta_idx];
1703 /* Framerate doublers must not request CUSTOM_PTS, as they need the original field timings,
1704 and need Deinterlace() to allocate the correct number of output frames. */
1705 assert( !p_sys->b_double_rate );
1707 /* NOTE: i_nb_fields is only used for framerate doublers, so it is unused in this case.
1708 b_top_field_first is only passed to the algorithm. We assume that algorithms that
1709 request CUSTOM_PTS will, if necessary, extract the TFF/BFF information themselves.
1711 b_top_field_first = p_pic->b_top_field_first; /* this is not guaranteed to be meaningful */
1712 i_nb_fields = p_pic->i_nb_fields; /* unused */
1715 /* For framerate doublers, determine field duration and allocate output frames. */
1716 mtime_t i_field_dur = 0;
1717 int i_double_rate_alloc_end = 0; /* One past last for allocated output frames in p_dst[].
1718 Used only for framerate doublers. Will be inited below.
1719 Declared here because the PTS logic needs the result. */
1720 if( p_sys->b_double_rate )
1722 /* Calculate one field duration. */
1724 int iend = METADATA_SIZE-1;
1725 /* Find oldest valid logged date. Note: the current input frame doesn't count. */
1726 for( ; i < iend; i++ )
1727 if( p_sys->meta.pi_date[i] > VLC_TS_INVALID )
1731 /* Count how many fields the valid history entries (except the new frame) represent. */
1732 int i_fields_total = 0;
1733 for( int j = i ; j < iend; j++ )
1734 i_fields_total += p_sys->meta.pi_nb_fields[j];
1735 /* One field took this long. */
1736 i_field_dur = (p_pic->date - p_sys->meta.pi_date[i]) / i_fields_total;
/* NOTE(review): this division assumes i_fields_total != 0, i.e. that the
 * logged i_nb_fields values are never all zero — confirm that input pictures
 * cannot carry i_nb_fields == 0. */
1738 /* Note that we default to field duration 0 if it could not be determined.
1739 This behaves the same as the old code - leaving the extra output frame
1740 dates the same as p_pic->date if the last cached date was not valid.
1743 i_double_rate_alloc_end = i_nb_fields;
1744 if( i_nb_fields > DEINTERLACE_DST_SIZE )
1746 /* Note that the effective buffer size depends also on the constant private_picture in vout_wrapper.c,
1747 since that determines the maximum number of output pictures filter_NewPicture() will successfully
1748 allocate for one input frame.
1750 msg_Err( p_filter, "Framerate doubler: output buffer too small; fields = %d, buffer size = %d. Dropping the remaining fields.", i_nb_fields, DEINTERLACE_DST_SIZE );
1751 i_double_rate_alloc_end = DEINTERLACE_DST_SIZE;
1754 /* Allocate output frames. */
1755 for( int i = 1; i < i_double_rate_alloc_end ; ++i )
/* Each new picture is also linked from its predecessor through p_next. */
1757 p_dst[i-1]->p_next =
1758 p_dst[i] = filter_NewPicture( p_filter );
1761 picture_CopyProperties( p_dst[i], p_pic );
1765 msg_Err( p_filter, "Framerate doubler: could not allocate output frame %d", i+1 );
1766 i_double_rate_alloc_end = i; /* Inform the PTS logic about the correct end position. */
1767 break; /* If this happens, the rest of the allocations aren't likely to work, either... */
1770 /* Now we have allocated *up to* the correct number of frames; normally, exactly the correct number.
1771 Upon alloc failure, we may have succeeded in allocating *some* output frames, but fewer than
1772 were desired. In such a case, as many will be rendered as were successfully allocated.
1774 Note that now p_dst[i] != NULL for 0 <= i < i_double_rate_alloc_end. */
1776 assert( p_sys->b_double_rate == true || p_dst[1] == NULL );
1777 assert( i_nb_fields > 2 || p_dst[2] == NULL );
/* Render with the selected algorithm. The calls on p_dst[1] and p_dst[2]
 * alternate the field passed to the renderer. */
1780 switch( p_sys->i_mode )
1782 case DEINTERLACE_DISCARD:
1783 RenderDiscard( p_filter, p_dst[0], p_pic, 0 );
1786 case DEINTERLACE_BOB:
1787 RenderBob( p_filter, p_dst[0], p_pic, !b_top_field_first );
1789 RenderBob( p_filter, p_dst[1], p_pic, b_top_field_first );
1791 RenderBob( p_filter, p_dst[2], p_pic, !b_top_field_first );
1794 case DEINTERLACE_LINEAR:
1795 RenderLinear( p_filter, p_dst[0], p_pic, !b_top_field_first );
1797 RenderLinear( p_filter, p_dst[1], p_pic, b_top_field_first );
1799 RenderLinear( p_filter, p_dst[2], p_pic, !b_top_field_first );
1802 case DEINTERLACE_MEAN:
1803 RenderMean( p_filter, p_dst[0], p_pic );
1806 case DEINTERLACE_BLEND:
1807 RenderBlend( p_filter, p_dst[0], p_pic );
1811 RenderX( p_dst[0], p_pic );
1814 case DEINTERLACE_YADIF:
1815 if( RenderYadif( p_filter, p_dst[0], p_pic, 0, 0 ) )
1819 case DEINTERLACE_YADIF2X:
1820 if( RenderYadif( p_filter, p_dst[0], p_pic, 0, !b_top_field_first ) )
1823 RenderYadif( p_filter, p_dst[1], p_pic, 1, b_top_field_first );
1825 RenderYadif( p_filter, p_dst[2], p_pic, 2, !b_top_field_first );
1829 /* Set output timestamps, if the algorithm didn't request CUSTOM_PTS for this frame. */
1830 assert( i_frame_offset <= METADATA_SIZE || i_frame_offset == CUSTOM_PTS );
1831 if( i_frame_offset != CUSTOM_PTS )
1833 mtime_t i_base_pts = p_sys->meta.pi_date[i_meta_idx];
1835 /* Note: in the usual case (i_frame_offset = 0 and b_double_rate = false),
1836 this effectively does nothing. This is needed to correct the timestamp
1837 when i_frame_offset > 0. */
1838 p_dst[0]->date = i_base_pts;
1840 if( p_sys->b_double_rate )
1842 /* Processing all actually allocated output frames. */
1843 for( int i = 1; i < i_double_rate_alloc_end; ++i )
1845 /* XXX it's not really good especially for the first picture, but
1846 * I don't think that delaying by one frame is worth it */
1847 if( i_base_pts > VLC_TS_INVALID )
1848 p_dst[i]->date = i_base_pts + i * i_field_dur;
1850 p_dst[i]->date = VLC_TS_INVALID;
/* All output pictures are flagged as progressive. */
1855 p_dst[0]->b_progressive = true;
1856 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1859 p_dst[i]->b_progressive = true;
1862 picture_Release( p_pic );
/* Cleanup: release every output picture and the input picture.
 * NOTE(review): presumably this is the target of the renderer failure paths
 * above — the label and jumps are not visible in this excerpt. */
1866 picture_Release( p_dst[0] );
1867 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1870 picture_Release( p_dst[i] );
1872 picture_Release( p_pic );
1876 static void Flush( filter_t *p_filter )
1878 filter_sys_t *p_sys = p_filter->p_sys;
1880 for( int i = 0; i < METADATA_SIZE; i++ )
1882 p_sys->meta.pi_date[i] = VLC_TS_INVALID;
1883 p_sys->meta.pi_nb_fields[i] = 2;
1884 p_sys->meta.pb_top_field_first[i] = true;
1886 p_sys->i_frame_offset = 0; /* reset to default value (first frame after flush cannot have offset) */
1887 for( int i = 0; i < HISTORY_SIZE; i++ )
1889 if( p_sys->pp_history[i] )
1890 picture_Release( p_sys->pp_history[i] );
1891 p_sys->pp_history[i] = NULL;
/*
 * Mouse callback (installed as pf_video_mouse by Open()).
 *
 * p_filter - filter instance
 * p_mouse  - mouse state to fill in
 * p_old    - previous mouse state
 * p_new    - new mouse state
 *
 * NOTE(review): only the b_half_height test is visible in this excerpt;
 * presumably the vertical coordinate is rescaled when the filter outputs
 * half-height pictures — confirm.
 */
1895 static int Mouse( filter_t *p_filter,
1896 vlc_mouse_t *p_mouse, const vlc_mouse_t *p_old, const vlc_mouse_t *p_new )
1900 if( p_filter->p_sys->b_half_height )
1906 /*****************************************************************************
1908 *****************************************************************************/
/*
 * Open: set up the deinterlace filter.
 *
 * Rejects unsupported input chromas, allocates and initialises the private
 * state, selects the line-merge implementation according to the CPU
 * capabilities, applies the configured deinterlace mode and computes the
 * output format. If the output format may not change but the chroma or
 * height would differ from the input, the filter is closed and
 * VLC_EGENERIC is returned. Finally the filter, flush and mouse callbacks
 * are installed.
 */
1909 static int Open( vlc_object_t *p_this )
1911 filter_t *p_filter = (filter_t*)p_this;
1912 filter_sys_t *p_sys;
1914 if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
1915 return VLC_EGENERIC;
/* NOTE(review): the check of the malloc() result is not visible in this
 * excerpt — confirm that allocation failure is handled before p_sys is
 * dereferenced below. */
1918 p_sys = p_filter->p_sys = malloc( sizeof( *p_sys ) );
/* Defaults: blend mode, no framerate doubling, half-height output. */
1922 p_sys->i_mode = DEINTERLACE_BLEND;
1923 p_sys->b_double_rate = false;
1924 p_sys->b_half_height = true;
/* Empty metadata history: invalid date, two fields, top field first. */
1925 for( int i = 0; i < METADATA_SIZE; i++ )
1927 p_sys->meta.pi_date[i] = VLC_TS_INVALID;
1928 p_sys->meta.pi_nb_fields[i] = 2;
1929 p_sys->meta.pb_top_field_first[i] = true;
1931 p_sys->i_frame_offset = 0; /* start with default value (first-ever frame cannot have offset) */
/* Empty picture history. */
1932 for( int i = 0; i < HISTORY_SIZE; i++ )
1933 p_sys->pp_history[i] = NULL;
/* Select the merge routine (pf_merge) and its optional epilogue
 * (pf_end_merge). Candidates are tested in the order AltiVec, SSE2, MMXEXT,
 * 3DNow!, NEON, with MergeGeneric as the last entry.
 * NOTE(review): the branching between the candidates is not visible in this
 * excerpt; presumably the first match wins — confirm. */
1935 #if defined(CAN_COMPILE_C_ALTIVEC)
1936 if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
1938 p_sys->pf_merge = MergeAltivec;
1939 p_sys->pf_end_merge = NULL;
1943 #if defined(CAN_COMPILE_SSE)
1944 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1946 p_sys->pf_merge = MergeSSE2;
1947 p_sys->pf_end_merge = EndMMX;
1951 #if defined(CAN_COMPILE_MMXEXT)
1952 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1954 p_sys->pf_merge = MergeMMXEXT;
1955 p_sys->pf_end_merge = EndMMX;
1959 #if defined(CAN_COMPILE_3DNOW)
1960 if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
1962 p_sys->pf_merge = Merge3DNow;
1963 p_sys->pf_end_merge = End3DNow;
1967 #if defined __ARM_NEON__
1968 if( vlc_CPU() & CPU_CAPABILITY_NEON )
1970 p_sys->pf_merge = MergeNEON;
1971 p_sys->pf_end_merge = NULL;
1976 p_sys->pf_merge = MergeGeneric;
1977 p_sys->pf_end_merge = NULL;
/* Parse the filter options and apply the requested deinterlace mode. */
1981 config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
1984 char *psz_mode = var_GetNonEmptyString( p_filter, FILTER_CFG_PREFIX "mode" );
1985 SetFilterMethod( p_filter, psz_mode, p_filter->fmt_in.video.i_chroma );
/* Compute the output format and refuse to run if it would have to differ
 * from the input format while format changes are not allowed. */
1990 GetOutputFormat( p_filter, &fmt, &p_filter->fmt_in.video );
1991 if( !p_filter->b_allow_fmt_out_change &&
1992 ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
1993 fmt.i_height != p_filter->fmt_in.video.i_height ) )
1995 Close( VLC_OBJECT(p_filter) );
1996 return VLC_EGENERIC;
/* Publish the output format and install the callbacks. */
1998 p_filter->fmt_out.video = fmt;
1999 p_filter->fmt_out.i_codec = fmt.i_chroma;
2000 p_filter->pf_video_filter = Deinterlace;
2001 p_filter->pf_video_flush = Flush;
2002 p_filter->pf_video_mouse = Mouse;
2004 msg_Dbg( p_filter, "deinterlacing" );
2009 /*****************************************************************************
2010 * Close: clean up the filter
2011 *****************************************************************************/
2012 static void Close( vlc_object_t *p_this )
2014 filter_t *p_filter = (filter_t*)p_this;
2017 free( p_filter->p_sys );