1 /*****************************************************************************
2 * deinterlace.c : deinterlacer plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2000-2009 the VideoLAN team
7 * Author: Sam Hocevar <sam@zoy.org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_filter.h>
43 #ifdef CAN_COMPILE_MMXEXT
/* Numeric identifiers for the deinterlace algorithms; SetFilterMethod()
 * stores one of these in filter_sys_t.i_mode. */
47 #define DEINTERLACE_DISCARD 1
48 #define DEINTERLACE_MEAN 2
49 #define DEINTERLACE_BLEND 3
50 #define DEINTERLACE_BOB 4
51 #define DEINTERLACE_LINEAR 5
52 #define DEINTERLACE_X 6
53 #define DEINTERLACE_YADIF 7
54 #define DEINTERLACE_YADIF2X 8
56 /*****************************************************************************
58 *****************************************************************************/
/* Module entry points, registered with set_callbacks() below. */
59 static int Open ( vlc_object_t * );
60 static void Close( vlc_object_t * );
/* Short and long help texts for the deinterlace mode options. */
62 #define MODE_TEXT N_("Deinterlace mode")
63 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
65 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
66 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
/* Prefix concatenated in front of the option name in add_string() below. */
68 #define FILTER_CFG_PREFIX "sout-deinterlace-"
/* Accepted mode names and their display labels, listed in the same order;
 * both arrays are handed to change_string_list() below. */
70 static const char *const mode_list[] = {
71 "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
72 static const char *const mode_list_text[] = {
73 N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X", "Yadif", "Yadif (2x)" };
/* Module description: a "video filter2" capability whose "mode" string
 * option defaults to "blend". */
76 set_description( N_("Deinterlacing video filter") )
77 set_shortname( N_("Deinterlace" ))
78 set_capability( "video filter2", 0 )
79 set_category( CAT_VIDEO )
80 set_subcategory( SUBCAT_VIDEO_VFILTER )
82 add_string( FILTER_CFG_PREFIX "mode", "blend", SOUT_MODE_TEXT,
83 SOUT_MODE_LONGTEXT, false )
84 change_string_list( mode_list, mode_list_text, 0 )
86 add_shortcut( "deinterlace" )
87 set_callbacks( Open, Close )
91 /*****************************************************************************
93 *****************************************************************************/
/* Per-mode render routines. Those taking a trailing int receive the field
 * selector (and, for RenderYadif, one further int). */
94 static void RenderDiscard( filter_t *, picture_t *, picture_t *, int );
95 static void RenderBob ( filter_t *, picture_t *, picture_t *, int );
96 static void RenderMean ( filter_t *, picture_t *, picture_t * );
97 static void RenderBlend ( filter_t *, picture_t *, picture_t * );
98 static void RenderLinear ( filter_t *, picture_t *, picture_t *, int );
99 static void RenderX ( picture_t *, picture_t * );
100 static int RenderYadif ( filter_t *, picture_t *, picture_t *, int, int );
/* Line-merge implementations: (dest, src1, src2, byte count). All share the
 * signature of filter_sys_t.pf_merge; the CPU-specific ones are only
 * declared when the matching capability macro is defined. */
102 static void MergeGeneric ( void *, const void *, const void *, size_t );
103 #if defined(CAN_COMPILE_C_ALTIVEC)
104 static void MergeAltivec ( void *, const void *, const void *, size_t );
106 #if defined(CAN_COMPILE_MMXEXT)
107 static void MergeMMXEXT ( void *, const void *, const void *, size_t );
109 #if defined(CAN_COMPILE_3DNOW)
110 static void Merge3DNow ( void *, const void *, const void *, size_t );
112 #if defined(CAN_COMPILE_SSE)
113 static void MergeSSE2 ( void *, const void *, const void *, size_t );
/* Epilogues matching the signature of filter_sys_t.pf_end_merge. */
115 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
116 static void EndMMX ( void );
118 #if defined(CAN_COMPILE_3DNOW)
119 static void End3DNow ( void );
121 #if defined __ARM_NEON__
122 static void MergeNEON (void *, const void *, const void *, size_t);
/* Option names understood by this filter. */
125 static const char *const ppsz_filter_options[] = {
129 /* Used for framerate doublers */
130 #define METADATA_SIZE (3)
/* Per-frame metadata for the last METADATA_SIZE input frames. */
132 mtime_t pi_date[METADATA_SIZE];
133 int pi_nb_fields[METADATA_SIZE];
134 bool pb_top_field_first[METADATA_SIZE];
135 } metadata_history_t;
/* Number of input pictures kept in pp_history. */
137 #define HISTORY_SIZE (3)
/* Special i_frame_offset value: the algorithm computes output PTSs itself. */
138 #define CUSTOM_PTS -1
141 int i_mode; /* Deinterlace mode */
142 bool b_double_rate; /* Shall we double the framerate? */
143 bool b_half_height; /* Shall we divide the height by 2? */
144 bool b_use_frame_history; /* Does the algorithm need the input frame history buffer? */
/* Line-merge routine and its optional epilogue; reached through the Merge
 * and EndMerge macros. */
146 void (*pf_merge) ( void *, const void *, const void *, size_t );
147 void (*pf_end_merge) ( void );
149 /* Metadata history (PTS, nb_fields, TFF). Used for framerate doublers. */
150 metadata_history_t meta;
152 /* Output frame timing / framerate doubler control (see below) */
155 /* Input frame history buffer for algorithms that perform temporal filtering. */
156 picture_t *pp_history[HISTORY_SIZE];
159 /* NOTE on i_frame_offset:
161 This value indicates the offset between input and output frames in the currently active deinterlace algorithm.
162 See the rationale below for why this is needed and how it is used.
164 Valid range: 0 <= i_frame_offset < METADATA_SIZE, or i_frame_offset = CUSTOM_PTS.
165 The special value CUSTOM_PTS is only allowed if b_double_rate is false.
167 If CUSTOM_PTS is used, the algorithm must compute the outgoing PTSs itself,
168 and additionally, read the TFF/BFF information itself (if it needs it)
169 from the incoming frames.
172 0 = output frame corresponds to the current input frame
173 (no frame offset; default if not set),
174 1 = output frame corresponds to the previous input frame
175 (e.g. Yadif and Yadif2x work like this),
178 If necessary, i_frame_offset should be updated by the active deinterlace algorithm
179 to indicate the correct delay for the *next* input frame. It does not matter at which i_order
180 the algorithm updates this information, but the new value will only take effect upon the
181 next call to Deinterlace() (i.e. at the next incoming frame).
183 The first-ever frame that arrives to the filter after Open() is always handled as having
184 i_frame_offset = 0. For the second and all subsequent frames, each algorithm is responsible
185 for setting the offset correctly. (The default is 0, so if that is correct, there's no need
188 This solution guarantees that i_frame_offset:
189 1) is up to date at the start of each frame,
190 2) does not change (as far as Deinterlace() is concerned) during a frame, and
191 3) does not need a special API for setting the value at the start of each input frame,
192 before the algorithm starts rendering the (first) output frame for that input frame.
194 The deinterlace algorithm is allowed to behave differently for different input frames.
195 This is especially important for startup, when full history (as defined by each algorithm)
196 is not yet available. During the first-ever input frame, it is clear that it is the
197 only possible source for information, so i_frame_offset = 0 is necessarily correct.
198 After that, what to do is up to each algorithm.
200 Having the correct offset at the start of each input frame is critically important in order to:
201 1) Allocate the correct number of output frames for framerate doublers, and to
202 2) Pass correct TFF/BFF information to the algorithm.
204 These points are important for proper soft field repeat support. This feature is used in some
205 streams originating from film. In soft NTSC telecine, the number of fields alternates as 3,2,3,2,...
206 and the video field dominance flips every two frames (after every "3"). Also, some streams
207 request an occasional field repeat (nb_fields = 3), after which the video field dominance flips.
208 To render such streams correctly, the nb_fields and TFF/BFF information must be taken from
209 the specific input frame that the algorithm intends to render.
211 Additionally, the output PTS is automatically computed by Deinterlace() from i_frame_offset and i_order.
213 It is possible to use the special value CUSTOM_PTS to indicate that the algorithm computes
214 the output PTSs itself. In this case, Deinterlace() will pass them through. This special value
215 is not valid for framerate doublers, as by definition they are field renderers, so they need to
216 use the original field timings to work correctly. Basically, this special value is only intended
217 for algorithms that need to perform nontrivial framerate conversions (such as IVTC).
221 /*****************************************************************************
222 * SetFilterMethod: setup the deinterlace method to use.
223 *****************************************************************************/
/* Maps the method name psz_method onto the four mode fields of
 * p_filter->p_sys (i_mode, b_double_rate, b_half_height,
 * b_use_frame_history) and resets i_frame_offset to 0.
 * "progressive-scan" is accepted as an alias of "bob". Any name that matches
 * none of the listed methods selects blend; a message is emitted when that
 * name is not literally "blend".
 * i_chroma is only consulted for "discard". */
224 static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fourcc_t i_chroma )
226 filter_sys_t *p_sys = p_filter->p_sys;
231 if( !strcmp( psz_method, "mean" ) )
233 p_sys->i_mode = DEINTERLACE_MEAN;
234 p_sys->b_double_rate = false;
235 p_sys->b_half_height = true;
236 p_sys->b_use_frame_history = false;
238 else if( !strcmp( psz_method, "bob" )
239 || !strcmp( psz_method, "progressive-scan" ) )
241 p_sys->i_mode = DEINTERLACE_BOB;
242 p_sys->b_double_rate = true;
243 p_sys->b_half_height = false;
244 p_sys->b_use_frame_history = false;
246 else if( !strcmp( psz_method, "linear" ) )
248 p_sys->i_mode = DEINTERLACE_LINEAR;
249 p_sys->b_double_rate = true;
250 p_sys->b_half_height = false;
251 p_sys->b_use_frame_history = false;
253 else if( !strcmp( psz_method, "x" ) )
255 p_sys->i_mode = DEINTERLACE_X;
256 p_sys->b_double_rate = false;
257 p_sys->b_half_height = false;
258 p_sys->b_use_frame_history = false;
260 else if( !strcmp( psz_method, "yadif" ) )
262 p_sys->i_mode = DEINTERLACE_YADIF;
263 p_sys->b_double_rate = false;
264 p_sys->b_half_height = false;
265 p_sys->b_use_frame_history = true;
267 else if( !strcmp( psz_method, "yadif2x" ) )
269 p_sys->i_mode = DEINTERLACE_YADIF2X;
270 p_sys->b_double_rate = true;
271 p_sys->b_half_height = false;
272 p_sys->b_use_frame_history = true;
274 else if( !strcmp( psz_method, "discard" ) )
/* For I422/J422 input, discard mode keeps the full output height. */
276 const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
277 i_chroma == VLC_CODEC_J422;
279 p_sys->i_mode = DEINTERLACE_DISCARD;
280 p_sys->b_double_rate = false;
281 p_sys->b_half_height = !b_i422;
282 p_sys->b_use_frame_history = false;
/* Fallback: anything else is treated as blend. */
286 if( strcmp( psz_method, "blend" ) )
288 "no valid deinterlace mode provided, using \"blend\"" );
290 p_sys->i_mode = DEINTERLACE_BLEND;
291 p_sys->b_double_rate = false;
292 p_sys->b_half_height = false;
293 p_sys->b_use_frame_history = false;
296 p_sys->i_frame_offset = 0; /* reset to default when method changes */
/* NOTE(review): this logs the requested name, which differs from the
 * selected mode when the blend fallback was taken. */
298 msg_Dbg( p_filter, "using %s deinterlace method", psz_method );
/* Adjusts the output video format p_dst according to the active mode in
 * p_filter->p_sys, using the input format p_src for the chroma decision. */
301 static void GetOutputFormat( filter_t *p_filter,
302 video_format_t *p_dst, const video_format_t *p_src )
304 filter_sys_t *p_sys = p_filter->p_sys;
/* Half-height modes: halve the vertical dimensions and double the sample
 * aspect ratio denominator. */
307 if( p_sys->b_half_height )
309 p_dst->i_height /= 2;
310 p_dst->i_visible_height /= 2;
311 p_dst->i_y_offset /= 2;
312 p_dst->i_sar_den *= 2;
/* 4:2:2 input: the modes listed below keep the input chroma; the remaining
 * case starts mapping I422 to I420. */
315 if( p_src->i_chroma == VLC_CODEC_I422 ||
316 p_src->i_chroma == VLC_CODEC_J422 )
318 switch( p_sys->i_mode )
320 case DEINTERLACE_MEAN:
321 case DEINTERLACE_LINEAR:
323 case DEINTERLACE_YADIF:
324 case DEINTERLACE_YADIF2X:
325 p_dst->i_chroma = p_src->i_chroma;
328 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
/* Returns true when i_chroma is one of the five chromas this filter accepts:
 * I420, J420, YV12, I422 or J422. */
335 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
337 return i_chroma == VLC_CODEC_I420 ||
338 i_chroma == VLC_CODEC_J420 ||
339 i_chroma == VLC_CODEC_YV12 ||
340 i_chroma == VLC_CODEC_I422 ||
341 i_chroma == VLC_CODEC_J422;
344 /*****************************************************************************
345 * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
346 *****************************************************************************/
/* For every plane of p_pic, copies input lines into p_outpic starting at
 * input line i_field. The copy pattern depends on the input chroma
 * (p_filter->fmt_in.video.i_chroma) and, in the second variant, on whether
 * the plane is the luma plane.
 * NOTE(review): each copy is p_pic's pitch long while the output pointer
 * advances by p_outpic's pitch - presumably both pitches are equal; confirm. */
347 static void RenderDiscard( filter_t *p_filter,
348 picture_t *p_outpic, picture_t *p_pic, int i_field )
352 /* Copy image and skip lines */
353 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
355 uint8_t *p_in, *p_out_end, *p_out;
/* First input line of the selected field. */
358 p_in = p_pic->p[i_plane].p_pixels
359 + i_field * p_pic->p[i_plane].i_pitch;
/* The output is filled from its first line to the end of its visible lines. */
361 p_out = p_outpic->p[i_plane].p_pixels;
362 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
363 * p_outpic->p[i_plane].i_visible_lines;
365 switch( p_filter->fmt_in.video.i_chroma )
/* Variant 1: one output line per kept input line, skipping every other
 * input line. */
371 for( ; p_out < p_out_end ; )
373 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
375 p_out += p_outpic->p[i_plane].i_pitch;
376 p_in += 2 * p_pic->p[i_plane].i_pitch;
/* Variant 2: the luma plane writes each kept line twice; other planes once. */
383 i_increment = 2 * p_pic->p[i_plane].i_pitch;
385 if( i_plane == Y_PLANE )
387 for( ; p_out < p_out_end ; )
389 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
390 p_out += p_outpic->p[i_plane].i_pitch;
391 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
392 p_out += p_outpic->p[i_plane].i_pitch;
398 for( ; p_out < p_out_end ; )
400 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
401 p_out += p_outpic->p[i_plane].i_pitch;
413 /*****************************************************************************
414 * RenderBob: renders a BOB picture - simple copy
415 *****************************************************************************/
/* Builds a full-height picture from one field by line doubling: each kept
 * input line is copied to two consecutive output lines, with an extra first
 * line for the bottom field or an extra last line for the top field. There
 * are two variants selected by the input chroma; in the second one only the
 * luma plane doubles lines. */
416 static void RenderBob( filter_t *p_filter,
417 picture_t *p_outpic, picture_t *p_pic, int i_field )
421 /* Copy image and skip lines */
422 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
424 uint8_t *p_in, *p_out_end, *p_out;
426 p_in = p_pic->p[i_plane].p_pixels;
427 p_out = p_outpic->p[i_plane].p_pixels;
428 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
429 * p_outpic->p[i_plane].i_visible_lines;
431 switch( p_filter->fmt_in.video.i_chroma )
436 /* For BOTTOM field we need to add the first line */
439 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
440 p_in += p_pic->p[i_plane].i_pitch;
441 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop two lines early; the final lines are written after the loop. */
444 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
446 for( ; p_out < p_out_end ; )
448 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
450 p_out += p_outpic->p[i_plane].i_pitch;
452 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
454 p_in += 2 * p_pic->p[i_plane].i_pitch;
455 p_out += p_outpic->p[i_plane].i_pitch;
458 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
460 /* For TOP field we need to add the last line */
463 p_in += p_pic->p[i_plane].i_pitch;
464 p_out += p_outpic->p[i_plane].i_pitch;
465 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
471 /* For BOTTOM field we need to add the first line */
474 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
475 p_in += p_pic->p[i_plane].i_pitch;
476 p_out += p_outpic->p[i_plane].i_pitch;
479 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
/* Luma: two output lines per kept input line. */
481 if( i_plane == Y_PLANE )
483 for( ; p_out < p_out_end ; )
485 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
487 p_out += p_outpic->p[i_plane].i_pitch;
489 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
491 p_in += 2 * p_pic->p[i_plane].i_pitch;
492 p_out += p_outpic->p[i_plane].i_pitch;
/* Other planes: one output line per kept input line. */
497 for( ; p_out < p_out_end ; )
499 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
501 p_out += p_outpic->p[i_plane].i_pitch;
502 p_in += 2 * p_pic->p[i_plane].i_pitch;
506 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
508 /* For TOP field we need to add the last line */
511 p_in += p_pic->p[i_plane].i_pitch;
512 p_out += p_outpic->p[i_plane].i_pitch;
513 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/* Shorthands for the merge routine and its optional epilogue stored in
 * filter_sys_t. Both expand against a variable named p_filter, which must be
 * in scope at the point of use.
 * NOTE(review): EndMerge expands to an unbraced `if`, so using it directly
 * in front of an `else` would capture that `else`. */
520 #define Merge p_filter->p_sys->pf_merge
521 #define EndMerge if(p_filter->p_sys->pf_end_merge) p_filter->p_sys->pf_end_merge
523 /*****************************************************************************
524 * RenderLinear: BOB with linear interpolation
525 *****************************************************************************/
/* Like RenderBob, but the line inserted between two kept field lines is
 * produced by Merge() from the current input line and the input line two
 * pitches further down, instead of being a plain copy. */
526 static void RenderLinear( filter_t *p_filter,
527 picture_t *p_outpic, picture_t *p_pic, int i_field )
531 /* Copy image and skip lines */
532 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
534 uint8_t *p_in, *p_out_end, *p_out;
536 p_in = p_pic->p[i_plane].p_pixels;
537 p_out = p_outpic->p[i_plane].p_pixels;
538 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
539 * p_outpic->p[i_plane].i_visible_lines;
541 /* For BOTTOM field we need to add the first line */
544 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
545 p_in += p_pic->p[i_plane].i_pitch;
546 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop two lines early; the final lines are written after the loop. */
549 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
551 for( ; p_out < p_out_end ; )
/* Kept field line: straight copy. */
553 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
555 p_out += p_outpic->p[i_plane].i_pitch;
/* Missing line: merged from the field lines above and below it. */
557 Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
558 p_pic->p[i_plane].i_pitch );
560 p_in += 2 * p_pic->p[i_plane].i_pitch;
561 p_out += p_outpic->p[i_plane].i_pitch;
564 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
566 /* For TOP field we need to add the last line */
569 p_in += p_pic->p[i_plane].i_pitch;
570 p_out += p_outpic->p[i_plane].i_pitch;
571 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/* Produces each output line by merging two consecutive input lines, then
 * advances two input lines per output line, so the output has half as many
 * lines as the input. */
577 static void RenderMean( filter_t *p_filter,
578 picture_t *p_outpic, picture_t *p_pic )
582 /* Copy image and skip lines */
583 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
585 uint8_t *p_in, *p_out_end, *p_out;
587 p_in = p_pic->p[i_plane].p_pixels;
589 p_out = p_outpic->p[i_plane].p_pixels;
590 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
591 * p_outpic->p[i_plane].i_visible_lines;
593 /* All lines: mean value */
594 for( ; p_out < p_out_end ; )
596 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
597 p_pic->p[i_plane].i_pitch );
599 p_out += p_outpic->p[i_plane].i_pitch;
600 p_in += 2 * p_pic->p[i_plane].i_pitch;
/* Copies the first line of each plane, then produces every following output
 * line by merging an input line with the one below it. There are two
 * variants selected by the input chroma; in the second one non-luma planes
 * advance two input lines per output line. */
606 static void RenderBlend( filter_t *p_filter,
607 picture_t *p_outpic, picture_t *p_pic )
611 /* Copy image and skip lines */
612 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
614 uint8_t *p_in, *p_out_end, *p_out;
616 p_in = p_pic->p[i_plane].p_pixels;
618 p_out = p_outpic->p[i_plane].p_pixels;
619 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
620 * p_outpic->p[i_plane].i_visible_lines;
622 switch( p_filter->fmt_in.video.i_chroma )
627 /* First line: simple copy */
628 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
629 p_out += p_outpic->p[i_plane].i_pitch;
631 /* Remaining lines: mean value */
632 for( ; p_out < p_out_end ; )
634 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
635 p_pic->p[i_plane].i_pitch );
637 p_out += p_outpic->p[i_plane].i_pitch;
638 p_in += p_pic->p[i_plane].i_pitch;
644 /* First line: simple copy */
645 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
646 p_out += p_outpic->p[i_plane].i_pitch;
648 /* Remaining lines: mean value */
/* Luma advances one input line per output line. */
649 if( i_plane == Y_PLANE )
651 for( ; p_out < p_out_end ; )
653 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
654 p_pic->p[i_plane].i_pitch );
656 p_out += p_outpic->p[i_plane].i_pitch;
657 p_in += p_pic->p[i_plane].i_pitch;
/* Other planes advance two input lines per output line. */
663 for( ; p_out < p_out_end ; )
665 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
666 p_pic->p[i_plane].i_pitch );
668 p_out += p_outpic->p[i_plane].i_pitch;
669 p_in += 2*p_pic->p[i_plane].i_pitch;
/**
 * MergeGeneric: portable C line merge.
 *
 * Writes to each of the first i_bytes bytes of _p_dest the truncated average
 * (a + b) >> 1 of the corresponding bytes of _p_s1 and _p_s2.
 *
 * \param _p_dest destination buffer, at least i_bytes long
 * \param _p_s1   first source buffer, at least i_bytes long
 * \param _p_s2   second source buffer, at least i_bytes long
 * \param i_bytes number of bytes to merge; any value including 0 is valid
 *
 * A single bounded loop is used: no pointer is ever formed outside the
 * buffers (the former "end - 8" bound was out of range for short lines) and
 * every byte, including the tail, is processed.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = _p_dest;
    const uint8_t *p_s1 = _p_s1;
    const uint8_t *p_s2 = _p_s2;

    for( size_t i = 0; i < i_bytes; i++ )
        p_dest[i] = (uint8_t)( ( (unsigned)p_s1[i] + (unsigned)p_s2[i] ) >> 1 );
}
/* MMXEXT line merge, compiled only when CAN_COMPILE_MMXEXT is defined.
 * The main loop runs inline assembly that goes through register mm1 and
 * stores to *p_dest, bounded by an end pointer placed 8 bytes before the end
 * of the line; the remaining bytes are averaged in C as (a + b) >> 1.
 * NOTE(review): p_dest + i_bytes - 8 is formed even when i_bytes < 8 -
 * confirm callers never pass such short lines. */
708 #if defined(CAN_COMPILE_MMXEXT)
709 static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
712 uint8_t* p_dest = (uint8_t*)_p_dest;
713 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
714 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
715 uint8_t* p_end = p_dest + i_bytes - 8;
716 while( p_dest < p_end )
718 __asm__ __volatile__( "movq %2,%%mm1;"
720 "movq %%mm1, %0" :"=m" (*p_dest):
730 while( p_dest < p_end )
732 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* 3DNow! line merge, compiled only when CAN_COMPILE_3DNOW is defined.
 * Same structure as MergeMMXEXT: an inline-assembly main loop through
 * register mm1 bounded 8 bytes before the end of the line, then a C loop
 * averaging the remaining bytes as (a + b) >> 1.
 * NOTE(review): p_dest + i_bytes - 8 is formed even when i_bytes < 8 -
 * confirm callers never pass such short lines. */
737 #if defined(CAN_COMPILE_3DNOW)
738 static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
741 uint8_t* p_dest = (uint8_t*)_p_dest;
742 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
743 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
744 uint8_t* p_end = p_dest + i_bytes - 8;
745 while( p_dest < p_end )
747 __asm__ __volatile__( "movq %2,%%mm1;"
749 "movq %%mm1, %0" :"=m" (*p_dest):
759 while( p_dest < p_end )
761 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* SSE2 line merge, compiled only when CAN_COMPILE_SSE is defined.
 * Bytes are first averaged in C until p_s1 is 16-byte aligned, then an
 * inline-assembly loop works through register xmm1 with unaligned moves,
 * and a final C loop averages the remaining bytes.
 * NOTE(review): the alignment loop is not bounded by i_bytes.
 * NOTE(review): p_end is derived from p_dest after the alignment loop has
 * already advanced it, yet the full i_bytes is added - the bytes consumed
 * during alignment do not appear to be discounted; verify that this cannot
 * run past the end of the line. */
766 #if defined(CAN_COMPILE_SSE)
767 static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
770 uint8_t* p_dest = (uint8_t*)_p_dest;
771 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
772 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
774 while( (uintptr_t)p_s1 % 16 )
776 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
778 p_end = p_dest + i_bytes - 16;
779 while( p_dest < p_end )
781 __asm__ __volatile__( "movdqu %2,%%xmm1;"
783 "movdqu %%xmm1, %0" :"=m" (*p_dest):
793 while( p_dest < p_end )
795 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* Merge epilogue for the MMXEXT and SSE builds: executes the `emms`
 * instruction. */
800 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
801 static void EndMMX( void )
803 __asm__ __volatile__( "emms" :: );
/* Merge epilogue for the 3DNow! build: executes the `femms` instruction. */
807 #if defined(CAN_COMPILE_3DNOW)
808 static void End3DNow( void )
810 __asm__ __volatile__( "femms" :: );
/* AltiVec line merge, compiled only when CAN_COMPILE_C_ALTIVEC is defined.
 * Bytes are averaged in C until the destination is 16-byte aligned. The
 * vector loop then averages with vec_avg() and stores with vec_st(), using
 * vec_lvsl()/vec_perm() to realign the sources when either one is not
 * 16-byte aligned. A final C loop handles the remaining bytes.
 * NOTE(review): the alignment loop is not bounded by i_bytes.
 * NOTE(review): the source alignment test casts pointers to int; uintptr_t
 * (as used for p_dest) would avoid truncation warnings on 64-bit targets. */
814 #ifdef CAN_COMPILE_C_ALTIVEC
815 static void MergeAltivec( void *_p_dest, const void *_p_s1,
816 const void *_p_s2, size_t i_bytes )
818 uint8_t *p_dest = (uint8_t *)_p_dest;
819 uint8_t *p_s1 = (uint8_t *)_p_s1;
820 uint8_t *p_s2 = (uint8_t *)_p_s2;
821 uint8_t *p_end = p_dest + i_bytes - 15;
823 /* Use C until the first 16-bytes aligned destination pixel */
824 while( (uintptr_t)p_dest & 0xF )
826 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
829 if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
831 /* Unaligned source */
832 vector unsigned char s1v, s2v, destv;
833 vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
834 vector unsigned char perm1v, perm2v;
/* Permute vectors derived from each source address, plus the first block
 * of each source. */
836 perm1v = vec_lvsl( 0, p_s1 );
837 perm2v = vec_lvsl( 0, p_s2 );
838 s1oldv = vec_ld( 0, p_s1 );
839 s2oldv = vec_ld( 0, p_s2 );
841 while( p_dest < p_end )
/* Combine the previous and next blocks into the 16 source bytes wanted. */
843 s1newv = vec_ld( 16, p_s1 );
844 s2newv = vec_ld( 16, p_s2 );
845 s1v = vec_perm( s1oldv, s1newv, perm1v );
846 s2v = vec_perm( s2oldv, s2newv, perm2v );
849 destv = vec_avg( s1v, s2v );
850 vec_st( destv, 0, p_dest );
/* Aligned sources: direct vector loads. */
860 vector unsigned char s1v, s2v, destv;
862 while( p_dest < p_end )
864 s1v = vec_ld( 0, p_s1 );
865 s2v = vec_ld( 0, p_s2 );
866 destv = vec_avg( s1v, s2v );
867 vec_st( destv, 0, p_dest );
/* Scalar tail. */
877 while( p_dest < p_end )
879 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* ARM NEON line merge: out[i] = halving add (vhadd.u8) of in1[i] and in2[i].
 * A misaligned head is handed to MergeGeneric(); the main part uses one of
 * two inline-assembly blocks, chosen by whether both inputs are 16-byte
 * aligned (the aligned variant adds :128 hints to the loads); the remainder
 * goes to MergeGeneric() again. Stores always carry the :128 alignment hint.
 * NOTE(review): `mis` is the misalignment of the output pointer and is also
 * the byte count passed to MergeGeneric(); reaching 16-byte alignment would
 * need 16 - mis bytes - confirm against the :128 store hints.
 * NOTE(review): each assembly block loads and stores 64 bytes, while `end`
 * is only rounded down to a multiple of 16 - confirm the loop cannot run
 * past the end of the buffers. */
885 static void MergeNEON (void *restrict out, const void *in1,
886 const void *in2, size_t n)
889 const uint8_t *in1p = in1;
890 const uint8_t *in2p = in2;
891 size_t mis = ((uintptr_t)outp) & 15;
895 MergeGeneric (outp, in1p, in2p, mis);
902 uint8_t *end = outp + (n & ~15);
904 if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
907 "vld1.u8 {q0-q1}, [%[in1]]!\n"
908 "vld1.u8 {q2-q3}, [%[in2]]!\n"
909 "vhadd.u8 q4, q0, q2\n"
910 "vld1.u8 {q6-q7}, [%[in1]]!\n"
911 "vhadd.u8 q5, q1, q3\n"
912 "vld1.u8 {q8-q9}, [%[in2]]!\n"
913 "vhadd.u8 q10, q6, q8\n"
914 "vhadd.u8 q11, q7, q9\n"
915 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
916 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
917 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
919 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
920 "q8", "q9", "q10", "q11", "memory");
924 "vld1.u8 {q0-q1}, [%[in1],:128]!\n"
925 "vld1.u8 {q2-q3}, [%[in2],:128]!\n"
926 "vhadd.u8 q4, q0, q2\n"
927 "vld1.u8 {q6-q7}, [%[in1],:128]!\n"
928 "vhadd.u8 q5, q1, q3\n"
929 "vld1.u8 {q8-q9}, [%[in2],:128]!\n"
930 "vhadd.u8 q10, q6, q8\n"
931 "vhadd.u8 q11, q7, q9\n"
932 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
933 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
934 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
936 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
937 "q8", "q9", "q10", "q11", "memory");
940 MergeGeneric (outp, in1p, in2p, n);
944 /*****************************************************************************
945 * RenderX: This algorithm works on an 8x8 block basis: it copies the top field
946 * and applies a process to recreate the bottom field:
947 * If a 8x8 block is classified as :
948 * - progressive: it applies a small blend (1,6,1)
950 * * in the MMX version: we do a ME between the 2 fields, if there is a
951 * good match we use MC to recreate the bottom field (with a small
953 * * otherwise: it recreates the bottom field by an edge oriented
955 *****************************************************************************/
957 /* XDeint8x8Detect: detect if a 8x8 block is interlaced.
958 * XXX: It needs access to 8x10 pixels
959 * We use more than 8 lines to help with scrolling (text)
960 * (and because XDeint8x8Frame uses line 9)
961 * XXX: detection doesn't work well on smooth/uniform areas with noise,
962 * but it's not really a problem because they don't have much detail anyway
/* ssd: returns the square of a; used as one term of a sum of squared
 * differences by the interlace detectors. */
static inline int ssd( int a )
{
    return a * a;
}
/* C interlace detector for one 8x8 block at src (line stride i_src).
 * For each examined pair of lines it accumulates two sums of squared
 * differences over 8 columns:
 *   fr - between vertically adjacent lines (0/1 and 1/2),
 *   ff - between lines two apart, i.e. same-parity lines (0/2 and 1/3).
 * The test `ff < 6*fr/8 && fr > 32` marks a pair as looking interlaced.
 * Returns true when the counter fc is at least 1. */
965 static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
971 /* Detect interlacing */
973 for( y = 0; y < 7; y += 2 )
976 for( x = 0; x < 8; x++ )
978 fr += ssd(src[ x] - src[1*i_src+x]) +
979 ssd(src[i_src+x] - src[2*i_src+x]);
980 ff += ssd(src[ x] - src[2*i_src+x]) +
981 ssd(src[i_src+x] - src[3*i_src+x]);
983 if( ff < 6*fr/8 && fr > 32 )
989 return fc < 1 ? false : true;
/* MMXEXT interlace detector for one 8x8 block, compiled only when
 * CAN_COMPILE_MMXEXT is defined. Register use:
 *   mm7 - zero, used to widen bytes to 16-bit words,
 *   mm5 - running sum stored to fr (adjacent-line differences),
 *   mm6 - running sum stored to ff (same-parity-line differences).
 * Four pixels of four consecutive lines are handled per inner iteration.
 * NOTE(review): the outer loop bound is 9 here but 7 in XDeint8x8DetectC -
 * confirm the difference is intended. */
991 #ifdef CAN_COMPILE_MMXEXT
992 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
999 /* Detect interlacing */
1001 pxor_r2r( mm7, mm7 );
1002 for( y = 0; y < 9; y += 2 )
/* Clear both accumulators. */
1005 pxor_r2r( mm5, mm5 );
1006 pxor_r2r( mm6, mm6 );
1007 for( x = 0; x < 8; x+=4 )
/* Load 4 pixels from lines 0..3. */
1009 movd_m2r( src[ x], mm0 );
1010 movd_m2r( src[1*i_src+x], mm1 );
1011 movd_m2r( src[2*i_src+x], mm2 );
1012 movd_m2r( src[3*i_src+x], mm3 );
/* Widen to 16-bit words. */
1014 punpcklbw_r2r( mm7, mm0 );
1015 punpcklbw_r2r( mm7, mm1 );
1016 punpcklbw_r2r( mm7, mm2 );
1017 punpcklbw_r2r( mm7, mm3 );
1019 movq_r2r( mm0, mm4 );
/* mm0 = l0-l1, mm4 = l0-l2, mm2 = l2-l1, mm3 = l3-l1 */
1021 psubw_r2r( mm1, mm0 );
1022 psubw_r2r( mm2, mm4 );
1024 psubw_r2r( mm1, mm2 );
1025 psubw_r2r( mm1, mm3 );
/* Square the differences and accumulate: mm5 += (l0-l1)^2 + (l2-l1)^2,
 * mm6 += (l0-l2)^2 + (l3-l1)^2. */
1027 pmaddwd_r2r( mm0, mm0 );
1028 pmaddwd_r2r( mm4, mm4 );
1029 pmaddwd_r2r( mm2, mm2 );
1030 pmaddwd_r2r( mm3, mm3 );
1031 paddd_r2r( mm0, mm2 );
1032 paddd_r2r( mm4, mm3 );
1033 paddd_r2r( mm2, mm5 );
1034 paddd_r2r( mm3, mm6 );
/* Add the two 32-bit halves of each accumulator and store them. */
1037 movq_r2r( mm5, mm0 );
1038 psrlq_i2r( 32, mm0 );
1039 paddd_r2r( mm0, mm5 );
1040 movd_r2m( mm5, fr );
1042 movq_r2r( mm6, mm0 );
1043 psrlq_i2r( 32, mm0 );
1044 paddd_r2r( mm0, mm6 );
1045 movd_r2m( mm6, ff );
1047 if( ff < 6*fr/8 && fr > 32 )
/**
 * XDeint8x8MergeC: render an 8x8 block classified as progressive.
 *
 * For each of the 4 line pairs, the even output line is a copy of the
 * current src1 line, and the odd output line is the (1,6,1)/8 blend of the
 * current src1 line, the current src2 line and the next src1 line, rounded
 * to nearest.
 *
 * \param dst    output block, 8 lines of 8 pixels
 * \param i_dst  output line stride
 * \param src1   first line of the kept field
 * \param i_src1 stride between successive src1 lines
 * \param src2   first line of the other field
 * \param i_src2 stride between successive src2 lines
 *
 * Reads 5 lines of src1 (the blend looks one line ahead) and 4 lines of src2.
 */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
{
    /* Progressive */
    for( int y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src1, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4 ) >> 3;
        dst += i_dst;

        src1 += i_src1;
        src2 += i_src2;
    }
}
/* MMXEXT version of the progressive 8x8 merge, compiled only when
 * CAN_COMPILE_MMXEXT is defined. Works on 4 pixels at a time: the src1
 * pixels are stored unchanged to dst, and
 * (src1 + 6*src2 + next src1 + 4) >> 3 is stored to the following output
 * line. mm7 is kept at zero for byte/word unpacking and packing; m_4 is the
 * rounding constant (four 16-bit words of value 4). */
1077 #ifdef CAN_COMPILE_MMXEXT
1078 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
1079 uint8_t *src1, int i_src1,
1080 uint8_t *src2, int i_src2 )
1082 static const uint64_t m_4 = INT64_C(0x0004000400040004);
1086 pxor_r2r( mm7, mm7 );
1087 for( y = 0; y < 8; y += 2 )
1089 for( x = 0; x < 8; x +=4 )
/* Even line: straight copy of 4 src1 pixels. */
1091 movd_m2r( src1[x], mm0 );
1092 movd_r2m( mm0, dst[x] );
1094 movd_m2r( src2[x], mm1 );
1095 movd_m2r( src1[i_src1+x], mm2 );
1097 punpcklbw_r2r( mm7, mm0 );
1098 punpcklbw_r2r( mm7, mm1 );
1099 punpcklbw_r2r( mm7, mm2 );
/* mm1 = 2*src2, mm3 = 4*src2, then mm1 = 6*src2 + 4 */
1100 paddw_r2r( mm1, mm1 );
1101 movq_r2r( mm1, mm3 );
1102 paddw_r2r( mm3, mm3 );
1103 paddw_r2r( mm2, mm0 );
1104 paddw_r2r( mm3, mm1 );
1105 paddw_m2r( m_4, mm1 );
/* mm0 = (src1 + next src1 + 6*src2 + 4) >> 3, packed back to bytes. */
1106 paddw_r2r( mm1, mm0 );
1107 psraw_i2r( 3, mm0 );
1108 packuswb_r2r( mm7, mm0 );
1109 movd_r2m( mm0, dst[i_dst+x] );
/**
 * XDeint8x8Set: fill an 8x8 block with a constant value.
 *
 * \param dst   top-left pixel of the block
 * \param i_dst line stride of the destination
 * \param v     value written to each of the 64 pixels
 *
 * Only the 8 leftmost bytes of each of the 8 lines are written.
 */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    for( int y = 0; y < 8; y++ )
        memset( &dst[y*i_dst], v, 8 );
}
1127 /* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for block that miss a
1130 * TODO: a better one for the inner part.
/**
 * XDeint8x8FieldEC: simple (1,0,1) field interpolation for an 8x8 block.
 *
 * For each of the 4 line pairs, the even output line is a copy of the
 * current source line, and the odd output line is the truncated average of
 * the current source line and the source line two below it. The source line
 * in between (the other field) is ignored.
 *
 * \param dst   output block, 8 lines of 8 pixels
 * \param i_dst output line stride
 * \param src   top-left pixel of the source block
 * \param i_src source line stride
 *
 * Reads source lines 0, 2, 4, 6 and 8, so 9 source lines must be readable.
 */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Interlaced */
    for( int y = 0; y < 8; y += 2 )
    {
        memcpy( dst, src, 8 );
        dst += i_dst;

        for( int x = 0; x < 8; x++ )
            dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        dst += 1*i_dst;
        src += 2*i_src;
    }
}
/* MMXEXT version of the (1,0,1) field interpolation, compiled only when
 * CAN_COMPILE_MMXEXT is defined. Each step copies 8 source pixels to the
 * output, then stores the pavgb average of that line and the source line
 * two below it. */
1149 #ifdef CAN_COMPILE_MMXEXT
1150 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
1151 uint8_t *src, int i_src )
1156 for( y = 0; y < 8; y += 2 )
/* Kept line: 8-byte copy. */
1158 movq_m2r( src[0], mm0 );
1159 movq_r2m( mm0, dst[0] );
/* Interpolated line: average with the source line two below. */
1162 movq_m2r( src[2*i_src], mm1 );
1163 pavgb_r2r( mm1, mm0 );
1165 movq_r2m( mm0, dst[0] );
1173 /* XDeint8x8Field: Edge oriented interpolation
1174 * (Need -4 and +5 pixels H, +1 line)
/* C edge-oriented field interpolation for one 8x8 block.
 * Kept lines are copied. For each pixel of an interpolated line, three sums
 * of absolute differences over 8 pixels are computed between the line above
 * (src) and the line below (src2 = src + 2*i_src):
 *   c0 - src shifted 2 pixels left relative to src2,
 *   c1 - no relative shift,
 *   c2 - src shifted 2 pixels right relative to src2.
 * When c0 or c2 is strictly the best and the costs are ordered
 * monotonically, the pixel is averaged along that diagonal
 * (src[x-1], src2[x+1] or src[x+1], src2[x-1]); otherwise vertically.
 * Reads from x-4 up to x+5 on both lines, i.e. outside the 8-pixel block
 * width. */
1176 static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
1177 uint8_t *src, int i_src )
1182 for( y = 0; y < 8; y += 2 )
1184 memcpy( dst, src, 8 );
1187 for( x = 0; x < 8; x++ )
1189 uint8_t *src2 = &src[2*i_src];
1190 /* I use 8 pixels just to match the MMX version, but it's overkill
1191 * 5 would be enough (less isn't good) */
1192 const int c0 = abs(src[x-4]-src2[x-2]) + abs(src[x-3]-src2[x-1]) +
1193 abs(src[x-2]-src2[x+0]) + abs(src[x-1]-src2[x+1]) +
1194 abs(src[x+0]-src2[x+2]) + abs(src[x+1]-src2[x+3]) +
1195 abs(src[x+2]-src2[x+4]) + abs(src[x+3]-src2[x+5]);
1197 const int c1 = abs(src[x-3]-src2[x-3]) + abs(src[x-2]-src2[x-2]) +
1198 abs(src[x-1]-src2[x-1]) + abs(src[x+0]-src2[x+0]) +
1199 abs(src[x+1]-src2[x+1]) + abs(src[x+2]-src2[x+2]) +
1200 abs(src[x+3]-src2[x+3]) + abs(src[x+4]-src2[x+4]);
1202 const int c2 = abs(src[x-2]-src2[x-4]) + abs(src[x-1]-src2[x-3]) +
1203 abs(src[x+0]-src2[x-2]) + abs(src[x+1]-src2[x-1]) +
1204 abs(src[x+2]-src2[x+0]) + abs(src[x+3]-src2[x+1]) +
1205 abs(src[x+4]-src2[x+2]) + abs(src[x+5]-src2[x+3]);
/* Choose the interpolation direction from the three costs. */
1207 if( c0 < c1 && c1 <= c2 )
1208 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1209 else if( c2 < c1 && c1 <= c0 )
1210 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1212 dst[x] = (src[x+0] + src2[x+0]) >> 1;
/* MMXEXT version of the edge-oriented field interpolation, compiled only
 * when CAN_COMPILE_MMXEXT is defined. The three direction costs are obtained
 * with psadbw over 8 bytes:
 *   c2 = SAD(src[x-2..], src2[x-4..]),
 *   c1 = SAD(src[x-3..], src2[x-3..]),
 *   c0 = SAD(src[x-4..], src2[x-2..]).
 * The direction choice and the averaging are done in C, exactly as in
 * XDeint8x8FieldC. */
1219 #ifdef CAN_COMPILE_MMXEXT
1220 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
1221 uint8_t *src, int i_src )
1226 for( y = 0; y < 8; y += 2 )
1228 memcpy( dst, src, 8 );
1231 for( x = 0; x < 8; x++ )
1233 uint8_t *src2 = &src[2*i_src];
1236 movq_m2r( src[x-2], mm0 );
1237 movq_m2r( src[x-3], mm1 );
1238 movq_m2r( src[x-4], mm2 );
1240 psadbw_m2r( src2[x-4], mm0 );
1241 psadbw_m2r( src2[x-3], mm1 );
1242 psadbw_m2r( src2[x-2], mm2 );
1244 movd_r2m( mm0, c2 );
1245 movd_r2m( mm1, c1 );
1246 movd_r2m( mm2, c0 );
1248 if( c0 < c1 && c1 <= c2 )
1249 dst[x] = (src[x-1] + src2[x+1]) >> 1;
1250 else if( c2 < c1 && c1 <= c0 )
1251 dst[x] = (src[x+1] + src2[x-1]) >> 1;
1253 dst[x] = (src[x+0] + src2[x+0]) >> 1;
1262 /* NxN arbitrary size (and then only use pixels in the NxN block)
/* Interlace detector for a block of arbitrary size.
 * Note the parameter order: i_height comes BEFORE i_width.
 * For every second line (while y < i_height - 2) it adds to fr the squared
 * differences with the next line, and to ff the squared differences with
 * the line two below, over i_width columns. The test
 * `ff < fr && fr > i_width / 2` marks a line pair as looking interlaced.
 * Returns true when the counter fc is at least 2. */
1264 static inline int XDeintNxNDetect( uint8_t *src, int i_src,
1265 int i_height, int i_width )
1272 /* Detect interlacing */
1273 /* FIXME way too simple, need to be more like XDeint8x8Detect */
1276 for( y = 0; y < i_height - 2; y += 2 )
1278 const uint8_t *s = &src[y*i_src];
1279 for( x = 0; x < i_width; x++ )
1281 fr += ssd(s[ x] - s[1*i_src+x]);
1282 ff += ssd(s[ x] - s[2*i_src+x]);
1284 if( ff < fr && fr > i_width / 2 )
1288 return fc < 2 ? false : true;
/* Render an i_width x i_height block for which XDeintNxN() did not detect
 * interlacing.
 * dst/i_dst: output block and its pitch; src/i_src: input block and its pitch.
 * Lines are handled in pairs: the first is copied, the second is a vertical
 * blend of its neighbours.
 * NOTE(review): pointer increments and braces are not shown in this listing. */
1291 static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
1292 uint8_t *src, int i_src,
1293 int i_width, int i_height )
1298 for( y = 0; y < i_height; y += 2 )
/* First line of the pair: copied unchanged */
1300 memcpy( dst, src, i_width );
/* The 3-tap blend reads the line two rows further, so it is only used while
 * that line is still inside the block */
1303 if( y < i_height - 2 )
/* 1-2-1 vertical filter with rounding (+2 before the shift by 2) */
1305 for( x = 0; x < i_width; x++ )
1306 dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
1310 /* Blend last line: truncating average of the two remaining lines */
1311 for( x = 0; x < i_width; x++ )
1312 dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
/* Render an i_width x i_height block for which XDeintNxN() detected
 * interlacing.
 * dst/i_dst: output block and its pitch; src/i_src: input block and its pitch.
 * Lines are handled in pairs: the first is copied, the second is rebuilt from
 * the lines one field apart (the line in between is not read).
 * NOTE(review): pointer increments and braces are not shown in this listing. */
1319 static inline void XDeintNxNField( uint8_t *dst, int i_dst,
1320 uint8_t *src, int i_src,
1321 int i_width, int i_height )
1326 for( y = 0; y < i_height; y += 2 )
/* First line of the pair: copied unchanged */
1328 memcpy( dst, src, i_width );
/* The interpolation reads the line two rows further, so it is only used while
 * that line is still inside the block */
1331 if( y < i_height - 2 )
/* Truncating average of src[x] and the sample two rows below it */
1333 for( x = 0; x < i_width; x++ )
1334 dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
1338 /* Blend last line: truncating average with the directly following line */
1339 for( x = 0; x < i_width; x++ )
1340 dst[x] = (src[x] + src[i_src+x]) >> 1;
/* Deinterlace one block of arbitrary size (used for the right and bottom
 * borders that do not fill a complete 8x8 block).
 * dst/i_dst: output block and its pitch; src/i_src: input block and its pitch;
 * i_width/i_height: block size in pixels.
 * The block is rendered with XDeintNxNField() when XDeintNxNDetect() reports
 * interlacing, with XDeintNxNFrame() otherwise.
 * XDeintNxNDetect() takes the height BEFORE the width, so the two sizes are
 * passed in that order; passing them the other way round makes the detector
 * scan i_width rows of i_height pixels, i.e. rows outside a short block. */
1347 static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
1348 int i_width, int i_height )
1350 if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
1351 XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
1353 XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
/* Median of three integers: with min and max holding the smallest and the
 * largest input, a + b + c - min - max leaves the middle value.
 * NOTE(review): the lines that update min and max from b and c are not shown in
 * this listing; only the initialisation and the final expression are visible. */
1357 static inline int median( int a, int b, int c )
1359 int min = a, max =a;
1370 return a + b + c - min - max;
/* Deinterlace one 8-line band of a plane with the C routines.
 * dst/i_dst: output band and its pitch; src/i_src: input band and its pitch;
 * i_mbx: number of complete 8x8 blocks in the band; i_modx: width in pixels of
 * the leftover block on the right.
 * NOTE(review): braces, else lines and pointer increments are not shown in
 * this listing. */
1376 static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
1377 uint8_t *src, int i_src,
1378 const int i_mbx, int i_modx )
1382 for( x = 0; x < i_mbx; x++ )
/* Non-zero detector result: the block is treated as interlaced */
1385 if( ( s = XDeint8x8DetectC( src, i_src ) ) )
/* First and last blocks of the band use the EC variant (presumably the
 * edge-safe one that avoids reading outside the block -- TODO confirm) */
1387 if( x == 0 || x == i_mbx - 1 )
1388 XDeint8x8FieldEC( dst, i_dst, src, i_src );
1390 XDeint8x8FieldC( dst, i_dst, src, i_src );
/* Other case: merge the even lines (starting at line 0) with the odd lines
 * (starting at line 1), each read with a doubled pitch */
1394 XDeint8x8MergeC( dst, i_dst,
1395 &src[0*i_src], 2*i_src,
1396 &src[1*i_src], 2*i_src );
/* Leftover i_modx x 8 block on the right of the band */
1404 XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
1406 #ifdef CAN_COMPILE_MMXEXT
/* Deinterlace one 8-line band of a plane with the MMXEXT routines.
 * Same contract as XDeintBand8x8C(): dst/i_dst and src/i_src are the output
 * and input bands with their pitches, i_mbx the number of complete 8x8 blocks,
 * i_modx the width in pixels of the leftover block on the right.
 * NOTE(review): braces, else lines and pointer increments are not shown in
 * this listing. */
1407 static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
1408 uint8_t *src, int i_src,
1409 const int i_mbx, int i_modx )
1413 /* Reset current line */
1414 for( x = 0; x < i_mbx; x++ )
/* Non-zero detector result: the block is treated as interlaced */
1417 if( ( s = XDeint8x8DetectMMXEXT( src, i_src ) ) )
/* First and last blocks of the band use the E variant (presumably the
 * edge-safe one -- TODO confirm) */
1419 if( x == 0 || x == i_mbx - 1 )
1420 XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
1422 XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
/* Other case: merge the even lines (starting at line 0) with the odd lines
 * (starting at line 1), each read with a doubled pitch */
1426 XDeint8x8MergeMMXEXT( dst, i_dst,
1427 &src[0*i_src], 2*i_src,
1428 &src[1*i_src], 2*i_src );
/* Leftover i_modx x 8 block on the right: handled by the plain C helper */
1436 XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
/* X deinterlacer: render p_pic into p_outpic, plane by plane, in 8x8 blocks.
 * Complete 8-line bands go through XDeintBand8x8MMXEXT() or XDeintBand8x8C();
 * the bottom band and the right-hand leftovers go through XDeintNxN().
 * NOTE(review): braces, else/endif lines and pointer increments are not shown
 * in this listing. */
1440 static void RenderX( picture_t *p_outpic, picture_t *p_pic )
1444 /* Process every plane of the picture */
1445 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
/* i_mby: number of 8-line bands, not counting the bottom one.
 * i_mbx: number of complete 8-pixel columns. */
1447 const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
1448 const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
/* i_mody: height of the bottom band (1 to 8 lines for a non-empty plane).
 * i_modx: width of the leftover column on the right (0 to 7 pixels). */
1450 const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
1451 const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
1453 const int i_dst = p_outpic->p[i_plane].i_pitch;
1454 const int i_src = p_pic->p[i_plane].i_pitch;
/* Complete 8-line bands */
1458 for( y = 0; y < i_mby; y++ )
1460 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1461 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
/* Use the MMXEXT band routine when compiled in and supported by the CPU */
1463 #ifdef CAN_COMPILE_MMXEXT
1464 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1465 XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
1468 XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
1471 /* Last line (C only)*/
1474 uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
1475 uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
/* 8 x i_mody blocks of the bottom band */
1477 for( x = 0; x < i_mbx; x++ )
1479 XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
/* Bottom-right corner block, i_modx x i_mody */
1486 XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
/* MMXEXT epilogue (the guarded statement is not shown in this listing;
 * presumably it leaves MMX state -- TODO confirm) */
1490 #ifdef CAN_COMPILE_MMXEXT
1491 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1496 /*****************************************************************************
1497 * Yadif (Yet Another DeInterlacing Filter).
1498 *****************************************************************************/
1502 * 0: Output 1 frame for each frame.
1503 * 1: Output 1 frame for each field.
1504 * 2: Like 0 but skips spatial interlacing check.
1505 * 3: Like 1 but skips spatial interlacing check.
1507 * In vlc, only & 0x02 has meaning, as we do the & 0x01 ourselves.
1512 /* Integer type used as x86_reg; I am unsure it is the right one */
1513 typedef intptr_t x86_reg;
/* Absolute value and 2-way/3-way min/max helpers, mapped onto the __MAX and
 * __MIN macros (presumably for the yadif code that follows -- TODO confirm).
 * FFABS evaluates its argument more than once: do not pass expressions with
 * side effects. */
1515 #define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
1516 #define FFMAX(a,b) __MAX(a,b)
1517 #define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
1518 #define FFMIN(a,b) __MIN(a,b)
1519 #define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
1521 /* yadif.h comes from vf_yadif.c of mplayer project */
/* Render one output picture with the Yadif algorithm.
 * p_filter: filter instance, its p_sys holds the 3-picture input history;
 * p_dst: output picture;
 * p_src: current input picture (not read by the statements visible here, the
 *        input is taken from p_sys->pp_history instead);
 * i_order: temporal position of the field being rendered, 0 to 2;
 * i_field: 0 or 1, compared with y % 2 to pick the lines copied unchanged.
 * Three cases are visible: full history (Yadif proper), only the newest
 * picture available (falls back to RenderX), anything else (VLC_EGENERIC).
 * Every visible case sets p_sys->i_frame_offset to 1 except the RenderX one.
 * NOTE(review): braces, else lines and the success return statements are not
 * shown in this listing. */
1524 static int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
1528 filter_sys_t *p_sys = p_filter->p_sys;
1531 assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
1532 assert( i_field == 0 || i_field == 1 );
1534 /* As the pitches must match, use ONLY pictures coming from picture_New()! */
1535 picture_t *p_prev = p_sys->pp_history[0];
1536 picture_t *p_cur = p_sys->pp_history[1];
1537 picture_t *p_next = p_sys->pp_history[2];
1539 /* Account for soft field repeat.
1541 The "parity" parameter affects the algorithm like this (from yadif.h):
1542 uint8_t *prev2= parity ? prev : cur ;
1543 uint8_t *next2= parity ? cur : next;
1545 The original parity expression that was used here is:
1546 (i_field ^ (i_order == i_field)) & 1
1549 i_field = 0, i_order = 0 => 1
1550 i_field = 1, i_order = 1 => 0
1551 i_field = 1, i_order = 0 => 1
1552 i_field = 0, i_order = 1 => 0
1554 => equivalent with e.g. (1 - i_order) or (i_order + 1) % 2
1556 Thus, in a normal two-field frame,
1557 parity 1 = first field (i_order == 0)
1558 parity 0 = second field (i_order == 1)
1560 Now, with three fields, where the third is a copy of the first,
1561 i_order = 0 => parity 1 (as usual)
1562 i_order = 1 => due to the repeat, prev = cur, but also next = cur.
1563 Because in such a case there is no motion (otherwise field repeat makes no sense),
1564 we don't actually need to invoke Yadif's filter(). Thus, set "parity" to 2,
1565 and use this to bypass the filter.
1566 i_order = 2 => parity 0 (as usual)
1569 if( p_cur && p_cur->i_nb_fields > 2 )
1570 yadif_parity = (i_order + 1) % 3; /* 1, *2*, 0; where 2 is a special value meaning "bypass filter". */
1572 yadif_parity = (i_order + 1) % 2; /* 1, 0 */
1574 /* Filter if we have all the pictures we need */
1575 if( p_prev && p_cur && p_next )
/* Line filter: the SSE2-gated implementation when available, the C one
 * otherwise */
1578 void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
1579 #if defined(HAVE_YADIF_SSE2)
1580 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1581 filter = yadif_filter_line_mmx2;
1584 filter = yadif_filter_line_c;
1586 for( int n = 0; n < p_dst->i_planes; n++ )
1588 const plane_t *prevp = &p_prev->p[n];
1589 const plane_t *curp = &p_cur->p[n];
1590 const plane_t *nextp = &p_next->p[n];
1591 plane_t *dstp = &p_dst->p[n];
/* The first and the last visible lines are not filtered here; they are
 * duplicated from their neighbours further down */
1593 for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
/* Lines of the kept field, or every line in bypass mode, are copied from the
 * current picture */
1595 if( (y % 2) == i_field || yadif_parity == 2 )
1597 vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
1598 &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
1602 struct vf_priv_s cfg;
1603 /* Spatial checks only when enough data */
1604 cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
/* The same line offset is used in the three history pictures, hence the
 * requirement on identical pitches */
1606 assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
1608 &dstp->p_pixels[y * dstp->i_pitch],
1609 &prevp->p_pixels[y * prevp->i_pitch],
1610 &curp->p_pixels[y * curp->i_pitch],
1611 &nextp->p_pixels[y * nextp->i_pitch],
1612 dstp->i_visible_pitch,
1617 /* We duplicate the first and last lines */
1619 vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1620 else if( y == dstp->i_visible_lines - 2 )
1621 vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
1625 p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame, too */
/* Only the newest picture is in the history: first frame after Open or Flush */
1629 else if( !p_prev && !p_cur && p_next )
1631 /* NOTE: For the first frame, we use the default frame offset
1632 as set by Open() or SetFilterMethod(). It is always 0. */
1634 /* FIXME not good as it does not use i_order/i_field */
1635 RenderX( p_dst, p_next );
1640 p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame */
/* Nothing was rendered for this call: report an error to the caller */
1642 return VLC_EGENERIC;
1646 /*****************************************************************************
1647 * video filter2 functions
1648 *****************************************************************************/
1649 #define DEINTERLACE_DST_SIZE 3
/* Video filter callback (installed as pf_video_filter by Open()).
 * p_filter: filter instance; p_pic: input picture, released on every exit path
 * visible here.
 * Steps: allocate the first output picture, update the picture and metadata
 * histories, pick the field order and field count of the outgoing frame,
 * allocate the extra output pictures for framerate doublers (chained through
 * p_next), run the renderer selected by p_sys->i_mode, then set the output
 * timestamps and progressive flags.
 * NOTE(review): braces, else lines, break and return statements are not shown
 * in this listing; presumably the function returns p_dst[0] on success and
 * NULL on failure -- TODO confirm. */
1650 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
1652 filter_sys_t *p_sys = p_filter->p_sys;
1653 picture_t *p_dst[DEINTERLACE_DST_SIZE];
1655 /* Request output picture */
1656 p_dst[0] = filter_NewPicture( p_filter );
1657 if( p_dst[0] == NULL )
1659 picture_Release( p_pic );
1662 picture_CopyProperties( p_dst[0], p_pic );
1664 /* Any unused p_dst pointers must be NULL, because they are used to check how many output frames we have. */
1665 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1668 /* Update the input frame history, if the currently active algorithm needs it. */
1669 if( p_sys->b_use_frame_history )
1671 /* Duplicate the picture
1672 * TODO when the vout rework is finished, picture_Hold() might be enough
1673 * but be careful, the pitches must match */
1674 picture_t *p_dup = picture_NewFromFormat( &p_pic->format );
1676 picture_Copy( p_dup, p_pic );
1678 /* Slide the history */
1679 if( p_sys->pp_history[0] )
1680 picture_Release( p_sys->pp_history[0] );
1681 for( int i = 1; i < HISTORY_SIZE; i++ )
1682 p_sys->pp_history[i-1] = p_sys->pp_history[i];
1683 p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
1686 /* Slide the metadata history. */
1687 for( int i = 1; i < METADATA_SIZE; i++ )
1689 p_sys->meta.pi_date[i-1] = p_sys->meta.pi_date[i];
1690 p_sys->meta.pi_nb_fields[i-1] = p_sys->meta.pi_nb_fields[i];
1691 p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
1693 /* The last element corresponds to the current input frame. */
1694 p_sys->meta.pi_date[METADATA_SIZE-1] = p_pic->date;
1695 p_sys->meta.pi_nb_fields[METADATA_SIZE-1] = p_pic->i_nb_fields;
1696 p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;
1698 /* Remember the frame offset that we should use for this frame.
1699 The value in p_sys will be updated to reflect the correct value
1700 for the *next* frame when we call the renderer. */
1701 int i_frame_offset = p_sys->i_frame_offset;
1702 int i_meta_idx = (METADATA_SIZE-1) - i_frame_offset;
1704 /* These correspond to the current *outgoing* frame. */
1705 bool b_top_field_first;
1707 if( i_frame_offset != CUSTOM_PTS )
1709 /* Pick the correct values from the history. */
1710 b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
1711 i_nb_fields = p_sys->meta.pi_nb_fields[i_meta_idx];
1715 /* Framerate doublers must not request CUSTOM_PTS, as they need the original field timings,
1716 and need Deinterlace() to allocate the correct number of output frames. */
1717 assert( !p_sys->b_double_rate );
1719 /* NOTE: i_nb_fields is only used for framerate doublers, so it is unused in this case.
1720 b_top_field_first is only passed to the algorithm. We assume that algorithms that
1721 request CUSTOM_PTS will, if necessary, extract the TFF/BFF information themselves.
1723 b_top_field_first = p_pic->b_top_field_first; /* this is not guaranteed to be meaningful */
1724 i_nb_fields = p_pic->i_nb_fields; /* unused */
1727 /* For framerate doublers, determine field duration and allocate output frames. */
1728 mtime_t i_field_dur = 0;
1729 int i_double_rate_alloc_end = 0; /* One past last for allocated output frames in p_dst[].
1730 Used only for framerate doublers. Will be inited below.
1731 Declared here because the PTS logic needs the result. */
1732 if( p_sys->b_double_rate )
1734 /* Calculate one field duration. */
1736 int iend = METADATA_SIZE-1;
1737 /* Find oldest valid logged date. Note: the current input frame doesn't count. */
1738 for( ; i < iend; i++ )
1739 if( p_sys->meta.pi_date[i] > VLC_TS_INVALID )
1743 /* Count how many fields the valid history entries (except the new frame) represent. */
1744 int i_fields_total = 0;
1745 for( int j = i ; j < iend; j++ )
1746 i_fields_total += p_sys->meta.pi_nb_fields[j];
1747 /* One field took this long. */
/* NOTE(review): divides by the summed field counts; Open() and Flush() seed
 * them with 2, but the values stored later come straight from the input
 * pictures. Confirm that a zero total cannot reach this line. */
1748 i_field_dur = (p_pic->date - p_sys->meta.pi_date[i]) / i_fields_total;
1750 /* Note that we default to field duration 0 if it could not be determined.
1751 This behaves the same as the old code - leaving the extra output frame
1752 dates the same as p_pic->date if the last cached date was not valid.
1755 i_double_rate_alloc_end = i_nb_fields;
1756 if( i_nb_fields > DEINTERLACE_DST_SIZE )
1758 /* Note that the effective buffer size depends also on the constant private_picture in vout_wrapper.c,
1759 since that determines the maximum number of output pictures filter_NewPicture() will successfully
1760 allocate for one input frame.
1762 msg_Err( p_filter, "Framerate doubler: output buffer too small; fields = %d, buffer size = %d. Dropping the remaining fields.", i_nb_fields, DEINTERLACE_DST_SIZE );
1763 i_double_rate_alloc_end = DEINTERLACE_DST_SIZE;
1766 /* Allocate output frames. */
1767 for( int i = 1; i < i_double_rate_alloc_end ; ++i )
/* Each new picture is also linked from the previous one through p_next */
1769 p_dst[i-1]->p_next =
1770 p_dst[i] = filter_NewPicture( p_filter );
1773 picture_CopyProperties( p_dst[i], p_pic );
1777 msg_Err( p_filter, "Framerate doubler: could not allocate output frame %d", i+1 );
1778 i_double_rate_alloc_end = i; /* Inform the PTS logic about the correct end position. */
1779 break; /* If this happens, the rest of the allocations aren't likely to work, either... */
1782 /* Now we have allocated *up to* the correct number of frames; normally, exactly the correct number.
1783 Upon alloc failure, we may have succeeded in allocating *some* output frames, but fewer than
1784 were desired. In such a case, as many will be rendered as were successfully allocated.
1786 Note that now p_dst[i] != NULL for 0 <= i < i_double_rate_alloc_end. */
1788 assert( p_sys->b_double_rate == true || p_dst[1] == NULL );
1789 assert( i_nb_fields > 2 || p_dst[2] == NULL );
/* Run the selected algorithm. Framerate doublers render one picture per
 * field, alternating the field passed to the renderer; the guards on p_dst[1]
 * and p_dst[2] are not shown in this listing. */
1792 switch( p_sys->i_mode )
1794 case DEINTERLACE_DISCARD:
1795 RenderDiscard( p_filter, p_dst[0], p_pic, 0 );
1798 case DEINTERLACE_BOB:
1799 RenderBob( p_filter, p_dst[0], p_pic, !b_top_field_first );
1801 RenderBob( p_filter, p_dst[1], p_pic, b_top_field_first );
1803 RenderBob( p_filter, p_dst[2], p_pic, !b_top_field_first );
1806 case DEINTERLACE_LINEAR:
1807 RenderLinear( p_filter, p_dst[0], p_pic, !b_top_field_first );
1809 RenderLinear( p_filter, p_dst[1], p_pic, b_top_field_first );
1811 RenderLinear( p_filter, p_dst[2], p_pic, !b_top_field_first );
1814 case DEINTERLACE_MEAN:
1815 RenderMean( p_filter, p_dst[0], p_pic );
1818 case DEINTERLACE_BLEND:
1819 RenderBlend( p_filter, p_dst[0], p_pic );
1823 RenderX( p_dst[0], p_pic );
/* A non-zero result from RenderYadif() for the first picture is treated as a
 * failure (the statement taken in that case is not shown in this listing) */
1826 case DEINTERLACE_YADIF:
1827 if( RenderYadif( p_filter, p_dst[0], p_pic, 0, 0 ) )
1831 case DEINTERLACE_YADIF2X:
1832 if( RenderYadif( p_filter, p_dst[0], p_pic, 0, !b_top_field_first ) )
1835 RenderYadif( p_filter, p_dst[1], p_pic, 1, b_top_field_first );
1837 RenderYadif( p_filter, p_dst[2], p_pic, 2, !b_top_field_first );
1841 /* Set output timestamps, if the algorithm didn't request CUSTOM_PTS for this frame. */
/* NOTE(review): i_meta_idx = (METADATA_SIZE-1) - i_frame_offset was already used
 * as an array index above, and an offset equal to METADATA_SIZE (accepted by
 * this assert) would make it -1. Confirm whether a strict < was intended. */
1842 assert( i_frame_offset <= METADATA_SIZE || i_frame_offset == CUSTOM_PTS );
1843 if( i_frame_offset != CUSTOM_PTS )
1845 mtime_t i_base_pts = p_sys->meta.pi_date[i_meta_idx];
1847 /* Note: in the usual case (i_frame_offset = 0 and b_double_rate = false),
1848 this effectively does nothing. This is needed to correct the timestamp
1849 when i_frame_offset > 0. */
1850 p_dst[0]->date = i_base_pts;
1852 if( p_sys->b_double_rate )
1854 /* Processing all actually allocated output frames. */
1855 for( int i = 1; i < i_double_rate_alloc_end; ++i )
1857 /* XXX it's not really good especially for the first picture, but
1858 * I don't think that delaying by one frame is worth it */
1859 if( i_base_pts > VLC_TS_INVALID )
1860 p_dst[i]->date = i_base_pts + i * i_field_dur;
1862 p_dst[i]->date = VLC_TS_INVALID;
/* Every output picture is flagged as progressive */
1867 p_dst[0]->b_progressive = true;
1868 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1872 p_dst[i]->b_progressive = true;
1873 p_dst[i]->i_nb_fields = 2;
/* Normal exit: the input picture is no longer needed */
1877 picture_Release( p_pic );
/* Failure exit (its label is not shown in this listing): drop every output
 * picture and the input picture */
1881 picture_Release( p_dst[0] );
1882 for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
1885 picture_Release( p_dst[i] );
1887 picture_Release( p_pic );
/* Flush callback (installed as pf_video_flush by Open()): bring the filter
 * state back to the values set by Open().
 * The metadata history is reset to an invalid date, 2 fields and top field
 * first; the frame offset is reset to 0; every picture held in the history is
 * released and its slot cleared. */
1891 static void Flush( filter_t *p_filter )
1893 filter_sys_t *p_sys = p_filter->p_sys;
/* Reset the metadata history */
1895 for( int i = 0; i < METADATA_SIZE; i++ )
1897 p_sys->meta.pi_date[i] = VLC_TS_INVALID;
1898 p_sys->meta.pi_nb_fields[i] = 2;
1899 p_sys->meta.pb_top_field_first[i] = true;
1901 p_sys->i_frame_offset = 0; /* reset to default value (first frame after flush cannot have offset) */
/* Drop the picture history */
1902 for( int i = 0; i < HISTORY_SIZE; i++ )
1904 if( p_sys->pp_history[i] )
1905 picture_Release( p_sys->pp_history[i] );
1906 p_sys->pp_history[i] = NULL;
/* Mouse callback (installed as pf_video_mouse by Open()).
 * p_mouse: state to fill in; p_old/p_new: previous and new mouse states.
 * The only statement visible here tests p_sys->b_half_height; presumably the
 * vertical coordinate is rescaled in that case -- the rest of the body is not
 * shown in this listing, TODO confirm. */
1910 static int Mouse( filter_t *p_filter,
1911 vlc_mouse_t *p_mouse, const vlc_mouse_t *p_old, const vlc_mouse_t *p_new )
1915 if( p_filter->p_sys->b_half_height )
1921 /*****************************************************************************
1923 *****************************************************************************/
/* Module open callback: set up the deinterlace filter on p_this.
 * Rejects unsupported input chromas with VLC_EGENERIC, allocates and
 * initialises the private state, selects the merge routines for the running
 * CPU, applies the configured mode, validates the output format and installs
 * the filter, flush and mouse callbacks.
 * NOTE(review): braces, else/endif lines, the allocation check and the final
 * return are not shown in this listing. */
1924 static int Open( vlc_object_t *p_this )
1926 filter_t *p_filter = (filter_t*)p_this;
1927 filter_sys_t *p_sys;
1929 if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
1930 return VLC_EGENERIC;
1933 p_sys = p_filter->p_sys = malloc( sizeof( *p_sys ) );
/* Defaults, in place until SetFilterMethod() is called below */
1937 p_sys->i_mode = DEINTERLACE_BLEND;
1938 p_sys->b_double_rate = false;
1939 p_sys->b_half_height = true;
1940 p_sys->b_use_frame_history = false;
/* Empty metadata history: invalid date, 2 fields, top field first */
1941 for( int i = 0; i < METADATA_SIZE; i++ )
1943 p_sys->meta.pi_date[i] = VLC_TS_INVALID;
1944 p_sys->meta.pi_nb_fields[i] = 2;
1945 p_sys->meta.pb_top_field_first[i] = true;
1947 p_sys->i_frame_offset = 0; /* start with default value (first-ever frame cannot have offset) */
1948 for( int i = 0; i < HISTORY_SIZE; i++ )
1949 p_sys->pp_history[i] = NULL;
/* Pick the line merge routine and its optional epilogue according to the
 * compiled-in variants and the CPU capabilities (the else lines chaining the
 * tests are not shown in this listing) */
1951 #if defined(CAN_COMPILE_C_ALTIVEC)
1952 if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
1954 p_sys->pf_merge = MergeAltivec;
1955 p_sys->pf_end_merge = NULL;
1959 #if defined(CAN_COMPILE_SSE)
1960 if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
1962 p_sys->pf_merge = MergeSSE2;
1963 p_sys->pf_end_merge = EndMMX;
1967 #if defined(CAN_COMPILE_MMXEXT)
1968 if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
1970 p_sys->pf_merge = MergeMMXEXT;
1971 p_sys->pf_end_merge = EndMMX;
1975 #if defined(CAN_COMPILE_3DNOW)
1976 if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
1978 p_sys->pf_merge = Merge3DNow;
1979 p_sys->pf_end_merge = End3DNow;
1983 #if defined __ARM_NEON__
1984 if( vlc_CPU() & CPU_CAPABILITY_NEON )
1986 p_sys->pf_merge = MergeNEON;
1987 p_sys->pf_end_merge = NULL;
/* Portable fallback */
1992 p_sys->pf_merge = MergeGeneric;
1993 p_sys->pf_end_merge = NULL;
/* Read the filter options and apply the requested mode */
1997 config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
2000 char *psz_mode = var_GetNonEmptyString( p_filter, FILTER_CFG_PREFIX "mode" );
2001 SetFilterMethod( p_filter, psz_mode, p_filter->fmt_in.video.i_chroma );
/* Refuse to load when the mode needs another chroma or height than the input
 * and the owner does not allow the output format to change */
2006 GetOutputFormat( p_filter, &fmt, &p_filter->fmt_in.video );
2007 if( !p_filter->b_allow_fmt_out_change &&
2008 ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
2009 fmt.i_height != p_filter->fmt_in.video.i_height ) )
2011 Close( VLC_OBJECT(p_filter) );
2012 return VLC_EGENERIC;
/* Publish the output format and the callbacks */
2014 p_filter->fmt_out.video = fmt;
2015 p_filter->fmt_out.i_codec = fmt.i_chroma;
2016 p_filter->pf_video_filter = Deinterlace;
2017 p_filter->pf_video_flush = Flush;
2018 p_filter->pf_video_mouse = Mouse;
2020 msg_Dbg( p_filter, "deinterlacing" );
2025 /*****************************************************************************
2026 * Close: clean up the filter
2027 *****************************************************************************/
/* Module close callback, also called by Open() on its error path: free the
 * private state of the filter.
 * NOTE(review): the statements between the cast and the free are not shown in
 * this listing; presumably the picture history is released there first --
 * TODO confirm. */
2028 static void Close( vlc_object_t *p_this )
2030 filter_t *p_filter = (filter_t*)p_this;
2033 free( p_filter->p_sys );