1 /*****************************************************************************
2 * deinterlace.c : deinterlacer plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2000-2011 the VideoLAN team
7 * Author: Sam Hocevar <sam@zoy.org>
8 * Juha Jeronen <juha.jeronen@jyu.fi> (Phosphor and IVTC modes)
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
34 #include <stdint.h> /* int_fast32_t */
40 #include <vlc_common.h>
41 #include <vlc_plugin.h>
42 #include <vlc_filter.h>
45 #ifdef CAN_COMPILE_MMXEXT
49 #define DEINTERLACE_DISCARD 1
50 #define DEINTERLACE_MEAN 2
51 #define DEINTERLACE_BLEND 3
52 #define DEINTERLACE_BOB 4
53 #define DEINTERLACE_LINEAR 5
54 #define DEINTERLACE_X 6
55 #define DEINTERLACE_YADIF 7
56 #define DEINTERLACE_YADIF2X 8
57 #define DEINTERLACE_PHOSPHOR 9
58 #define DEINTERLACE_IVTC 10
60 /*****************************************************************************
62 *****************************************************************************/
63 static int Open ( vlc_object_t * );
64 static void Close( vlc_object_t * );
66 #define MODE_TEXT N_("Deinterlace mode")
67 #define MODE_LONGTEXT N_("Deinterlace method to use for local playback.")
69 #define SOUT_MODE_TEXT N_("Streaming deinterlace mode")
70 #define SOUT_MODE_LONGTEXT N_("Deinterlace method to use for streaming.")
72 #define FILTER_CFG_PREFIX "sout-deinterlace-"
74 static const char *const mode_list[] = {
75 "discard", "blend", "mean", "bob", "linear", "x",
76 "yadif", "yadif2x", "phosphor", "ivtc" };
77 static const char *const mode_list_text[] = {
78 N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X",
79 "Yadif", "Yadif (2x)", N_("Phosphor"), N_("Film NTSC (IVTC)") };
81 /* Tooltips drop linefeeds (at least in the Qt GUI);
82 thus the space before each set of consecutive \n. */
83 #define PHOSPHOR_CHROMA_TEXT N_("Phosphor chroma mode for 4:2:0 input")
84 #define PHOSPHOR_CHROMA_LONGTEXT N_("Choose handling for colours in those "\
85 "output frames that fall across input "\
86 "frame boundaries. \n"\
88 "Latest: take chroma from new (bright) "\
89 "field only. Good for interlaced input, "\
90 "such as videos from a camcorder. \n"\
92 "AltLine: take chroma line 1 from top "\
93 "field, line 2 from bottom field, etc. \n"\
94 "Default, good for NTSC telecined input "\
95 "(anime DVDs, etc.). \n"\
97 "Blend: average input field chromas. "\
98 "May distort the colours of the new "\
99 "(bright) field, too. \n"\
101 "Upconvert: output in 4:2:2 format "\
102 "(independent chroma for each field). "\
103 "Best simulation, but requires more CPU "\
104 "and memory bandwidth.")
106 #define PHOSPHOR_DIMMER_TEXT N_("Phosphor old field dimmer strength")
107 #define PHOSPHOR_DIMMER_LONGTEXT N_("This controls the strength of the "\
108 "darkening filter that simulates CRT TV "\
109 "phosphor light decay for the old field "\
110 "in the Phosphor framerate doubler. "\
113 /* These numbers, and phosphor_chroma_list[], should be in the same order
114 as phosphor_chroma_list_text[]. The value 0 is reserved, because
115 var_GetInteger() returns 0 in case of error. */
116 typedef enum { PC_LATEST = 1, PC_ALTLINE = 2,
117 PC_BLEND = 3, PC_UPCONVERT = 4 } phosphor_chroma_t;
118 static const int phosphor_chroma_list[] = { PC_LATEST, PC_ALTLINE,
119 PC_BLEND, PC_UPCONVERT };
120 static const char *const phosphor_chroma_list_text[] = { N_("Latest"),
125 /* Same here. Same order as in phosphor_dimmer_list_text[],
126 and the value 0 is reserved for config error. */
127 static const int phosphor_dimmer_list[] = { 1, 2, 3, 4 };
128 static const char *const phosphor_dimmer_list_text[] = { N_("Off"),
134 set_description( N_("Deinterlacing video filter") )
135 set_shortname( N_("Deinterlace" ))
136 set_capability( "video filter2", 0 )
137 set_category( CAT_VIDEO )
138 set_subcategory( SUBCAT_VIDEO_VFILTER )
140 add_string( FILTER_CFG_PREFIX "mode", "blend", SOUT_MODE_TEXT,
141 SOUT_MODE_LONGTEXT, false )
142 change_string_list( mode_list, mode_list_text, 0 )
144 add_integer( FILTER_CFG_PREFIX "phosphor-chroma", 2, PHOSPHOR_CHROMA_TEXT,
145 PHOSPHOR_CHROMA_LONGTEXT, true )
146 change_integer_list( phosphor_chroma_list, phosphor_chroma_list_text )
148 add_integer( FILTER_CFG_PREFIX "phosphor-dimmer", 2, PHOSPHOR_DIMMER_TEXT,
149 PHOSPHOR_DIMMER_LONGTEXT, true )
150 change_integer_list( phosphor_dimmer_list, phosphor_dimmer_list_text )
152 add_shortcut( "deinterlace" )
153 set_callbacks( Open, Close )
157 /*****************************************************************************
159 *****************************************************************************/
160 static void RenderDiscard ( filter_t *, picture_t *, picture_t *, int );
161 static void RenderBob ( filter_t *, picture_t *, picture_t *, int );
162 static void RenderMean ( filter_t *, picture_t *, picture_t * );
163 static void RenderBlend ( filter_t *, picture_t *, picture_t * );
164 static void RenderLinear ( filter_t *, picture_t *, picture_t *, int );
165 static void RenderX ( picture_t *, picture_t * );
166 static int RenderYadif ( filter_t *, picture_t *, picture_t *, int, int );
167 static int RenderPhosphor( filter_t *, picture_t *, picture_t *, int, int );
168 static int RenderIVTC ( filter_t *, picture_t *, picture_t * );
170 static void MergeGeneric ( void *, const void *, const void *, size_t );
171 #if defined(CAN_COMPILE_C_ALTIVEC)
172 static void MergeAltivec ( void *, const void *, const void *, size_t );
174 #if defined(CAN_COMPILE_MMXEXT)
175 static void MergeMMXEXT ( void *, const void *, const void *, size_t );
177 #if defined(CAN_COMPILE_3DNOW)
178 static void Merge3DNow ( void *, const void *, const void *, size_t );
180 #if defined(CAN_COMPILE_SSE)
181 static void MergeSSE2 ( void *, const void *, const void *, size_t );
183 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
184 static void EndMMX ( void );
186 #if defined(CAN_COMPILE_3DNOW)
187 static void End3DNow ( void );
189 #if defined __ARM_NEON__
190 static void MergeNEON (void *, const void *, const void *, size_t);
193 /* Converts a full-frame plane_t to a field plane_t */
194 static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src,
197 /* Composes a frame from the given field pair */
198 typedef enum { CC_ALTLINE, CC_UPCONVERT, CC_SOURCE_TOP, CC_SOURCE_BOTTOM,
199 CC_MERGE } compose_chroma_t;
200 static void ComposeFrame( filter_t *, picture_t *, picture_t *, picture_t *,
203 static const char *const ppsz_filter_options[] = {
204 "mode", "phosphor-chroma", "phosphor-dimmer",
208 /* Used for framerate doublers */
/* Number of input frames whose metadata is kept (ring of the last 3). */
209 #define METADATA_SIZE (3)
/* Per-frame metadata history used by the framerate doublers to time and
   order the extra output frames (see "Used for framerate doublers" above). */
/* Presentation timestamps of the last METADATA_SIZE input frames. */
211 mtime_t pi_date[METADATA_SIZE];
/* nb_fields of each frame; 3 indicates a soft field repeat (see the
   i_frame_offset rationale further below). */
212 int pi_nb_fields[METADATA_SIZE];
/* Top-field-first flag of each frame. */
213 bool pb_top_field_first[METADATA_SIZE];
214 } metadata_history_t;
216 /* Algorithm-specific state */
219 phosphor_chroma_t i_chroma_for_420;
220 int i_dimmer_strength;
224 * Inverse telecine subsystem state.
227 #define IVTC_NUM_FIELD_PAIRS 7
228 #define IVTC_DETECTION_HISTORY_SIZE 3
229 #define IVTC_LATEST (IVTC_DETECTION_HISTORY_SIZE-1)
232 int i_mode; /**< Detecting, hard TC, or soft TC. @see ivtc_mode */
233 int i_old_mode; /**< @see IVTCSoftTelecineDetect() */
235 int i_cadence_pos; /**< Cadence counter, 0..4. Runs when locked on. */
236 int i_tfd; /**< TFF or BFF telecine. Detected from the video. */
238 /** Raw low-level detector output.
240 * @see IVTCLowLevelDetect()
242 int pi_scores[IVTC_NUM_FIELD_PAIRS]; /**< Interlace scores. */
243 int pi_motion[IVTC_DETECTION_HISTORY_SIZE]; /**< 8x8 blocks with motion. */
244 int pi_top_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard top field repeat. */
245 int pi_bot_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard bot field repeat. */
247 /** Interlace scores of outgoing frames, used for judging IVTC output
248 * (detecting cadence breaks).
250 * @see IVTCOutputOrDropFrame()
252 int pi_final_scores[IVTC_DETECTION_HISTORY_SIZE];
254 /** Cadence position detection history (in ivtc_cadence_pos format).
255 * Contains the detected cadence position and a corresponding
256 * reliability flag for each algorithm.
258 * s = scores, interlace scores based algorithm, original to this filter.
259 * v = vektor, hard field repeat based algorithm, inspired by
260 * the TVTime/Xine IVTC filter by Billy Biggs (Vektor).
262 * Each algorithm may also keep internal, opaque data.
264 * @see ivtc_cadence_pos
265 * @see IVTCCadenceDetectAlgoScores()
266 * @see IVTCCadenceDetectAlgoVektor()
268 int pi_s_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
269 bool pb_s_reliable[IVTC_DETECTION_HISTORY_SIZE];
270 int pi_v_raw[IVTC_DETECTION_HISTORY_SIZE]; /**< "vektor" algo internal */
271 int pi_v_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
272 bool pb_v_reliable[IVTC_DETECTION_HISTORY_SIZE];
274 /** Final result, chosen by IVTCCadenceDetectFinalize() from the results
275 * given by the different detection algorithms.
277 * @see IVTCCadenceDetectFinalize()
279 int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE];
282 * Set by cadence analyzer. Whether the sequence of last
283 * IVTC_DETECTION_HISTORY_SIZE detected positions, stored in
284 * pi_cadence_pos_history, looks like a valid telecine.
286 * @see IVTCCadenceAnalyze()
288 bool b_sequence_valid;
291 * Set by cadence analyzer. True if detected position = "dea".
292 * The three entries of this are used for detecting three progressive
293 * stencil positions in a row, i.e. five progressive frames in a row;
294 * this triggers exit from hard IVTC.
296 * @see IVTCCadenceAnalyze()
298 bool pb_all_progressives[IVTC_DETECTION_HISTORY_SIZE];
301 /* Top-level subsystem state */
302 #define HISTORY_SIZE (3)
303 #define CUSTOM_PTS -1
306 int i_mode; /* Deinterlace mode */
307 bool b_double_rate; /* Shall we double the framerate? */
308 bool b_half_height; /* Shall be divide the height by 2 */
309 bool b_use_frame_history; /* Does the algorithm need the input frame history buffer? */
311 void (*pf_merge) ( void *, const void *, const void *, size_t );
312 void (*pf_end_merge) ( void );
314 /* Metadata history (PTS, nb_fields, TFF). Used for framerate doublers. */
315 metadata_history_t meta;
317 /* Output frame timing / framerate doubler control (see below) */
320 /* Input frame history buffer for algorithms that perform temporal filtering. */
321 picture_t *pp_history[HISTORY_SIZE];
323 /* Algorithm-specific substructures */
324 phosphor_sys_t phosphor;
328 /* NOTE on i_frame_offset:
330 This value indicates the offset between input and output frames in the currently active deinterlace algorithm.
331 See the rationale below for why this is needed and how it is used.
333 Valid range: 0 <= i_frame_offset < METADATA_SIZE, or i_frame_offset = CUSTOM_PTS.
334 The special value CUSTOM_PTS is only allowed if b_double_rate is false.
336 If CUSTOM_PTS is used, the algorithm must compute the outgoing PTSs itself,
337 and additionally, read the TFF/BFF information itself (if it needs it)
338 from the incoming frames.
341 0 = output frame corresponds to the current input frame
342 (no frame offset; default if not set),
343 1 = output frame corresponds to the previous input frame
344 (e.g. Yadif and Yadif2x work like this),
347 If necessary, i_frame_offset should be updated by the active deinterlace algorithm
348 to indicate the correct delay for the *next* input frame. It does not matter at which i_order
349 the algorithm updates this information, but the new value will only take effect upon the
350 next call to Deinterlace() (i.e. at the next incoming frame).
352 The first-ever frame that arrives to the filter after Open() is always handled as having
353 i_frame_offset = 0. For the second and all subsequent frames, each algorithm is responsible
354 for setting the offset correctly. (The default is 0, so if that is correct, there's no need
357 This solution guarantees that i_frame_offset:
358 1) is up to date at the start of each frame,
359 2) does not change (as far as Deinterlace() is concerned) during a frame, and
360 3) does not need a special API for setting the value at the start of each input frame,
361 before the algorithm starts rendering the (first) output frame for that input frame.
363 The deinterlace algorithm is allowed to behave differently for different input frames.
364 This is especially important for startup, when full history (as defined by each algorithm)
365 is not yet available. During the first-ever input frame, it is clear that it is the
366 only possible source for information, so i_frame_offset = 0 is necessarily correct.
367 After that, what to do is up to each algorithm.
369 Having the correct offset at the start of each input frame is critically important in order to:
370 1) Allocate the correct number of output frames for framerate doublers, and to
371 2) Pass correct TFF/BFF information to the algorithm.
373 These points are important for proper soft field repeat support. This feature is used in some
374 streams originating from film. In soft NTSC telecine, the number of fields alternates as 3,2,3,2,...
375 and the video field dominance flips every two frames (after every "3"). Also, some streams
376 request an occasional field repeat (nb_fields = 3), after which the video field dominance flips.
377 To render such streams correctly, the nb_fields and TFF/BFF information must be taken from
378 the specific input frame that the algorithm intends to render.
380 Additionally, the output PTS is automatically computed by Deinterlace() from i_frame_offset and i_order.
382 It is possible to use the special value CUSTOM_PTS to indicate that the algorithm computes
383 the output PTSs itself. In this case, Deinterlace() will pass them through. This special value
384 is not valid for framerate doublers, as by definition they are field renderers, so they need to
385 use the original field timings to work correctly. Basically, this special value is only intended
386 for algorithms that need to perform nontrivial framerate conversions (such as IVTC).
390 /*****************************************************************************
391 * SetFilterMethod: setup the deinterlace method to use.
392 *****************************************************************************/
/**
 * SetFilterMethod: map a deinterlace mode name to the filter configuration.
 *
 * Sets p_sys->i_mode and the three capability flags (b_double_rate,
 * b_half_height, b_use_frame_history) according to psz_method, and resets
 * i_frame_offset to its default. Unrecognized names fall back to "blend".
 *
 * @param p_filter   filter instance; its p_sys is updated in place
 * @param psz_method mode name ("discard", "mean", "bob", "yadif2x", ...)
 * @param i_chroma   input chroma; only used by "discard" to keep full
 *                   height for 4:2:2 input
 */
393 static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fourcc_t i_chroma )
395 filter_sys_t *p_sys = p_filter->p_sys;
/* Mean: single-rate, half-height (averages the two fields). */
400 if( !strcmp( psz_method, "mean" ) )
402 p_sys->i_mode = DEINTERLACE_MEAN;
403 p_sys->b_double_rate = false;
404 p_sys->b_half_height = true;
405 p_sys->b_use_frame_history = false;
/* Bob: framerate doubler; "progressive-scan" is accepted as an alias. */
407 else if( !strcmp( psz_method, "bob" )
408 || !strcmp( psz_method, "progressive-scan" ) )
410 p_sys->i_mode = DEINTERLACE_BOB;
411 p_sys->b_double_rate = true;
412 p_sys->b_half_height = false;
413 p_sys->b_use_frame_history = false;
/* Linear: bob with linear interpolation; framerate doubler. */
415 else if( !strcmp( psz_method, "linear" ) )
417 p_sys->i_mode = DEINTERLACE_LINEAR;
418 p_sys->b_double_rate = true;
419 p_sys->b_half_height = false;
420 p_sys->b_use_frame_history = false;
/* X: single-rate, full-height. */
422 else if( !strcmp( psz_method, "x" ) )
424 p_sys->i_mode = DEINTERLACE_X;
425 p_sys->b_double_rate = false;
426 p_sys->b_half_height = false;
427 p_sys->b_use_frame_history = false;
/* Yadif: temporal filter -- needs the input frame history buffer. */
429 else if( !strcmp( psz_method, "yadif" ) )
431 p_sys->i_mode = DEINTERLACE_YADIF;
432 p_sys->b_double_rate = false;
433 p_sys->b_half_height = false;
434 p_sys->b_use_frame_history = true;
/* Yadif2x: as yadif, but doubles the framerate. */
436 else if( !strcmp( psz_method, "yadif2x" ) )
438 p_sys->i_mode = DEINTERLACE_YADIF2X;
439 p_sys->b_double_rate = true;
440 p_sys->b_half_height = false;
441 p_sys->b_use_frame_history = true;
/* Phosphor: CRT simulation; framerate doubler with history. */
443 else if( !strcmp( psz_method, "phosphor" ) )
445 p_sys->i_mode = DEINTERLACE_PHOSPHOR;
446 p_sys->b_double_rate = true;
447 p_sys->b_half_height = false;
448 p_sys->b_use_frame_history = true;
/* IVTC: inverse telecine; single-rate output computed from history. */
450 else if( !strcmp( psz_method, "ivtc" ) )
452 p_sys->i_mode = DEINTERLACE_IVTC;
453 p_sys->b_double_rate = false;
454 p_sys->b_half_height = false;
455 p_sys->b_use_frame_history = true;
/* Discard: keep one field only. Half-height output, except for 4:2:2
   input where the full height is kept. */
457 else if( !strcmp( psz_method, "discard" ) )
459 const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
460 i_chroma == VLC_CODEC_J422;
462 p_sys->i_mode = DEINTERLACE_DISCARD;
463 p_sys->b_double_rate = false;
464 p_sys->b_half_height = !b_i422;
465 p_sys->b_use_frame_history = false;
/* Fallback: blend. Only warn when the caller asked for something else
   than "blend" itself. */
469 if( strcmp( psz_method, "blend" ) )
471 "no valid deinterlace mode provided, using \"blend\"" );
473 p_sys->i_mode = DEINTERLACE_BLEND;
474 p_sys->b_double_rate = false;
475 p_sys->b_half_height = false;
476 p_sys->b_use_frame_history = false;
479 p_sys->i_frame_offset = 0; /* reset to default when method changes */
481 msg_Dbg( p_filter, "using %s deinterlace method", psz_method );
/**
 * GetOutputFormat: derive the output video format from the input format
 * and the currently selected deinterlace mode.
 *
 * NOTE(review): assumes *p_dst has already been initialized from *p_src
 * before the adjustments below -- confirm against the copy that precedes
 * this code.
 */
484 static void GetOutputFormat( filter_t *p_filter,
485 video_format_t *p_dst, const video_format_t *p_src )
487 filter_sys_t *p_sys = p_filter->p_sys;
/* Half-height modes: halve the heights and the crop offset, and double
   the SAR denominator so the display aspect ratio stays unchanged. */
490 if( p_sys->b_half_height )
492 p_dst->i_height /= 2;
493 p_dst->i_visible_height /= 2;
494 p_dst->i_y_offset /= 2;
495 p_dst->i_sar_den *= 2;
/* 4:2:2 input: modes that process 4:2:2 natively keep the input chroma;
   the remaining modes fall through to a 4:2:0 output chroma. */
498 if( p_src->i_chroma == VLC_CODEC_I422 ||
499 p_src->i_chroma == VLC_CODEC_J422 )
501 switch( p_sys->i_mode )
503 case DEINTERLACE_MEAN:
504 case DEINTERLACE_LINEAR:
506 case DEINTERLACE_YADIF:
507 case DEINTERLACE_YADIF2X:
508 case DEINTERLACE_PHOSPHOR:
509 case DEINTERLACE_IVTC:
510 p_dst->i_chroma = p_src->i_chroma;
/* I422 maps to I420; presumably J422 maps to J420 -- the second half of
   this conditional is outside the visible context, confirm. */
513 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_I422 ? VLC_CODEC_I420 :
/* Phosphor in 4:2:2 upconvert mode: 4:2:0 input is promoted to 4:2:2
   so each field keeps independent chroma. */
518 else if( p_sys->i_mode == DEINTERLACE_PHOSPHOR &&
519 p_sys->phosphor.i_chroma_for_420 == PC_UPCONVERT )
521 p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_J420 ? VLC_CODEC_J422 :
526 static bool IsChromaSupported( vlc_fourcc_t i_chroma )
528 return i_chroma == VLC_CODEC_I420 ||
529 i_chroma == VLC_CODEC_J420 ||
530 i_chroma == VLC_CODEC_YV12 ||
531 i_chroma == VLC_CODEC_I422 ||
532 i_chroma == VLC_CODEC_J422;
535 /*****************************************************************************
536 * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
537 *****************************************************************************/
/**
 * RenderDiscard: keep only one field (TOP when i_field == 0, BOTTOM when
 * i_field == 1), discarding the other.
 *
 * First chroma branch: plain decimation, one output line per two input
 * lines (half-height output) -- presumably the 4:2:0 formats, confirm
 * against the elided case labels.  Second branch: the luma field lines
 * are written twice each (full-height output) while chroma is decimated,
 * matching the 4:2:2 -> 4:2:0 conversion chosen in GetOutputFormat().
 */
538 static void RenderDiscard( filter_t *p_filter,
539 picture_t *p_outpic, picture_t *p_pic, int i_field )
543 /* Copy image and skip lines */
544 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
546 uint8_t *p_in, *p_out_end, *p_out;
/* Start on the requested field: BOTTOM begins one line down. */
549 p_in = p_pic->p[i_plane].p_pixels
550 + i_field * p_pic->p[i_plane].i_pitch;
552 p_out = p_outpic->p[i_plane].p_pixels;
553 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
554 * p_outpic->p[i_plane].i_visible_lines;
556 switch( p_filter->fmt_in.video.i_chroma )
/* One output line per two input lines. */
562 for( ; p_out < p_out_end ; )
564 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
566 p_out += p_outpic->p[i_plane].i_pitch;
567 p_in += 2 * p_pic->p[i_plane].i_pitch;
574 i_increment = 2 * p_pic->p[i_plane].i_pitch;
576 if( i_plane == Y_PLANE )
/* Luma: duplicate each kept field line to restore full height. */
578 for( ; p_out < p_out_end ; )
580 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
581 p_out += p_outpic->p[i_plane].i_pitch;
582 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
583 p_out += p_outpic->p[i_plane].i_pitch;
/* Chroma: written once per kept line (vertical resolution halved). */
589 for( ; p_out < p_out_end ; )
591 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
592 p_out += p_outpic->p[i_plane].i_pitch;
604 /*****************************************************************************
605 * RenderBob: renders a BOB picture - simple copy
606 *****************************************************************************/
/**
 * RenderBob: render one field as a full frame by duplicating each field
 * line (simple copy, no interpolation).  i_field selects TOP (0) or
 * BOTTOM (1).  Called twice per input frame by the framerate doubler.
 *
 * First chroma branch duplicates lines in every plane (presumably the
 * 4:2:0 formats -- confirm against the elided case labels); the second
 * branch duplicates luma only and decimates chroma, matching the
 * 4:2:2 -> 4:2:0 output chosen in GetOutputFormat().
 */
607 static void RenderBob( filter_t *p_filter,
608 picture_t *p_outpic, picture_t *p_pic, int i_field )
612 /* Copy image and skip lines */
613 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
615 uint8_t *p_in, *p_out_end, *p_out;
617 p_in = p_pic->p[i_plane].p_pixels;
618 p_out = p_outpic->p[i_plane].p_pixels;
619 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
620 * p_outpic->p[i_plane].i_visible_lines;
622 switch( p_filter->fmt_in.video.i_chroma )
627 /* For BOTTOM field we need to add the first line */
630 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
631 p_in += p_pic->p[i_plane].i_pitch;
632 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop two lines early; the bottom edge is handled specially below. */
635 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
/* Main loop: each field line is written twice. */
637 for( ; p_out < p_out_end ; )
639 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
641 p_out += p_outpic->p[i_plane].i_pitch;
643 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
645 p_in += 2 * p_pic->p[i_plane].i_pitch;
646 p_out += p_outpic->p[i_plane].i_pitch;
649 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
651 /* For TOP field we need to add the last line */
654 p_in += p_pic->p[i_plane].i_pitch;
655 p_out += p_outpic->p[i_plane].i_pitch;
656 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
662 /* For BOTTOM field we need to add the first line */
665 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
666 p_in += p_pic->p[i_plane].i_pitch;
667 p_out += p_outpic->p[i_plane].i_pitch;
670 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
672 if( i_plane == Y_PLANE )
/* Luma: duplicate each field line. */
674 for( ; p_out < p_out_end ; )
676 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
678 p_out += p_outpic->p[i_plane].i_pitch;
680 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
682 p_in += 2 * p_pic->p[i_plane].i_pitch;
683 p_out += p_outpic->p[i_plane].i_pitch;
/* Chroma: one output line per two input lines (halved vertically). */
688 for( ; p_out < p_out_end ; )
690 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
692 p_out += p_outpic->p[i_plane].i_pitch;
693 p_in += 2 * p_pic->p[i_plane].i_pitch;
697 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
699 /* For TOP field we need to add the last line */
702 p_in += p_pic->p[i_plane].i_pitch;
703 p_out += p_outpic->p[i_plane].i_pitch;
704 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
711 #define Merge p_filter->p_sys->pf_merge
712 #define EndMerge if(p_filter->p_sys->pf_end_merge) p_filter->p_sys->pf_end_merge
714 /*****************************************************************************
715 * RenderLinear: BOB with linear interpolation
716 *****************************************************************************/
/**
 * RenderLinear: like RenderBob, but the missing lines of the selected
 * field are reconstructed by linear interpolation: each is the average
 * (via the pf_merge implementation) of the field lines directly above
 * and below.  i_field selects TOP (0) or BOTTOM (1).
 */
717 static void RenderLinear( filter_t *p_filter,
718 picture_t *p_outpic, picture_t *p_pic, int i_field )
722 /* Copy image and skip lines */
723 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
725 uint8_t *p_in, *p_out_end, *p_out;
727 p_in = p_pic->p[i_plane].p_pixels;
728 p_out = p_outpic->p[i_plane].p_pixels;
729 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
730 * p_outpic->p[i_plane].i_visible_lines;
732 /* For BOTTOM field we need to add the first line */
735 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
736 p_in += p_pic->p[i_plane].i_pitch;
737 p_out += p_outpic->p[i_plane].i_pitch;
/* Stop two lines early; the bottom edge is handled specially below. */
740 p_out_end -= 2 * p_outpic->p[i_plane].i_pitch;
742 for( ; p_out < p_out_end ; )
/* Field line: plain copy... */
744 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
746 p_out += p_outpic->p[i_plane].i_pitch;
/* ...then the opposite-field line: average of the field lines
   above and below (two input pitches apart). */
748 Merge( p_out, p_in, p_in + 2 * p_pic->p[i_plane].i_pitch,
749 p_pic->p[i_plane].i_pitch );
751 p_in += 2 * p_pic->p[i_plane].i_pitch;
752 p_out += p_outpic->p[i_plane].i_pitch;
755 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
757 /* For TOP field we need to add the last line */
760 p_in += p_pic->p[i_plane].i_pitch;
761 p_out += p_outpic->p[i_plane].i_pitch;
762 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
/**
 * RenderMean: half-height output; every output line is the average (via
 * the pf_merge implementation) of an input line pair, i.e. of one top
 * field line and the bottom field line below it.
 */
768 static void RenderMean( filter_t *p_filter,
769 picture_t *p_outpic, picture_t *p_pic )
773 /* Copy image and skip lines */
774 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
776 uint8_t *p_in, *p_out_end, *p_out;
778 p_in = p_pic->p[i_plane].p_pixels;
780 p_out = p_outpic->p[i_plane].p_pixels;
/* Output plane is half height; p_out_end uses the *output* line count. */
781 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
782 * p_outpic->p[i_plane].i_visible_lines;
784 /* All lines: mean value */
785 for( ; p_out < p_out_end ; )
787 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
788 p_pic->p[i_plane].i_pitch );
790 p_out += p_outpic->p[i_plane].i_pitch;
791 p_in += 2 * p_pic->p[i_plane].i_pitch;
/**
 * RenderBlend: full-height output; the first line is copied as-is and
 * each following line is the average of itself and the line above it,
 * blending the two fields into each other.
 *
 * First chroma branch treats all planes identically (presumably the
 * 4:2:0 formats -- confirm against the elided case labels); the second
 * branch blends luma line-by-line but halves chroma vertically,
 * matching the 4:2:2 -> 4:2:0 output chosen in GetOutputFormat().
 */
797 static void RenderBlend( filter_t *p_filter,
798 picture_t *p_outpic, picture_t *p_pic )
802 /* Copy image and skip lines */
803 for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
805 uint8_t *p_in, *p_out_end, *p_out;
807 p_in = p_pic->p[i_plane].p_pixels;
809 p_out = p_outpic->p[i_plane].p_pixels;
810 p_out_end = p_out + p_outpic->p[i_plane].i_pitch
811 * p_outpic->p[i_plane].i_visible_lines;
813 switch( p_filter->fmt_in.video.i_chroma )
818 /* First line: simple copy */
819 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
820 p_out += p_outpic->p[i_plane].i_pitch;
822 /* Remaining lines: mean value */
823 for( ; p_out < p_out_end ; )
825 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
826 p_pic->p[i_plane].i_pitch );
828 p_out += p_outpic->p[i_plane].i_pitch;
829 p_in += p_pic->p[i_plane].i_pitch;
835 /* First line: simple copy */
836 vlc_memcpy( p_out, p_in, p_pic->p[i_plane].i_pitch );
837 p_out += p_outpic->p[i_plane].i_pitch;
839 /* Remaining lines: mean value */
840 if( i_plane == Y_PLANE )
/* Luma: blend with the previous line, advancing one line at a time. */
842 for( ; p_out < p_out_end ; )
844 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
845 p_pic->p[i_plane].i_pitch );
847 p_out += p_outpic->p[i_plane].i_pitch;
848 p_in += p_pic->p[i_plane].i_pitch;
/* Chroma: blend a line pair, then skip it -- one output line per two
   input lines (vertical resolution halved). */
854 for( ; p_out < p_out_end ; )
856 Merge( p_out, p_in, p_in + p_pic->p[i_plane].i_pitch,
857 p_pic->p[i_plane].i_pitch );
859 p_out += p_outpic->p[i_plane].i_pitch;
860 p_in += 2*p_pic->p[i_plane].i_pitch;
/**
 * MergeGeneric: portable C implementation of the line-merge primitive.
 *
 * Writes the floor average ((s1[i] + s2[i]) >> 1) of each of i_bytes
 * byte pairs into the destination.  Used to blend two video lines (or
 * two fields) together; the SIMD variants below compute the same thing.
 *
 * @param _p_dest destination line (may not overlap the sources)
 * @param _p_s1   first source line
 * @param _p_s2   second source line
 * @param i_bytes number of bytes to process (any value, including < 8)
 *
 * Fix: the previous 8x-unrolled version computed
 * p_end = p_dest + i_bytes - 8 up front, which forms an out-of-bounds
 * pointer (undefined behavior) whenever i_bytes < 8.  A plain loop is
 * safe for every i_bytes and is auto-vectorized by modern compilers.
 */
static void MergeGeneric( void *_p_dest, const void *_p_s1,
                          const void *_p_s2, size_t i_bytes )
{
    uint8_t *p_dest = (uint8_t *)_p_dest;
    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
    const uint8_t *p_s2 = (const uint8_t *)_p_s2;

    for( size_t i = 0; i < i_bytes; i++ )
        p_dest[i] = (uint8_t)( ( (uint16_t)p_s1[i] + (uint16_t)p_s2[i] ) >> 1 );
}
897 #if defined(CAN_COMPILE_MMXEXT)
/* MergeMMXEXT: MMX-EXT version of MergeGeneric -- averages 8 bytes per
 * iteration in MMX registers, then finishes the remainder in scalar C.
 * EndMMX() below supplies the matching EMMS (exposed via pf_end_merge).
 * NOTE(review): p_dest + i_bytes - 8 is formed without checking
 * i_bytes >= 8; for smaller sizes this is out-of-bounds pointer
 * arithmetic (UB) -- confirm all callers pass at least 8 bytes. */
898 static void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
901 uint8_t* p_dest = (uint8_t*)_p_dest;
902 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
903 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
904 uint8_t* p_end = p_dest + i_bytes - 8;
/* 8 bytes per iteration via MMX. */
905 while( p_dest < p_end )
907 __asm__ __volatile__( "movq %2,%%mm1;"
909 "movq %%mm1, %0" :"=m" (*p_dest):
/* Scalar tail for the last few bytes. */
919 while( p_dest < p_end )
921 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
926 #if defined(CAN_COMPILE_3DNOW)
/* Merge3DNow: 3DNow! version of MergeGeneric, same structure as the
 * MMX-EXT variant (8 bytes per iteration + scalar tail).  End3DNow()
 * below supplies the matching FEMMS.
 * NOTE(review): same i_bytes < 8 pointer-underflow caveat as
 * MergeMMXEXT -- confirm callers always pass at least 8 bytes. */
927 static void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
930 uint8_t* p_dest = (uint8_t*)_p_dest;
931 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
932 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
933 uint8_t* p_end = p_dest + i_bytes - 8;
/* 8 bytes per iteration via MMX/3DNow! registers. */
934 while( p_dest < p_end )
936 __asm__ __volatile__( "movq %2,%%mm1;"
938 "movq %%mm1, %0" :"=m" (*p_dest):
/* Scalar tail for the last few bytes. */
948 while( p_dest < p_end )
950 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
955 #if defined(CAN_COMPILE_SSE)
/* MergeSSE2: SSE2 version of MergeGeneric -- 16 bytes per iteration in
 * XMM registers, with a scalar prologue that first aligns the s1 source
 * and a scalar tail for the remainder.
 * NOTE(review): the main loop nevertheless uses unaligned loads
 * (movdqu), and i_bytes is not reduced by the bytes consumed in the
 * alignment prologue, so the vector loop can run up to 15 bytes past
 * p_dest + i_bytes.  Picture planes are normally padded, but confirm. */
956 static void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
959 uint8_t* p_dest = (uint8_t*)_p_dest;
960 const uint8_t *p_s1 = (const uint8_t *)_p_s1;
961 const uint8_t *p_s2 = (const uint8_t *)_p_s2;
/* Scalar bytes until s1 reaches 16-byte alignment. */
963 while( (uintptr_t)p_s1 % 16 )
965 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
967 p_end = p_dest + i_bytes - 16;
/* 16 bytes per iteration via SSE2. */
968 while( p_dest < p_end )
970 __asm__ __volatile__( "movdqu %2,%%xmm1;"
972 "movdqu %%xmm1, %0" :"=m" (*p_dest):
/* Scalar tail for the last few bytes. */
982 while( p_dest < p_end )
984 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
989 #if defined(CAN_COMPILE_MMXEXT) || defined(CAN_COMPILE_SSE)
/* EndMMX: execute EMMS to leave MMX state so the x87 FPU is usable
 * again.  Installed as pf_end_merge for the MMX-EXT merge path. */
990 static void EndMMX( void )
992 __asm__ __volatile__( "emms" :: );
996 #if defined(CAN_COMPILE_3DNOW)
/* End3DNow: FEMMS, the fast 3DNow! variant of EMMS.  Installed as
 * pf_end_merge for the 3DNow! merge path. */
997 static void End3DNow( void )
999 __asm__ __volatile__( "femms" :: );
1003 #ifdef CAN_COMPILE_C_ALTIVEC
/* MergeAltivec: AltiVec version of MergeGeneric -- 16 bytes per
 * iteration with vec_avg, with a scalar head that aligns the
 * destination and a scalar tail for the remainder.  Separate inner
 * loops handle aligned and unaligned sources (the unaligned one
 * realigns the loads with vec_perm/vec_lvsl). */
1004 static void MergeAltivec( void *_p_dest, const void *_p_s1,
1005 const void *_p_s2, size_t i_bytes )
1007 uint8_t *p_dest = (uint8_t *)_p_dest;
1008 uint8_t *p_s1 = (uint8_t *)_p_s1;
1009 uint8_t *p_s2 = (uint8_t *)_p_s2;
1010 uint8_t *p_end = p_dest + i_bytes - 15;
1012 /* Use C until the first 16-bytes aligned destination pixel */
1013 while( (uintptr_t)p_dest & 0xF )
1015 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* NOTE(review): (int)p_s1 truncates the pointer on LP64 platforms;
 * should be uintptr_t like the destination test above.  The low four
 * bits survive the truncation, so the alignment check still works,
 * but the cast deserves cleaning up -- confirm. */
1018 if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
1020 /* Unaligned source */
1021 vector unsigned char s1v, s2v, destv;
1022 vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
1023 vector unsigned char perm1v, perm2v;
/* Permute masks to realign the unaligned source loads. */
1025 perm1v = vec_lvsl( 0, p_s1 );
1026 perm2v = vec_lvsl( 0, p_s2 );
1027 s1oldv = vec_ld( 0, p_s1 );
1028 s2oldv = vec_ld( 0, p_s2 );
1030 while( p_dest < p_end )
1032 s1newv = vec_ld( 16, p_s1 );
1033 s2newv = vec_ld( 16, p_s2 );
1034 s1v = vec_perm( s1oldv, s1newv, perm1v );
1035 s2v = vec_perm( s2oldv, s2newv, perm2v );
/* vec_avg rounds up; the scalar paths truncate.  NOTE(review):
 * off-by-one vs. MergeGeneric on odd sums -- presumably accepted
 * as visually irrelevant, confirm. */
1038 destv = vec_avg( s1v, s2v );
1039 vec_st( destv, 0, p_dest );
1048 /* Aligned source */
1049 vector unsigned char s1v, s2v, destv;
1051 while( p_dest < p_end )
1053 s1v = vec_ld( 0, p_s1 );
1054 s2v = vec_ld( 0, p_s2 );
1055 destv = vec_avg( s1v, s2v );
1056 vec_st( destv, 0, p_dest );
/* Scalar tail for the last few bytes. */
1066 while( p_dest < p_end )
1068 *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
/* MergeNEON: ARM NEON version of MergeGeneric -- averages 64 bytes per
 * iteration with vhadd.u8 (halving add, truncating like the C code).
 * The destination is brought to 16-byte alignment first by handing the
 * misaligned head to MergeGeneric; separate inner loops handle aligned
 * and unaligned sources; MergeGeneric also finishes the tail. */
1074 static void MergeNEON (void *restrict out, const void *in1,
1075 const void *in2, size_t n)
1077 uint8_t *outp = out;
1078 const uint8_t *in1p = in1;
1079 const uint8_t *in2p = in2;
/* Bytes needed to reach a 16-byte aligned destination. */
1080 size_t mis = ((uintptr_t)outp) & 15;
/* Scalar head to align the destination (pointer/size adjustments are
 * adjacent to this call). */
1084 MergeGeneric (outp, in1p, in2p, mis);
/* Process whole 16-byte groups; remainder goes to the tail below. */
1091 uint8_t *end = outp + (n & ~15);
/* Pick the unaligned-source loop when either source is misaligned. */
1093 if ((((uintptr_t)in1p)|((uintptr_t)in2p)) & 15)
1096 "vld1.u8 {q0-q1}, [%[in1]]!\n"
1097 "vld1.u8 {q2-q3}, [%[in2]]!\n"
1098 "vhadd.u8 q4, q0, q2\n"
1099 "vld1.u8 {q6-q7}, [%[in1]]!\n"
1100 "vhadd.u8 q5, q1, q3\n"
1101 "vld1.u8 {q8-q9}, [%[in2]]!\n"
1102 "vhadd.u8 q10, q6, q8\n"
1103 "vhadd.u8 q11, q7, q9\n"
1104 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
1105 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
1106 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1108 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1109 "q8", "q9", "q10", "q11", "memory");
/* Aligned-source variant: identical except for the :128 alignment
 * hints on the loads. */
1113 "vld1.u8 {q0-q1}, [%[in1],:128]!\n"
1114 "vld1.u8 {q2-q3}, [%[in2],:128]!\n"
1115 "vhadd.u8 q4, q0, q2\n"
1116 "vld1.u8 {q6-q7}, [%[in1],:128]!\n"
1117 "vhadd.u8 q5, q1, q3\n"
1118 "vld1.u8 {q8-q9}, [%[in2],:128]!\n"
1119 "vhadd.u8 q10, q6, q8\n"
1120 "vhadd.u8 q11, q7, q9\n"
1121 "vst1.u8 {q4-q5}, [%[out],:128]!\n"
1122 "vst1.u8 {q10-q11}, [%[out],:128]!\n"
1123 : [out] "+r" (outp), [in1] "+r" (in1p), [in2] "+r" (in2p)
1125 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1126 "q8", "q9", "q10", "q11", "memory");
/* Scalar tail for the remaining (n & 15) bytes. */
1129 MergeGeneric (outp, in1p, in2p, n);
1133 /*****************************************************************************
1134 * RenderX: This algo works on a 8x8 block basic, it copies the top field
1135 * and apply a process to recreate the bottom field :
1136 * If a 8x8 block is classified as :
1137 * - progressive: it applies a small blend (1,6,1)
1139 * * in the MMX version: we do a ME between the 2 fields, if there is a
1140 * good match we use MC to recreate the bottom field (with a small
1142 * * otherwise: it recreates the bottom field by an edge oriented
1144 *****************************************************************************/
/* XDeint8x8Detect: detect if an 8x8 block is interlaced.
 * XXX: It needs access to an 8x10 region
 * We use more than 8 lines to help with scrolling (text)
 * (and because XDeint8x8Frame uses line 9)
 * XXX: smooth/uniform area with noise detection doesn't work well
 * but it's not really a problem because they don't have much detail anyway
/* Square helper: returns a*a. Used by the interlacing detectors to
 * accumulate sum-of-squared-differences scores. */
static inline int ssd( int a )
{
    return a * a;
}
/* C version of the 8x8 interlacing detector (see comment above).
 * For each pair of lines it accumulates:
 *   fr: squared differences between adjacent lines (across fields)
 *   ff: squared differences between lines two apart (within a field)
 * A line pair counts as "combed" when ff is clearly smaller than fr.
 * Returns true (nonzero) when at least one pair is combed.
 * NOTE(review): the declarations/initialisation of x, y, ff, fr, fc and
 * the per-iteration src advance are not visible in this excerpt. */
static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
    /* Detect interlacing */
    for( y = 0; y < 7; y += 2 )
        for( x = 0; x < 8; x++ )
            /* inter-field (adjacent line) energy */
            fr += ssd(src[ x] - src[1*i_src+x]) +
                  ssd(src[i_src+x] - src[2*i_src+x]);
            /* intra-field (line distance 2) energy */
            ff += ssd(src[ x] - src[2*i_src+x]) +
                  ssd(src[i_src+x] - src[3*i_src+x]);
        /* combed when field-internal difference is clearly smaller,
         * with a small absolute threshold to reject noise */
        if( ff < 6*fr/8 && fr > 32 )
    return fc < 1 ? false : true;
#ifdef CAN_COMPILE_MMXEXT
/* MMXEXT version of the 8x8 interlacing detector. Same scoring as the C
 * version: mm5 accumulates fr (adjacent-line SSD), mm6 accumulates ff
 * (line-distance-2 SSD), 4 pixels at a time widened to 16-bit words.
 * NOTE(review): loop braces and the fc bookkeeping/return are not
 * visible in this excerpt. */
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
    /* Detect interlacing */
    pxor_r2r( mm7, mm7 );           /* mm7 = 0, used to unpack bytes */
    for( y = 0; y < 9; y += 2 )
        pxor_r2r( mm5, mm5 );       /* fr accumulator */
        pxor_r2r( mm6, mm6 );       /* ff accumulator */
        for( x = 0; x < 8; x+=4 )
            /* load 4 pixels from 4 consecutive lines */
            movd_m2r( src[ x], mm0 );
            movd_m2r( src[1*i_src+x], mm1 );
            movd_m2r( src[2*i_src+x], mm2 );
            movd_m2r( src[3*i_src+x], mm3 );
            /* widen bytes to 16-bit words */
            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            punpcklbw_r2r( mm7, mm3 );
            movq_r2r( mm0, mm4 );
            /* differences: mm0=l0-l1, mm4=l0-l2, mm2=l2-l1, mm3=l3-l1 */
            psubw_r2r( mm1, mm0 );
            psubw_r2r( mm2, mm4 );
            psubw_r2r( mm1, mm2 );
            psubw_r2r( mm1, mm3 );
            /* square + horizontally pair-sum into dwords */
            pmaddwd_r2r( mm0, mm0 );
            pmaddwd_r2r( mm4, mm4 );
            pmaddwd_r2r( mm2, mm2 );
            pmaddwd_r2r( mm3, mm3 );
            /* fr parts -> mm2, ff parts -> mm3, then accumulate */
            paddd_r2r( mm0, mm2 );
            paddd_r2r( mm4, mm3 );
            paddd_r2r( mm2, mm5 );
            paddd_r2r( mm3, mm6 );
        /* horizontal add of the two dwords of mm5 -> fr */
        movq_r2r( mm5, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm5 );
        movd_r2m( mm5, fr );
        /* same for mm6 -> ff */
        movq_r2r( mm6, mm0 );
        psrlq_i2r( 32, mm0 );
        paddd_r2r( mm0, mm6 );
        movd_r2m( mm6, ff );
        /* same combing criterion as the C version */
        if( ff < 6*fr/8 && fr > 32 )
/* C version: recreate an 8x8 block judged progressive.
 * Even output lines are copied from src1; odd output lines are a (1,6,1)
 * weighted blend of the two surrounding src1 lines and the src2 line.
 * NOTE(review): loop braces and the per-iteration pointer advances are
 * not visible in this excerpt. */
static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
                                    uint8_t *src1, int i_src1,
                                    uint8_t *src2, int i_src2 )
    for( y = 0; y < 8; y += 2 )
        /* keep the src1 line as-is */
        memcpy( dst, src1, 8 );
        /* (1,6,1)/8 blend, +4 for rounding */
        for( x = 0; x < 8; x++ )
            dst[x] = (src1[x] + 6*src2[x] + src1[i_src1+x] + 4 ) >> 3;
#ifdef CAN_COMPILE_MMXEXT
/* MMXEXT version of XDeint8x8MergeC: (1,6,1)/8 blend with +4 rounding,
 * 4 pixels per inner iteration, widened to 16-bit words.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src1, int i_src1,
                                         uint8_t *src2, int i_src2 )
    static const uint64_t m_4 = INT64_C(0x0004000400040004); /* rounding */
    pxor_r2r( mm7, mm7 );           /* zero register for unpacking */
    for( y = 0; y < 8; y += 2 )
        for( x = 0; x < 8; x +=4 )
            /* even line: straight copy of src1 */
            movd_m2r( src1[x], mm0 );
            movd_r2m( mm0, dst[x] );
            movd_m2r( src2[x], mm1 );          /* middle line */
            movd_m2r( src1[i_src1+x], mm2 );   /* line below */
            punpcklbw_r2r( mm7, mm0 );
            punpcklbw_r2r( mm7, mm1 );
            punpcklbw_r2r( mm7, mm2 );
            /* build 6*src2: mm1 = 2*s2, mm3 = 4*s2 */
            paddw_r2r( mm1, mm1 );
            movq_r2r( mm1, mm3 );
            paddw_r2r( mm3, mm3 );
            paddw_r2r( mm2, mm0 );  /* mm0 = top + bottom */
            paddw_r2r( mm3, mm1 );  /* mm1 = 6*s2 */
            paddw_m2r( m_4, mm1 );  /* rounding */
            paddw_r2r( mm1, mm0 );  /* total */
            psraw_i2r( 3, mm0 );    /* /8 */
            packuswb_r2r( mm7, mm0 );
            movd_r2m( mm0, dst[i_dst+x] );
/* Fill an 8x8 block (top-left at dst, line stride i_dst) with the
 * constant pixel value v. */
static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v )
{
    uint8_t *p_line = dst;

    for( int i = 0; i < 8; i++ )
    {
        memset( p_line, v, 8 );
        p_line += i_dst;
    }
}
/* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for blocks that miss a
 * neighbour.
 * TODO: a better one for the inner part.
/* C version of the simple (1,0,1) field interpolator: even output lines
 * are copied straight from the source field; each odd line is rebuilt
 * as the average of the field lines directly above and below it. */
static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src )
{
    /* Two output lines per pass: one copied, one interpolated. */
    for( int i_y = 0; i_y < 8; i_y += 2 )
    {
        /* keep the existing field line */
        memcpy( dst, src, 8 );
        dst += i_dst;

        /* missing line = mean of the surrounding field lines */
        for( int i_x = 0; i_x < 8; i_x++ )
            dst[i_x] = (uint8_t)(( src[i_x] + src[2*i_src + i_x] ) >> 1);
        dst += i_dst;

        src += 2*i_src;
    }
}
#ifdef CAN_COMPILE_MMXEXT
/* MMXEXT version of XDeint8x8FieldEC: copy even lines, average the
 * surrounding field lines (pavgb) for the odd ones, 8 pixels at once.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
    for( y = 0; y < 8; y += 2 )
        /* even line: straight copy */
        movq_m2r( src[0], mm0 );
        movq_r2m( mm0, dst[0] );
        /* odd line: byte-wise average of line above and line below */
        movq_m2r( src[2*i_src], mm1 );
        pavgb_r2r( mm1, mm0 );
        movq_r2m( mm0, dst[0] );
/* XDeint8x8Field: Edge oriented interpolation
 * (Need -4 and +5 pixels H, +1 line)
 * C version: even output lines are copied; each missing pixel is rebuilt
 * by averaging along the direction (of the three candidates: \ | /) with
 * the lowest SAD between the line above and the line below.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
                                    uint8_t *src, int i_src )
    for( y = 0; y < 8; y += 2 )
        memcpy( dst, src, 8 );
        for( x = 0; x < 8; x++ )
            uint8_t *src2 = &src[2*i_src];
            /* I use 8 pixels just to match the MMX version, but it's overkill
             * 5 would be enough (less isn't good) */
            /* c0: cost of the "\" (down-right) direction */
            const int c0 = abs(src[x-4]-src2[x-2]) + abs(src[x-3]-src2[x-1]) +
                           abs(src[x-2]-src2[x+0]) + abs(src[x-1]-src2[x+1]) +
                           abs(src[x+0]-src2[x+2]) + abs(src[x+1]-src2[x+3]) +
                           abs(src[x+2]-src2[x+4]) + abs(src[x+3]-src2[x+5]);
            /* c1: cost of the vertical direction */
            const int c1 = abs(src[x-3]-src2[x-3]) + abs(src[x-2]-src2[x-2]) +
                           abs(src[x-1]-src2[x-1]) + abs(src[x+0]-src2[x+0]) +
                           abs(src[x+1]-src2[x+1]) + abs(src[x+2]-src2[x+2]) +
                           abs(src[x+3]-src2[x+3]) + abs(src[x+4]-src2[x+4]);
            /* c2: cost of the "/" (down-left) direction */
            const int c2 = abs(src[x-2]-src2[x-4]) + abs(src[x-1]-src2[x-3]) +
                           abs(src[x+0]-src2[x-2]) + abs(src[x+1]-src2[x-1]) +
                           abs(src[x+2]-src2[x+0]) + abs(src[x+3]-src2[x+1]) +
                           abs(src[x+4]-src2[x+2]) + abs(src[x+5]-src2[x+3]);
            /* interpolate along the cheapest direction */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
#ifdef CAN_COMPILE_MMXEXT
/* MMXEXT version of XDeint8x8FieldC: the three directional costs are
 * computed with psadbw (8-pixel SAD in one instruction), then the same
 * per-pixel direction selection as the C version is done in scalar code.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src )
    for( y = 0; y < 8; y += 2 )
        memcpy( dst, src, 8 );
        for( x = 0; x < 8; x++ )
            uint8_t *src2 = &src[2*i_src];
            /* 8-byte loads offset so each psadbw yields one direction cost */
            movq_m2r( src[x-2], mm0 );
            movq_m2r( src[x-3], mm1 );
            movq_m2r( src[x-4], mm2 );
            psadbw_m2r( src2[x-4], mm0 );   /* "/" cost  -> c2 */
            psadbw_m2r( src2[x-3], mm1 );   /* "|" cost  -> c1 */
            psadbw_m2r( src2[x-2], mm2 );   /* "\" cost  -> c0 */
            movd_r2m( mm0, c2 );
            movd_r2m( mm1, c1 );
            movd_r2m( mm2, c0 );
            /* same selection rule as the C version */
            if( c0 < c1 && c1 <= c2 )
                dst[x] = (src[x-1] + src2[x+1]) >> 1;
            else if( c2 < c1 && c1 <= c0 )
                dst[x] = (src[x+1] + src2[x-1]) >> 1;
            else
                dst[x] = (src[x+0] + src2[x+0]) >> 1;
/* NxN arbitrary size (and then only use pixels in the NxN block).
 * Interlacing detector for blocks of arbitrary size. NOTE the parameter
 * order: (i_height, i_width) — callers must respect it.
 * NOTE(review): the declarations/initialisation of x, y, ff, fr, fc and
 * loop braces are not visible in this excerpt. */
static inline int XDeintNxNDetect( uint8_t *src, int i_src,
                                   int i_height, int i_width )
    /* Detect interlacing */
    /* FIXME way too simple, need to be more like XDeint8x8Detect */
    for( y = 0; y < i_height - 2; y += 2 )
        const uint8_t *s = &src[y*i_src];
        for( x = 0; x < i_width; x++ )
            fr += ssd(s[ x] - s[1*i_src+x]);  /* adjacent-line energy */
            ff += ssd(s[ x] - s[2*i_src+x]);  /* same-field energy */
        if( ff < fr && fr > i_width / 2 )
    return fc < 2 ? false : true;
/* Progressive NxN block: copy even lines, rebuild odd lines with a
 * (1,2,1)/4 vertical blend; the very last missing line (no line below)
 * is a plain 2-tap average.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeintNxNFrame( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
    for( y = 0; y < i_height; y += 2 )
        memcpy( dst, src, i_width );
        if( y < i_height - 2 )
            /* (1,2,1)/4 blend, +2 for rounding */
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + 2*src[1*i_src+x] + src[2*i_src+x] + 2 ) >> 2;
        /* Blend last line */
        for( x = 0; x < i_width; x++ )
            dst[x] = (src[x] + src[1*i_src+x] ) >> 1;
/* Interlaced NxN block: copy the field lines, rebuild the missing lines
 * as the average of the field lines above and below ((1,0,1)); the last
 * missing line blends with the adjacent line instead.
 * NOTE(review): loop braces and pointer advances are not visible in
 * this excerpt. */
static inline void XDeintNxNField( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   int i_width, int i_height )
    for( y = 0; y < i_height; y += 2 )
        memcpy( dst, src, i_width );
        if( y < i_height - 2 )
            for( x = 0; x < i_width; x++ )
                dst[x] = (src[x] + src[2*i_src+x] ) >> 1;
        /* Blend last line */
        for( x = 0; x < i_width; x++ )
            dst[x] = (src[x] + src[i_src+x]) >> 1;
/* Deinterlace an arbitrary-size block: run the interlacing detector and
 * pick either the field renderer (interpolate missing lines) or the
 * frame renderer (gentle vertical blend).
 *
 * Fix: XDeintNxNDetect() is declared with (i_height, i_width) — in that
 * order — but was called here as (i_width, i_height), so the detector
 * scanned the wrong region for non-square blocks (e.g. the 8 x i_mody
 * and i_modx x 8 edge blocks). Pass the arguments in the callee's order.
 */
static inline void XDeintNxN( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
                              int i_width, int i_height )
{
    if( XDeintNxNDetect( src, i_src, i_height, i_width ) )
        XDeintNxNField( dst, i_dst, src, i_src, i_width, i_height );
    else
        XDeintNxNFrame( dst, i_dst, src, i_src, i_width, i_height );
}
/* Return the median of a, b and c: the comparisons (not visible in this
 * excerpt) leave min/max holding the extremes, so sum - min - max is the
 * middle value. */
static inline int median( int a, int b, int c )
    int min = a, max =a;
    return a + b + c - min - max;
/* Deinterlace one 8-line band, C version: walk the band in 8x8 blocks,
 * run the detector on each, then either edge-interpolate (interior
 * blocks), simply interpolate (edge blocks, which lack the horizontal
 * context the edge filter needs) or blend (progressive blocks). The
 * partial block at the right edge (i_modx pixels) goes through XDeintNxN.
 * NOTE(review): loop braces and the dst/src += 8 advances are not
 * visible in this excerpt. */
static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
                                   uint8_t *src, int i_src,
                                   const int i_mbx, int i_modx )
    for( x = 0; x < i_mbx; x++ )
        /* interlaced block? */
        if( ( s = XDeint8x8DetectC( src, i_src ) ) )
            /* first/last block: no horizontal context for edge filter */
            if( x == 0 || x == i_mbx - 1 )
                XDeint8x8FieldEC( dst, i_dst, src, i_src );
            else
                XDeint8x8FieldC( dst, i_dst, src, i_src );
        else
            /* progressive: gentle blend of the two fields */
            XDeint8x8MergeC( dst, i_dst,
                             &src[0*i_src], 2*i_src,
                             &src[1*i_src], 2*i_src );
    /* right-edge partial block */
    XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
#ifdef CAN_COMPILE_MMXEXT
/* MMXEXT version of XDeintBand8x8C: identical control flow, with the
 * MMXEXT detector/filters substituted for the C ones.
 * NOTE(review): loop braces and the dst/src += 8 advances are not
 * visible in this excerpt. */
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                        uint8_t *src, int i_src,
                                        const int i_mbx, int i_modx )
    /* Reset current line */
    for( x = 0; x < i_mbx; x++ )
        if( ( s = XDeint8x8DetectMMXEXT( src, i_src ) ) )
            if( x == 0 || x == i_mbx - 1 )
                XDeint8x8FieldEMMXEXT( dst, i_dst, src, i_src );
            else
                XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src );
        else
            XDeint8x8MergeMMXEXT( dst, i_dst,
                                  &src[0*i_src], 2*i_src,
                                  &src[1*i_src], 2*i_src );
    /* right-edge partial block (C code) */
    XDeintNxN( dst, i_dst, src, i_src, i_modx, 8 );
/* RenderX entry point: deinterlace p_pic into p_outpic plane by plane,
 * processing full 8-line bands with the (possibly MMXEXT-accelerated)
 * band filter and finishing the leftover bottom band (i_mody lines) and
 * right edge with the generic C NxN path.
 * NOTE(review): loop braces and the trailing emms for the MMX state are
 * not visible in this excerpt. */
static void RenderX( picture_t *p_outpic, picture_t *p_pic )
    unsigned u_cpu = vlc_CPU();
    /* Copy image and skip lines */
    for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
        /* number of full 8-line bands, minus one kept for the C tail */
        const int i_mby = ( p_outpic->p[i_plane].i_visible_lines + 7 )/8 - 1;
        const int i_mbx = p_outpic->p[i_plane].i_visible_pitch/8;
        /* leftover lines/pixels not covered by full 8x8 blocks */
        const int i_mody = p_outpic->p[i_plane].i_visible_lines - 8*i_mby;
        const int i_modx = p_outpic->p[i_plane].i_visible_pitch - 8*i_mbx;
        const int i_dst = p_outpic->p[i_plane].i_pitch;
        const int i_src = p_pic->p[i_plane].i_pitch;
        /* full bands */
        for( y = 0; y < i_mby; y++ )
            uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
            uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
#ifdef CAN_COMPILE_MMXEXT
            if( u_cpu & CPU_CAPABILITY_MMXEXT )
                XDeintBand8x8MMXEXT( dst, i_dst, src, i_src, i_mbx, i_modx );
            else
#endif
                XDeintBand8x8C( dst, i_dst, src, i_src, i_mbx, i_modx );
        /* Last line (C only)*/
            uint8_t *dst = &p_outpic->p[i_plane].p_pixels[8*y*i_dst];
            uint8_t *src = &p_pic->p[i_plane].p_pixels[8*y*i_src];
            /* bottom band: 8 x i_mody blocks, then the corner block */
            for( x = 0; x < i_mbx; x++ )
                XDeintNxN( dst, i_dst, src, i_src, 8, i_mody );
            XDeintNxN( dst, i_dst, src, i_src, i_modx, i_mody );
    /* leave MMX state clean */
#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
1686 /*****************************************************************************
1687 * Yadif (Yet Another DeInterlacing Filter).
1688 *****************************************************************************/
1692 * 0: Output 1 frame for each frame.
1693 * 1: Output 1 frame for each field.
1694 * 2: Like 0 but skips spatial interlacing check.
1695 * 3: Like 1 but skips spatial interlacing check.
1697 * In vlc, only & 0x02 has meaning, as we do the & 0x01 ourself.
/* Compatibility shims so the imported yadif.h (from MPlayer's
 * vf_yadif.c) compiles inside VLC without modification. */
/* I am unsure it is the right one */
typedef intptr_t x86_reg;
/* NOTE: function-like macros below evaluate their arguments more than
 * once; do not pass expressions with side effects. */
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#define FFMAX(a,b) __MAX(a,b)
#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
#define FFMIN(a,b) __MIN(a,b)
#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
/* yadif.h comes from vf_yadif.c of mplayer project */
/* Render one output field with Yadif from the three-frame history in
 * p_sys->pp_history (prev/cur/next). i_order selects the temporal field
 * (0 = first, 1 = second, 2 = soft-repeated first), i_field the spatial
 * parity of the field being rendered.
 * Returns VLC_SUCCESS when a field was rendered into p_dst,
 * VLC_EGENERIC while the history is still filling up.
 * NOTE(review): braces, the yadif_parity declaration, the filter(...)
 * call head and the success returns are not visible in this excerpt. */
static int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
    filter_sys_t *p_sys = p_filter->p_sys;
    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
    assert( i_field == 0 || i_field == 1 );
    /* As the pitches must match, use ONLY pictures coming from picture_New()! */
    picture_t *p_prev = p_sys->pp_history[0];
    picture_t *p_cur  = p_sys->pp_history[1];
    picture_t *p_next = p_sys->pp_history[2];
    /* Account for soft field repeat.
       The "parity" parameter affects the algorithm like this (from yadif.h):
       uint8_t *prev2= parity ? prev : cur ;
       uint8_t *next2= parity ? cur  : next;
       The original parity expression that was used here is:
       (i_field ^ (i_order == i_field)) & 1
       i_field = 0, i_order = 0 => 1
       i_field = 1, i_order = 1 => 0
       i_field = 1, i_order = 0 => 1
       i_field = 0, i_order = 1 => 0
       => equivalent with e.g. (1 - i_order) or (i_order + 1) % 2
       Thus, in a normal two-field frame,
       parity 1 = first field  (i_order == 0)
       parity 0 = second field (i_order == 1)
       Now, with three fields, where the third is a copy of the first,
       i_order = 0 => parity 1 (as usual)
       i_order = 1 => due to the repeat, prev = cur, but also next = cur.
                      Because in such a case there is no motion (otherwise field repeat makes no sense),
                      we don't actually need to invoke Yadif's filter(). Thus, set "parity" to 2,
                      and use this to bypass the filter.
       i_order = 2 => parity 0 (as usual)
    */
    if( p_cur && p_cur->i_nb_fields > 2 )
        yadif_parity = (i_order + 1) % 3; /* 1, *2*, 0; where 2 is a special value meaning "bypass filter". */
    else
        yadif_parity = (i_order + 1) % 2; /* 1, 0 */
    /* Filter if we have all the pictures we need */
    if( p_prev && p_cur && p_next )
        /* per-line filter, chosen once per call */
        void (*filter)(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity);
#if defined(HAVE_YADIF_SSE2)
        if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
            filter = yadif_filter_line_mmx2;
        else
#endif
            filter = yadif_filter_line_c;
        for( int n = 0; n < p_dst->i_planes; n++ )
            const plane_t *prevp = &p_prev->p[n];
            const plane_t *curp  = &p_cur->p[n];
            const plane_t *nextp = &p_next->p[n];
            plane_t *dstp        = &p_dst->p[n];
            /* interior lines; first/last are duplicated below */
            for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
                /* lines of the kept field (or repeat bypass): copy as-is */
                if( (y % 2) == i_field || yadif_parity == 2 )
                    vlc_memcpy( &dstp->p_pixels[y * dstp->i_pitch],
                                &curp->p_pixels[y * curp->i_pitch], dstp->i_visible_pitch );
                else
                    struct vf_priv_s cfg;
                    /* Spatial checks only when enough data */
                    cfg.mode = (y >= 2 && y < dstp->i_visible_lines - 2) ? 0 : 2;
                    assert( prevp->i_pitch == curp->i_pitch && curp->i_pitch == nextp->i_pitch );
                        &dstp->p_pixels[y * dstp->i_pitch],
                        &prevp->p_pixels[y * prevp->i_pitch],
                        &curp->p_pixels[y * curp->i_pitch],
                        &nextp->p_pixels[y * nextp->i_pitch],
                        dstp->i_visible_pitch,
                /* We duplicate the first and last lines */
                if( y == 1 )
                    vlc_memcpy(&dstp->p_pixels[(y-1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
                else if( y == dstp->i_visible_lines - 2 )
                    vlc_memcpy(&dstp->p_pixels[(y+1) * dstp->i_pitch], &dstp->p_pixels[y * dstp->i_pitch], dstp->i_pitch);
        p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame, too */
    else if( !p_prev && !p_cur && p_next )
        /* NOTE: For the first frame, we use the default frame offset
                 as set by Open() or SetFilterMethod(). It is always 0. */
        /* FIXME not good as it does not use i_order/i_field */
        RenderX( p_dst, p_next );
        p_sys->i_frame_offset = 1; /* p_curr will be rendered at next frame */
    /* not enough history yet: caller must drop this output picture */
    return VLC_EGENERIC;
1836 /*****************************************************************************
1837 * Phosphor - a framerate doubler that simulates gradual light decay of a CRT.
1838 *****************************************************************************/
1841 * This function converts a normal (full frame) plane_t into a field plane_t.
1843 * Field plane_t's can be used e.g. for a weaving copy operation from two
1844 * source frames into one destination frame.
1846 * The pixels themselves will not be touched; only the metadata is generated.
1847 * The same pixel data is shared by both the original plane_t and the field
1848 * plane_t. Note, however, that the bottom field's data starts from the
1849 * second line, so for the bottom field, the actual pixel pointer value
1850 * does not exactly match the original plane pixel pointer value. (It points
1851 * one line further down.)
1853 * The caller must allocate p_dst (creating a local variable is fine).
1855 * @param p_dst Field plane_t is written here. Must be non-NULL.
1856 * @param p_src Original full-frame plane_t. Must be non-NULL.
1857 * @param i_field Extract which field? 0 = top field, 1 = bottom field.
1858 * @see plane_CopyPixels()
1859 * @see ComposeFrame()
1860 * @see RenderPhosphor()
/* Build a field view (p_dst) of a full-frame plane (p_src): halve the
 * line counts, double the pitch so the other field becomes the "margin",
 * and for the bottom field start one line down. No pixels are copied;
 * p_dst shares p_src's pixel data. See the block comment above. */
static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field )
    assert( p_dst != NULL );
    assert( p_src != NULL );
    assert( i_field == 0 || i_field == 1 );
    /* Start with a copy of the metadata, and then update it to refer
       to one field only.
       We utilize the fact that plane_CopyPixels() differentiates between
       visible_pitch and pitch.
       The other field will be defined as the "margin" by doubling the pitch.
       The visible pitch will be left as in the original.
    */
    (*p_dst) = (*p_src);
    p_dst->i_lines /= 2;
    p_dst->i_visible_lines /= 2;
    p_dst->i_pitch *= 2;
    /* For the bottom field, skip the first line in the pixel data. */
    /* NOTE(review): the `if( i_field == 1 )` guard for the next statement
       is not visible in this excerpt — it must only apply to field 1. */
    p_dst->p_pixels += p_src->i_pitch;
1887 * Helper function: composes a frame from the given field pair.
1889 * Caller must manage allocation/deallocation of p_outpic.
1891 * The inputs are full pictures (frames); only one field
1892 * will be used from each.
1894 * Chroma formats of the inputs must match. It is also desirable that the
1895 * visible pitches of both inputs are the same, so that this will do something
1896 * sensible. The pitch or visible pitch of the output does not need to match
1897 * with the input; the compatible (smaller) part of the visible pitch will
1900 * The i_output_chroma parameter must always be supplied, but it is only used
1901 * when the chroma format of the input is detected as 4:2:0. Available modes:
1902 * - CC_ALTLINE: Alternate line copy, like for luma. Chroma line 0
1903 * comes from top field picture, chroma line 1 comes
1904 * from bottom field picture, chroma line 2 from top
1905 * field picture, and so on. This is usually the right
1906 * choice for IVTCing NTSC DVD material, but rarely
1907 * for any other use cases.
1908 * - CC_UPCONVERT: The output will have 4:2:2 chroma. All 4:2:0 chroma
1909 * data from both input fields will be used to generate
1910 * the 4:2:2 chroma data of the output. Each output line
1911 * will thus have independent chroma. This is a good
1912 * choice for most purposes except IVTC, if the machine
1913 * can handle the increased throughput. (Make sure to
1914 * allocate a 4:2:2 output picture first!)
1915 * This mode can also be used for converting a 4:2:0
1916 * frame to 4:2:2 format (by passing the same input
1917 * picture for both input fields).
1918 * Conversions: I420, YV12 --> I422
1920 * - CC_SOURCE_TOP: Copy chroma of source top field picture.
1921 * Ignore chroma of source bottom field picture.
1922 * - CC_SOURCE_BOTTOM: Copy chroma of source bottom field picture.
1923 * Ignore chroma of source top field picture.
1924 * - CC_MERGE: Average the chroma of the input field pictures.
1925 * (Note that this has no effect if the input fields
1926 * come from the same frame.)
1928 * @param p_outpic Composed picture is written here. Allocated by caller.
1929 * @param p_inpic_top Picture to extract the top field from.
1930 * @param p_inpic_bottom Picture to extract the bottom field from.
1931 * @param i_output_chroma Chroma operation mode for 4:2:0 (see function doc)
1932 * @see compose_chroma_t
1933 * @see RenderPhosphor()
/* Compose one output frame from the top field of p_inpic_top and the
 * bottom field of p_inpic_bottom; 4:2:0 chroma is handled according to
 * i_output_chroma. See the detailed block comment above this function.
 * NOTE(review): braces and the declarations of the plane_t field views
 * (dst_top, dst_bottom, src_top, src_bottom) are not visible in this
 * excerpt. */
static void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
                          picture_t *p_inpic_top, picture_t *p_inpic_bottom,
                          compose_chroma_t i_output_chroma )
    assert( p_filter != NULL );
    assert( p_outpic != NULL );
    assert( p_inpic_top != NULL );
    assert( p_inpic_bottom != NULL );
    /* Valid 4:2:0 chroma handling modes. */
    assert( i_output_chroma == CC_ALTLINE ||
            i_output_chroma == CC_UPCONVERT ||
            i_output_chroma == CC_SOURCE_TOP ||
            i_output_chroma == CC_SOURCE_BOTTOM ||
            i_output_chroma == CC_MERGE );
    const int i_chroma = p_filter->fmt_in.video.i_chroma;
    const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
                        i_chroma == VLC_CODEC_J422;
    const bool b_upconvert_chroma = ( !b_i422 &&
                                      i_output_chroma == CC_UPCONVERT );
    for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
        bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );
        /* YV12 is YVU, but I422 is YUV. For such input, swap chroma planes
           in output when converting to 4:2:2. */
        if( b_is_chroma_plane && b_upconvert_chroma &&
            i_chroma == VLC_CODEC_YV12 )
            if( i_plane == U_PLANE )
                i_out_plane = V_PLANE;
            else
                i_out_plane = U_PLANE;
        else
            i_out_plane = i_plane;
        /* Copy luma or chroma, alternating between input fields. */
        if( !b_is_chroma_plane || b_i422 || i_output_chroma == CC_ALTLINE )
            /* Do an alternating line copy. This is always done for luma,
               and for 4:2:2 chroma. It can be requested for 4:2:0 chroma
               using CC_ALTLINE (see function doc).
               Note that when we get here, the number of lines matches
               in input and output.
            */
            FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane], 0 );
            FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );
            FieldFromPlane( &src_top,    &p_inpic_top->p[i_plane], 0 );
            FieldFromPlane( &src_bottom, &p_inpic_bottom->p[i_plane], 1 );
            /* Copy each field from the corresponding source. */
            plane_CopyPixels( &dst_top, &src_top );
            plane_CopyPixels( &dst_bottom, &src_bottom );
        else /* Input 4:2:0, on a chroma plane, and not in altline mode. */
            if( i_output_chroma == CC_UPCONVERT )
                /* Upconverting copy - use all data from both input fields.
                   This produces an output picture with independent chroma
                   for each field. It can be used for general input when
                   the two input frames are different.
                   The output is 4:2:2, but the input is 4:2:0. Thus the output
                   has twice the lines of the input, and each full chroma plane
                   in the input corresponds to a field chroma plane in the
                   output.
                */
                FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane], 0 );
                FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );
                /* Copy each field from the corresponding source. */
                plane_CopyPixels( &dst_top, &p_inpic_top->p[i_plane] );
                plane_CopyPixels( &dst_bottom, &p_inpic_bottom->p[i_plane] );
            else if( i_output_chroma == CC_SOURCE_TOP )
                /* Copy chroma of input top field. Ignore chroma of input
                   bottom field. Input and output are both 4:2:0, so we just
                   copy the whole plane. */
                plane_CopyPixels( &p_outpic->p[i_out_plane],
                                  &p_inpic_top->p[i_plane] );
            else if( i_output_chroma == CC_SOURCE_BOTTOM )
                /* Copy chroma of input bottom field. Ignore chroma of input
                   top field. Input and output are both 4:2:0, so we just
                   copy the whole plane. */
                plane_CopyPixels( &p_outpic->p[i_out_plane],
                                  &p_inpic_bottom->p[i_plane] );
            else /* i_output_chroma == CC_MERGE */
                /* Average the chroma of the input fields.
                   Input and output are both 4:2:0. */
                uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
                p_in_top    = p_inpic_top->p[i_plane].p_pixels;
                p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
                p_out = p_outpic->p[i_out_plane].p_pixels;
                p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
                                  * p_outpic->p[i_out_plane].i_visible_lines;
                /* NOTE(review): uses i_plane (not i_out_plane) for the output
                   pitch below; harmless here since in the CC_MERGE path no
                   plane swap happens (i_out_plane == i_plane), but worth
                   normalising for consistency. */
                int w = FFMIN3( p_inpic_top->p[i_plane].i_visible_pitch,
                                p_inpic_bottom->p[i_plane].i_visible_pitch,
                                p_outpic->p[i_plane].i_visible_pitch );
                for( ; p_out < p_out_end ; )
                    Merge( p_out, p_in_top, p_in_bottom, w );
                    p_out       += p_outpic->p[i_out_plane].i_pitch;
                    p_in_top    += p_inpic_top->p[i_plane].i_pitch;
                    p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
2071 * Helper function: dims (darkens) the given field of the given picture.
2073 * This is used for simulating CRT light output decay in RenderPhosphor().
2075 * The strength "1" is recommended. It's a matter of taste,
2076 * so it's parametrized.
2078 * Note on chroma formats:
2079 * - If input is 4:2:2, all planes are processed.
2080 * - If input is 4:2:0, only the luma plane is processed, because both fields
2081 * have the same chroma. This will distort colours, especially for high
2082 * filter strengths, especially for pixels whose U and/or V values are
2083 * far away from the origin (which is at 128 in uint8 format).
2085 * @param p_dst Input/output picture. Will be modified in-place.
2086 * @param i_field Darken which field? 0 = top, 1 = bottom.
2087 * @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
2088 * @see RenderPhosphor()
2089 * @see ComposeFrame()
/* Darken (dim) one field of p_dst in place by i_strength, i.e. divide
 * its pixels by 2^i_strength (luma), or pull them toward 128 by the same
 * factor (4:2:2 chroma). Used to simulate CRT light decay; see the
 * function doc comment above.
 * NOTE(review): braces, the `if( i_field == 1 )` guards before the
 * "skip first line" statements, the asm(...) wrappers and the trailing
 * emms are not visible in this excerpt. */
static void DarkenField( picture_t *p_dst, const int i_field,
                         const int i_strength )
    assert( p_dst != NULL );
    assert( i_field == 0 || i_field == 1 );
    assert( i_strength >= 1 && i_strength <= 3 );
    unsigned u_cpu = vlc_CPU();
    /* Bitwise ANDing with this clears the i_strength highest bits
       of each byte (compensates for the bits shifted in from the
       neighbouring byte by the 64-bit shift). */
#ifdef CAN_COMPILE_MMXEXT
    uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
                                             the shift count as a 64-bit
                                             operand) */
#endif
    const uint8_t  remove_high_u8 = 0xFF >> i_strength;
    const uint64_t remove_high_u64 = remove_high_u8 *
                                            INT64_C(0x0101010101010101);
    /* Process luma.
       For luma, the operation is just a shift + bitwise AND, so we vectorize
       even in the C version.
       There is an MMX version, too, because it performs about twice faster.
    */
    int i_plane = Y_PLANE;
    uint8_t *p_out, *p_out_end;
    int w = p_dst->p[i_plane].i_visible_pitch;
    p_out = p_dst->p[i_plane].p_pixels;
    p_out_end = p_out + p_dst->p[i_plane].i_pitch
                      * p_dst->p[i_plane].i_visible_lines;
    /* skip first line for bottom field */
        p_out += p_dst->p[i_plane].i_pitch;
    int wm8 = w % 8;   /* remainder */
    int w8  = w - wm8; /* part of width that is divisible by 8 */
    /* step by 2 pitches: only every other line belongs to this field */
    for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
        uint64_t *po = (uint64_t *)p_out;
#ifdef CAN_COMPILE_MMXEXT
        if( u_cpu & CPU_CAPABILITY_MMXEXT )
            movq_m2r( i_strength_u64,  mm1 );
            movq_m2r( remove_high_u64, mm2 );
            for( int x = 0 ; x < w8; x += 8 )
                movq_m2r( (*po), mm0 );
                psrlq_r2r( mm1, mm0 );  /* 64-bit shift right */
                pand_r2r(  mm2, mm0 );  /* clear bits leaked across bytes */
                movq_r2m( mm0, (*po++) );
        else
#endif
            /* C fallback: same shift+mask trick, 8 bytes per iteration */
            for( int x = 0 ; x < w8; x += 8, ++po )
                (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
#ifdef CAN_COMPILE_MMXEXT
#endif
        /* handle the width remainder */
            uint8_t *po_temp = (uint8_t *)po;
            for( int x = 0 ; x < wm8; ++x, ++po_temp )
                (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
    /* Process chroma if the field chromas are independent.
       The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
       The chroma processing is a bit more complicated than luma,
       and needs MMX for vectorization.
    */
    if( p_dst->format.i_chroma == VLC_CODEC_I422 ||
        p_dst->format.i_chroma == VLC_CODEC_J422 )
        for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
            if( i_plane == Y_PLANE )
                continue; /* luma already handled */
            int w = p_dst->p[i_plane].i_visible_pitch;
#ifdef CAN_COMPILE_MMXEXT
            int wm8 = w % 8;   /* remainder */
            int w8  = w - wm8; /* part of width that is divisible by 8 */
#endif
            p_out = p_dst->p[i_plane].p_pixels;
            p_out_end = p_out + p_dst->p[i_plane].i_pitch
                              * p_dst->p[i_plane].i_visible_lines;
            /* skip first line for bottom field */
                p_out += p_dst->p[i_plane].i_pitch;
            for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
#ifdef CAN_COMPILE_MMXEXT
                /* See also easy-to-read C version below. */
                if( u_cpu & CPU_CAPABILITY_MMXEXT )
                    static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
                    movq_m2r( b128, mm5 );
                    movq_m2r( i_strength_u64,  mm6 );
                    movq_m2r( remove_high_u64, mm7 );
                    uint64_t *po = (uint64_t *)p_out;
                    for( int x = 0 ; x < w8; x += 8 )
                        movq_m2r( (*po), mm0 );
                        /* split into distances above/below the 128 origin */
                        movq_r2r( mm5, mm2 ); /* 128 */
                        movq_r2r( mm0, mm1 ); /* copy of data */
                        psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
                        psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
                        /* divide both parts by 2^i_strength */
                        psrlq_r2r( mm6, mm1 );
                        psrlq_r2r( mm6, mm2 );
                        pand_r2r(  mm7, mm1 );
                        pand_r2r(  mm7, mm2 );
                        /* collect results from pos./neg. parts */
                        psubb_r2r( mm2, mm1 );
                        paddb_r2r( mm5, mm1 );
                        movq_r2m( mm1, (*po++) );
                    /* handle the width remainder */
                        /* The output is closer to 128 than the input;
                           the result always fits in uint8. */
                        uint8_t *po8 = (uint8_t *)po;
                        for( int x = 0 ; x < wm8; ++x, ++po8 )
                            (*po8) = 128 + ( ((*po8) - 128) /
                                             (1 << i_strength) );
                else
#endif
                    /* 4:2:2 chroma handler, C version */
                    uint8_t *po = p_out;
                    for( int x = 0 ; x < w; ++x, ++po )
                        (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
#ifdef CAN_COMPILE_MMXEXT
#endif
            } /* for p_out... */
        } /* for i_plane... */
    /* leave MMX state clean */
#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
2258 * Deinterlace filter. Simulates an interlaced CRT TV (to some extent).
2260 * The main use case for this filter is anime for which IVTC is not applicable.
2261 * This is the case, if 24fps telecined material has been mixed with 60fps
2262 * interlaced effects, such as in Sol Bianca or Silent Mobius. It can also
2263 * be used for true interlaced video, such as most camcorder recordings.
2265 * The filter has several modes for handling 4:2:0 chroma for those output
2266 * frames that fall across input frame temporal boundaries (i.e. fields come
2267 * from different frames). Upconvert (to 4:2:2) provides the most accurate
2268 * CRT simulation, but requires more CPU and memory bandwidth than the other
2269 * modes. The other modes keep the chroma at 4:2:0.
2271 * About these modes: telecined input (such as NTSC anime DVDs) works better
2272 * with AltLine, while true interlaced input works better with Latest.
2273 * Merge is a compromise, which may or may not look acceptable.
2274 * The mode can be set in the VLC advanced configuration,
2275 * All settings > Video > Filters > Deinterlace
2277 * Technically speaking, this is an interlaced field renderer targeted for
2278 * progressive displays. It works by framerate doubling, and simulating one
2279 * step of light output decay of the "old" field during the "new" field,
2280 * until the next new field comes in to replace the "old" one.
2282 * While playback is running, the simulated light decay gives the picture an
2283 * appearance of visible "scanlines", much like on a real TV. Only when the
2284 * video is paused, it is clearly visible that one of the fields is actually
2285 * brighter than the other.
2287 * The main differences to the Bob algorithm are:
2288 * - in addition to the current field, the previous one (fading out)
2290 * - some horizontal lines don't seem to flicker as much
2291 * - scanline visual effect (adjustable; the dimmer strength can be set
2292 * in the VLC advanced configuration)
2293 * - the picture appears 25%, 38% or 44% darker on average (for dimmer
2294 * strengths 1, 2 and 3)
2295 * - if the input has 4:2:0 chroma, the colours may look messed up in some
2296 * output frames. This is a limitation of the 4:2:0 chroma format, and due
2297 * to the fact that both fields are present in each output picture. Usually
2298 * this doesn't matter in practice, but see the 4:2:0 chroma mode setting
2299 * in the configuration if needed (it may help a bit).
2301 * In addition, when this filter is used on an LCD computer monitor,
2302 * the main differences to a real CRT TV are:
2303 * - Pixel shape and grid layout; CRT TVs were designed for interlaced
2304 * field rendering, while LCD monitors weren't.
2305 * - No scan flicker even though the display runs (usually) at 60Hz.
2306 * (This at least is a good thing.)
2308 * The output vertical resolution should be large enough for the scaling
2309 * not to have a too adverse effect on the regular scanline pattern.
2310 * In practice, NTSC video can be acceptably rendered already at 1024x600
2311 * if fullscreen even on an LCD. PAL video requires more.
2313 * Just like Bob, this filter works properly only if the input framerate
2314 * is stable. Otherwise the scanline effect breaks down and the picture
2317 * Soft field repeat (repeat_pict) is supported. Note that the generated
2318 * "repeated" output picture is unique because of the simulated light decay.
2319 * Its "old" field comes from the same input frame as the "new" one, unlike
2320 * the first output picture of the same frame.
2322 * As many output frames should be requested for each input frame as is
2323 * indicated by p_src->i_nb_fields. This is done by calling this function
2324 * several times, first with i_order = 0, and then with all other parameters
2325 * the same, but a new p_dst, increasing i_order (1 for second field,
2326 * and then if i_nb_fields = 3, also i_order = 2 to get the repeated first
2327 * field), and alternating i_field (starting, at i_order = 0, with the field
2328 * according to p_src->b_top_field_first). See Deinterlace() for an example.
2330 * @param p_filter The filter instance. Must be non-NULL.
2331 * @param p_dst Output frame. Must be allocated by caller.
2332 * @param p_src Input frame. Must exist.
2333 * @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
2334 * @param i_field Render which field? 0 = top field, 1 = bottom field.
2335 * @return VLC error code (int).
2336 * @retval VLC_SUCCESS The requested field was rendered into p_dst.
2337 * @retval VLC_EGENERIC No pictures in history buffer, cannot render.
2339 * @see RenderLinear()
2340 * @see Deinterlace()
static int RenderPhosphor( filter_t *p_filter,
                           picture_t *p_dst, picture_t *p_src,
                           int i_order, int i_field )
    assert( p_filter != NULL );
    assert( p_dst != NULL );
    assert( p_src != NULL );
    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
    assert( i_field == 0 || i_field == 1 );
    filter_sys_t *p_sys = p_filter->p_sys;
    /* Last two input frames; [HISTORY_SIZE-1] is the newest,
       [HISTORY_SIZE-2] the one before it. */
    picture_t *p_in = p_sys->pp_history[HISTORY_SIZE-1];
    picture_t *p_old = p_sys->pp_history[HISTORY_SIZE-2];
    /* Use the same input picture as "old" at the first frame after startup */
    /* If the history mechanism has failed, we can't do anything. */
        return VLC_EGENERIC;
    assert( p_old != NULL );
    assert( p_in != NULL );
    /* Decide sources for top & bottom fields of output.
       By default both fields come from the newest input frame. */
    picture_t *p_in_top = p_in;
    picture_t *p_in_bottom = p_in;
    /* For the first output field this frame,
       grab "old" field from previous frame. */
        if( i_field == 0 ) /* rendering top field */
            p_in_bottom = p_old;
        else /* i_field == 1, rendering bottom field */
        /* Select how 4:2:0 chroma is composed for the output frame;
           controlled by the "chroma for 4:2:0" advanced setting. */
        compose_chroma_t cc;
        switch( p_sys->phosphor.i_chroma_for_420 )
                else /* i_field == 1 */
                    cc = CC_SOURCE_BOTTOM;
        /* The above are the only possibilities, if there are no bugs. */
        /* Build the output picture from the chosen top/bottom field
           sources, using the selected chroma composition mode. */
        ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc );
    /* Simulate phosphor light output decay for the old field.
       The dimmer can also be switched off in the configuration, but that is
       more of a technical curiosity or an educational toy for advanced users
       than a useful deinterlacer mode (although it does make telecined
       material look slightly better than without any filtering).
       In most use cases the dimmer is used.
    */
    if( p_sys->phosphor.i_dimmer_strength > 0 )
        /* !i_field = the field NOT being rendered this call, i.e. the
           "old" one that should appear to fade out. */
        DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );
2423 /*****************************************************************************
2424 * Inverse telecine (IVTC) filter (a.k.a. "film mode", "3:2 reverse pulldown")
2425 *****************************************************************************/
2429 * Deinterlace filter. Performs inverse telecine.
2431 * Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
2433 * This filter attempts to reconstruct the original film frames from an
2434 * NTSC telecined signal. It is intended for 24fps progressive material
2435 * that was telecined to NTSC 60i. For example, most NTSC anime DVDs
2438 * @param p_filter The filter instance.
2439 * @param[in] p_src Input frame.
2440 * @param[out] p_dst Output frame. Must be allocated by caller.
2441 * @return VLC error code (int).
2442 * @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
2443 * @retval VLC_EGENERIC Frame dropped as part of normal IVTC operation.
2444 * @see Deinterlace()
2445 * @see ComposeFrame()
2446 * @see CalculateInterlaceScore()
2447 * @see EstimateNumBlocksWithMotion()
2449 * Overall explanation:
2451 * This filter attempts to do in realtime what Transcode's
2452 * ivtc->decimate->32detect chain does offline. Additionally, it removes
2453 * soft telecine. It is an original design, based on some ideas from
2454 * Transcode, some from TVTime, and some original.
2456 * If the input material is pure NTSC telecined film, inverse telecine
2457 * will (ideally) exactly recover the original progressive film frames.
2458 * The output will run at 4/5 of the original framerate with no loss of
2459 * information. Interlacing artifacts are removed, and motion becomes
2460 * as smooth as it was on the original film. For soft-telecined material,
2461 * on the other hand, the progressive frames already exist, so only the
2462 * timings are changed such that the output becomes smooth 24fps (or would,
2463 * if the output device had an infinite framerate).
2465 * Put in simple terms, this filter is targeted for NTSC movies and
2466 * especially anime. Virtually all 1990s and early 2000s anime is
2467 * hard-telecined. Because the source material is like that,
2468 * IVTC is needed for also virtually all official R1 (US) anime DVDs.
2470 * Note that some anime from the turn of the century (e.g. Silent Mobius
2471 * and Sol Bianca) is a hybrid of telecined film and true interlaced
2472 * computer-generated effects and camera pans. In this case, applying IVTC
2473 * will effectively attempt to reconstruct the frames based on the film
2474 * component, but even if this is successful, the framerate reduction will
2475 * cause the computer-generated effects to stutter. This is mathematically
2476 * unavoidable. Instead of IVTC, a framerate doubling deinterlacer is
2477 * recommended for such material. Try "Phosphor", "Bob", or "Linear".
2479 * Fortunately, 30fps true progressive anime is on the rise (e.g. ARIA,
2480 * Black Lagoon, Galaxy Angel, Ghost in the Shell: Solid State Society,
2481 * Mai Otome, Last Exile, and Rocket Girls). This type requires no
2482 * deinterlacer at all.
2484 * Another recent trend is using 24fps computer-generated effects and
2485 * telecining them along with the cels (e.g. Kiddy Grade, Str.A.In. and
2486 * The Third: The Girl with the Blue Eye). For this group, IVTC is the
2487 * correct way to deinterlace, and works properly.
2489 * Soft telecined anime, while rare, also exists. Stellvia of the Universe
2490 * and Angel Links are examples of this. Stellvia constantly alternates
2491 * between soft and hard telecine - pure CGI sequences are soft-telecined,
2492 * while sequences incorporating cel animation are hard-telecined.
2493 * This makes it very hard for the cadence detector to lock on,
2494 * and indeed Stellvia gives some trouble for the filter.
2496 * To finish the list of different material types, Azumanga Daioh deserves
2497 * a special mention. The OP and ED sequences are both 30fps progressive,
2498 * while the episodes themselves are hard-telecined. This filter should
2499 * mostly work correctly with such material, too. (The beginning of the OP
2500 * shows some artifacts, but otherwise both the OP and ED are indeed
2501 * rendered progressive. The technical reason is that the filter has been
2502 * designed to aggressively reconstruct film frames, which helps in many
2503 * cases with hard-telecined material. In very rare cases, this approach may
2504 * go wrong, regardless of whether the input is telecined or progressive.)
2506 * Finally, note also that IVTC is the only correct way to deinterlace NTSC
2507 * telecined material. Simply applying an interpolating deinterlacing filter
2508 * (with no framerate doubling) is harmful for two reasons. First, even if
2509 * the filter does not damage already progressive frames, it will lose half
2510 * of the available vertical resolution of those frames that are judged
2511 * interlaced. Some algorithms combining data from multiple frames may be
2512 * able to counter this to an extent, effectively performing something akin
2513 * to the frame reconstruction part of IVTC. A more serious problem is that
2514 * any motion will stutter, because (even in the ideal case) one out of
2515 * every four film frames will be shown twice, while the other three will
2516 * be shown only once. Duplicate removal and framerate reduction - which are
2517 * part of IVTC - are also needed to properly play back telecined material
2518 * on progressive displays at a non-doubled framerate.
2520 * So, try this filter on your NTSC anime DVDs. It just might help.
2523 * Technical details:
2526 * First, NTSC hard telecine in a nutshell:
2528 * Film is commonly captured at 24 fps. The framerate must be raised from
2529 * 24 fps to 59.94 fields per second. This starts by pretending that the
2530 * original framerate is 23.976 fps. When authoring, the audio can be
2531 * slowed down by 0.1% to match. Now 59.94 = 5/4 * (2*23.976), which gives
2532 * a nice ratio made out of small integers.
2534 * Thus, each group of four film frames must become five frames in the NTSC
2535 * video stream. One cannot simply repeat one frame of every four, because
2536 * this would result in jerky motion. To slightly soften the jerkiness,
2537 * the extra frame is split into two extra fields, inserted at different
2538 * times. The content of the extra fields is (in classical telecine)
2539 * duplicated as-is from existing fields.
2541 * The field duplication technique is called "3:2 pulldown". The pattern
2542 * is called the cadence. The output from 3:2 pulldown looks like this
2543 * (if the telecine is TFF, top field first):
2545 * a b c d e Telecined frame (actual frames stored on DVD)
2546 * T1 T1 T2 T3 T4 *T*op field content
2547 * B1 B2 B3 B3 B4 *B*ottom field content
2549 * Numbers 1-4 denote the original film frames. E.g. T1 = top field of
2550 * original film frame 1. The field Tb, and one of either Bc or Bd, are
2551 * the extra fields inserted in the telecine. With exact duplication, it
2552 * of course doesn't matter whether Bc or Bd is the extra field, but
2553 * with "full field blended" material (see below) this will affect how to
2554 * correctly extract film frame 3.
2556 * See the following web pages for illustrations and discussion:
2557 * http://neuron2.net/LVG/telecining1.html
2558 * http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
2560 * Note that film frame 2 has been stored "half and half" into two telecined
2561 * frames (b and c). Note also that telecine produces a sequence of
2562 * 3 progressive frames (d, e and a) followed by 2 interlaced frames
2565 * The output may also look like this (BFF telecine, bottom field first):
2571 * Now field Bb', and one of either Tc' or Td', are the extra fields.
2572 * Again, film frame 2 is stored "half and half" (into b' and c').
2574 * Whether the pattern is like abcde or a'b'c'd'e', depends on the telecine
2575 * field dominance (TFF or BFF). This must match the video field dominance,
2576 * but is conceptually different. Importantly, there is no temporal
2577 * difference between those fields that came from the same film frame.
2578 * Also, see the section on soft telecine below.
2580 * In a hard telecine, the TFD and VFD must match for field renderers
2581 * (e.g. traditional DVD player + CRT TV) to work correctly; this should be
2582 * fairly obvious by considering the above telecine patterns and how a
2583 * field renderer displays the material (one field at a time, dominant
2586 * The VFD may, *correctly*, flip mid-stream, if soft field repeats
2587 * (repeat_pict) have been used. They are commonly used in soft telecine
2588 * (see below), but also occasional lone field repeats exist in some streams,
2592 * http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
2593 * for discussion. The document discusses mostly PAL, but includes some notes
2596 * The reason for the words "classical telecine" above, when field
2597 * duplication was first mentioned, is that there exists a
2598 * "full field blended" version, where the added fields are not exact
2599 * duplicates, but are blends of the original film frames. This is rare
2600 * in NTSC, but some material like this reportedly exists. See
2601 * http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
2602 * In these cases, the additional fields are a (probably 50%) blend of the
2603 * frames between which they have been inserted. Which one of the two
2604 * possibilities is the extra field then becomes important.
2605 * This filter does NOT support "full field blended" material.
2607 * To summarize, the 3:2 pulldown sequence produces a group of ten fields
2608 * out of every four film frames. Only eight of these fields are unique.
2609 * To remove the telecine, the duplicate fields must be removed, and the
2610 * original progressive frames restored. Additionally, the presentation
2611 * timestamps (PTS) must be adjusted, and one frame out of five (containing
2612 * no new information) dropped. The duration of each frame in the output
2613 * becomes 5/4 of that in the input, i.e. 25% longer.
2615 * Theoretically, this whole mess could be avoided by soft telecining, if the
2616 * original material is pure 24fps progressive. By using the stream flags
2617 * correctly, the original progressive frames can be stored on the DVD.
2618 * In such cases, the DVD player will apply "soft" 3:2 pulldown. See the
2619 * following section.
2621 * Also, the mess with cadence detection for hard telecine (see below) could
2622 * be avoided by using the progressive frame flag and a five-frame future
2623 * buffer, but no one ever sets the flag correctly for hard-telecined
2624 * streams. All frames are marked as interlaced, regardless of their cadence
2625 * position. This is evil, but sort-of-understandable, given that video
2626 * editors often come with "progressive" and "interlaced" editing modes,
2627 * but no separate "telecined" mode that could correctly handle this
2630 * In practice, most material with its origins in Asia (including virtually
2631 * all official US (R1) anime DVDs) is hard-telecined. Combined with the
2632 * turn-of-the-century practice of rendering true interlaced effects
2633 * on top of the hard-telecined stream, we have what can only be described
2634 * as a monstrosity. Fortunately, recent material is much more consistent,
2635 * even though still almost always hard-telecined.
2637 * Finally, note that telecined video is often edited directly in interlaced
2638 * form, disregarding safe cut positions as pertains to the telecine sequence
2639 * (there are only two: between "d" and "e", or between "e" and the
2640 * next "a"). Thus, the telecine sequence will in practice jump erratically
2641 * at cuts [**]. An aggressive detection strategy is needed to cope with
2644 * [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
2647 * Note about chroma formats: 4:2:0 is very common at least on anime DVDs.
2648 * In the interlaced frames in a hard telecine, the chroma alternates
2649 * every chroma line, even if the chroma format is 4:2:0! This means that
2650 * if the interlaced picture is viewed as-is, the luma alternates every line,
2651 * while the chroma alternates only every two lines of the picture.
2653 * That is, an interlaced frame in a 4:2:0 telecine looks like this
2654 * (numbers indicate which film frame the data comes from):
2656 * luma stored 4:2:0 chroma displayed chroma
2663 * The deinterlace filter sees the stored 4:2:0 chroma. The "displayed chroma"
2664 * is only generated later in the filter chain (probably when YUV is converted
2665 * to the display format, if the display does not accept YUV 4:2:0 directly).
2668 * Next, how NTSC soft telecine works:
2670 * a b c d Frame index (actual frames stored on DVD)
2671 * T1 T2 T3 T4 *T*op field content
2672 * B1 B2 B3 B4 *B*ottom field content
2674 * Here the progressive frames are stored as-is. The catch is in the stream
2675 * flags. For hard telecine, which was explained above, we have
2676 * VFD = constant and nb_fields = 2, just like in a true progressive or
2677 * true interlaced stream. Soft telecine, on the other hand, looks like this:
2681 * T B B T *Video* field dominance (for TFF telecine)
2682 * B T T B *Video* field dominance (for BFF telecine)
2684 * Now the video field dominance flipflops every two frames!
2686 * Note that nb_fields = 3 means the frame duration will be 1.5x that of a
2687 * normal frame. Often, soft-telecined frames are correctly flagged as
2690 * Here the telecining is expected to be done by the player, utilizing the
2691 * soft field repeat (repeat_pict) feature. This is indeed what a field
2692 * renderer (traditional interlaced equipment, or a framerate doubler)
2693 * should do with such a stream.
2695 * In the IVTC filter, our job is to even out the frame durations, but
2696 * disregard video field dominance and just pass the progressive pictures
2699 * Fortunately, for soft telecine to work at all, the stream flags must be
2700 * set correctly. Thus this type can be detected reliably by reading
2701 * nb_fields from three consecutive frames:
2703 * Let P = previous, C = current, N = next. If the frame to be rendered is C,
2704 * there are only three relevant nb_fields flag patterns for the three-frame
2705 * stencil concerning soft telecine:
2707 * P C N What is happening:
2708 * 2 3 2 Entering soft telecine at frame C, or running inside it already.
2709 * 3 2 3 Running inside soft telecine.
2710 * 3 2 2 Exiting soft telecine at frame C. C is the last frame that should
2711 * be handled as soft-telecined. (If we do timing adjustments to the
2712 * "3"s only, we can already exit soft telecine mode when we see
2715 * Note that the same stream may alternate between soft and hard telecine,
2716 * but these cannot occur at the same time. The start and end of the
2717 * soft-telecined parts can be read off the stream flags, and the rest of
2718 * the stream can be handed to the hard IVTC part of the filter for analysis.
2720 * Finally, note also that a stream may also request a lone field repeat
2721 * (a sudden "3" surrounded by "2"s). Fortunately, these can be handled as
2722 * a two-frame soft telecine, as they match the first and third
2723 * flag patterns above.
2725 * Combinations with several "3"s in a row are not valid for soft or hard
2726 * telecine, so if they occur, the frames can be passed through as-is.
2729 * Cadence detection for hard telecine:
2731 * Consider viewing the TFF and BFF hard telecine sequences through a
2732 * three-frame stencil. Again, let P = previous, C = current, N = next.
2733 * A brief analysis leads to the following cadence tables.
2735 * PCN = stencil position (Previous Current Next),
2736 * Dups. = duplicate fields,
2737 * Best field pairs... = combinations of fields which correctly reproduce
2738 * the original progressive frames,
2739 * * = see timestamp considerations below for why
2740 * this particular arrangement.
2744 * PCN Dups. Best field pairs for progressive (correct, theoretical)
2745 * abc TP = TC TPBP = frame 1, TCBP = frame 1, TNBC = frame 2
2746 * bcd BC = BN TCBP = frame 2, TNBC = frame 3, TNBN = frame 3
2747 * cde BP = BC TCBP = frame 3, TCBC = frame 3, TNBN = frame 4
2748 * dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
2749 * eab TC = TN TPBP = frame 4, TCBC = frame 1, TNBC = frame 1
2752 * PCN Progressive output*
2753 * abc frame 2 = TNBC (compose TN+BC)
2754 * bcd frame 3 = TNBN (copy N)
2755 * cde frame 4 = TNBN (copy N)
2757 * eab frame 1 = TCBC (copy C), or TNBC (compose TN+BC)
2759 * On the rows "dea" and "eab", frame 1 refers to a frame from the next
2760 * group of 4. "Compose TN+BC" means to construct a frame using the
2761 * top field of N, and the bottom field of C. See ComposeFrame().
2763 * For BFF, swap all B and T, and rearrange the symbol pairs to again
2764 * read "TxBx". We have:
2766 * PCN Dups. Best field pairs for progressive (correct, theoretical)
2767 * abc BP = BC TPBP = frame 1, TPBC = frame 1, TCBN = frame 2
2768 * bcd TC = TN TPBC = frame 2, TCBN = frame 3, TNBN = frame 3
2769 * cde TP = TC TPBC = frame 3, TCBC = frame 3, TNBN = frame 4
2770 * dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
2771 * eab BC = BN TPBP = frame 4, TCBC = frame 1, TCBN = frame 1
2774 * PCN Progressive output*
2775 * abc frame 2 = TCBN (compose TC+BN)
2776 * bcd frame 3 = TNBN (copy N)
2777 * cde frame 4 = TNBN (copy N)
2779 * eab frame 1 = TCBC (copy C), or TCBN (compose TC+BN)
2781 * From these cadence tables we can extract two strategies for
2782 * cadence detection. We use both.
2784 * Strategy 1: duplicated fields ("vektor").
2786 * Consider that each stencil position has a unique duplicate field
2787 * condition. In one unique position, "dea", there is no match; in all
2788 * other positions, exactly one. By conservatively filtering the
2789 * possibilities based on detected hard field repeats (identical fields
2790 * in successive input frames), it is possible to gradually lock on
2791 * to the cadence. This kind of strategy is used by the classic IVTC filter
2792 * in TVTime/Xine by Billy Biggs (Vektor), hence the name.
2794 * "Conservative" here means that we do not rule anything out, but start at
2795 * each stencil position by suggesting the position "dea", and then only add
2796 * to the list of possibilities based on field repeats that are detected at
2797 * the present stencil position. This estimate is then filtered by ANDing
2798 * against a shifted (time-advanced) version of the estimate from the
2799 * previous stencil position. Once the detected position becomes unique,
2800 * the filter locks on. If the new detection is inconsistent with the
2801 * previous one, the detector resets itself and starts from scratch.
2803 * The strategy is very reliable, as it only requires running (fuzzy)
2804 * duplicate field detection against the input. It is very good at staying
2805 * locked on once it acquires the cadence, and it does so correctly very
2806 * often. These are indeed characteristics that can be observed in the
2807 * behaviour of the TVTime/Xine filter.
2809 * Note especially that 8fps/12fps animation, common in anime, will cause
2810 * spurious hard-repeated fields. The conservative nature of the method
2811 * makes it very good at dealing with this - any spurious repeats will only
2812 * slow down the lock-on, not completely confuse it. It should also be good
2813 * at detecting the presence of a telecine, as neither true interlaced nor
2814 * true progressive material should contain any hard field repeats.
2815 * (This, however, has not been tested yet.)
2817 * The disadvantages are that at times the method may lock on slowly,
2818 * because the detection must be filtered against the history until
2819 * a unique solution is found. Resets, if they happen, will also
2820 * slow down the lock-on.
2822 * The hard duplicate detection required by this strategy can be made
2823 * data-adaptive in several ways. TVTime uses a running average of motion
2824 * scores for its history buffer. We utilize a different, original approach.
2825 * It is rare, if not nonexistent, that only one field changes between
2826 * two valid frames. Thus, if one field changes "much more" than the other
2827 * in fieldwise motion detection, the less changed one is probably a
2828 * duplicate. Importantly, this works with telecined input, too - the field
2829 * that changes "much" may be part of another film frame, while the "less"
2830 * changed one is actually a duplicate from the previous film frame.
2831 * If both fields change "about as much", then no hard field repeat
2835 * Strategy 2: progressive/interlaced field combinations ("scores").
2837 * We can also form a second strategy, which is not as reliable in practice,
2838 * but which locks on faster when it does. This is original to this filter.
2840 * Consider all possible field pairs from two successive frames: TCBC, TCBN,
2841 * TNBC, TNBN. After one frame, these become TPBP, TPBC, TCBP, TCBC.
2842 * These eight pairs (seven unique, disregarding the duplicate TCBC)
2843 * are the exhaustive list of possible field pairs from two successive
2844 * frames in the three-frame PCN stencil.
2846 * The above tables list triplets of field pair combinations for each cadence
2847 * position, which should produce progressive frames. All the given triplets
2848 * are unique in each table alone, although the one at "dea" is
2849 * indistinguishable from the case of pure progressive material. It is also
2850 * the only one which is not unique across both tables.
2852 * Thus, all sequences of two neighboring triplets are unique across both
2853 * tables. (For "neighboring", each table is considered to wrap around from
2854 * "eab" back to "abc", i.e. from the last row back to the first row.)
2855 * Furthermore, each sequence of three neighboring triplets is redundantly
2856 * unique (i.e. is unique, and reduces the chance of false positives).
2857 * (In practice, though, we already know which table to consider, from the fact
2858 * that TFD and VFD must match. Checking only the relevant table makes the
2859 * strategy slightly more robust.)
2861 * The important idea is: *all other* field pair combinations should produce
2862 * frames that look interlaced. This includes those combinations present in
2863 * the "wrong" (i.e. not current position) rows of the table (insofar as
2864 * those combinations are not also present in the "correct" row; by the
2865 * uniqueness property, *every* "wrong" row will always contain at least one
2866 * combination that differs from those in the "correct" row).
2868 * We generate the artificial frames TCBC, TCBN, TNBC and TNBN (virtually;
2869 * no data is actually moved). Two of these are just the frames C and N,
2870 * which already exist; the two others correspond to composing the given
2871 * field pairs. We then compute the interlace score for each of these frames.
2872 * The interlace scores of what are now TPBP, TPBC and TCBP, also needed,
2873 * were computed by this same mechanism during the previous input frame.
2874 * These can be slid in history and reused.
2876 * We then check, using the computed interlace scores, and taking into
2877 * account the video field dominance information, which field combination
2878 * triplet given in the appropriate table produces the smallest sum of
2879 * interlace scores. Unless we are at PCN = "dea" (which could also be pure
2880 * progressive!), this immediately gives us the most likely current cadence
2881 * position. Combined with a two-step history, the sequence of three most
2882 * likely positions found this way always allows us to make a more or less
2883 * reliable detection. (That is, when a reliable detection is possible; if the
2884 * video has no motion at all, every detection will report the position "dea".
2885 * In anime, still shots are common. Thus we must augment this with a
2886 * full-frame motion detection that switches the detector off if no motion
2889 * The detection seems to need four full-frame interlace analyses per frame.
2890 * Actually, three are enough, because the previous N is the new C, so we can
2891 * slide the already computed result. Also during initialization, we only
2892 * need to compute TNBN on the first frame; this has become TPBP when the
2893 * third frame is reached. Similarly, we compute TNBN, TNBC and TCBN during
2894 * the second frame (just before the filter starts), and these get slided
2895 * into TCBC, TCBP and TPBC when the third frame is reached. At that point,
2896 * initialization is complete.
2898 * Because we only compare interlace scores against each other, no threshold
2899 * is needed in the cadence detector. Thus it, trivially, adapts to the
2900 * material automatically.
2902 * The weakness of this approach is that any comb metric detects incorrectly
2903 * every now and then. Especially slow vertical camera pans often get treated
2904 * wrong, because the messed-up field combination looks less interlaced
2905 * according to the comb metric (especially in anime) than the correct one
2906 * (which contains, correctly, one-pixel thick cartoon outlines, parts of
2907 * which are often perfectly horizontal).
2909 * The advantage is that this strategy catches horizontal camera pans
2910 * immediately and reliably, while the other strategy may still be trying
2914 * Frame reconstruction:
2916 * We utilize a hybrid approach. If a valid cadence is locked on, we use the
2917 * operation table to decide what to do. This handles those cases correctly,
2918 * which would be difficult for the interlace detector alone (e.g. vertical
2919 * camera pans). Note that the operations that must be performed for IVTC
2920 * include timestamp mangling and frame dropping, which can only be done
2921 * reliably on a valid cadence.
2923 * When the cadence fails (we detect this from a sudden upward jump in the
2924 * interlace scores of the constructed frames), we reset the "vektor"
2925 * detector strategy and fall back to an emergency frame composer, where we
2926 * use ideas from Transcode's IVTC.
2928 * In this emergency mode, we simply output the least interlaced frame out of
2929 * the combinations TNBN, TNBC and TCBN (where only one of the last two is
2930 * tested, based on the stream TFF/BFF information). In this mode, we do not
2931 * touch the timestamps, and just pass all five frames from each group right
2932 * through. This introduces some stutter, but in practice it is often not
2933 * noticeable. This is because the kind of material that is likely to trip up
2934 * the cadence detector usually includes irregular 8fps/12fps motion. With
2935 * true 24fps motion, the cadence quickly locks on, and stays locked on.
2937 * Once the cadence locks on again, we resume normal operation based on
2938 * the operation table.
2941 * Timestamp mangling:
2943 * To make five into four we need to extend frame durations by 25%.
2944 * Consider the following diagram (times given in 90kHz ticks, rounded to
2945 * integers; this is just for illustration, and for comparison with the
2946 * "scratch paper" comments in pulldown.c of TVTime/Xine):
2948 * NTSC input (29.97 fps)
2949 * a b c d e a (from next group) ...
2950 * 0 3003 6006 9009 12012 15015
2951 * 0 3754 7508 11261 15015
2952 * 1 2 3 4 1 (from next group) ...
2953 * Film output (23.976 fps)
2955 * Three of the film frames have length 3754, and one has 3753
2956 * (it is 1/90000 sec shorter). This rounding was chosen so that the lengths
2957 * of the group of four sum to the original 15015.
2959 * From the diagram we get these deltas for presentation timestamp adjustment
2960 * (in 90 kHz ticks, for illustration):
2961 * (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
2962 * 0 +751 +1502 +2252 (skip) 0 ...
2964 * In fractions of (p_next->date - p_cur->date), regardless of actual
2965 * time unit, the deltas are:
2966 * (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
2967 * 0 +0.25 +0.50 +0.75 (skip) 0 ...
2969 * This is what we actually use. In our implementation, the values are stored
2970 * multiplied by 4, as integers.
2972 * The "current" frame should be displayed at [original time + delta].
2973 * E.g., when "current" = b (i.e. PCN = abc), start displaying film frame 2
2974 * at time [original time of b + 751 ticks]. So, when we catch the cadence,
2975 * we will start mangling the timestamps according to the cadence position
2976 * of the "current" frame, using the deltas given above. This will cause
2977 * a one-time jerk, most noticeable if the cadence happens to catch at
2978 * position "d". (Alternatively, upon lock-on, we could wait until we are
2979 * at "a" before switching on IVTC, but this makes the maximal delay
2980 * [max. detection + max. wait] = 3 + 4 = 7 input frames, which comes to
2981 * 7/30 ~ 0.23 seconds instead of the 3/30 = 0.10 seconds from purely
2982 * the detection. The one-time jerk is simpler to implement and gives the
2985 * It is clear that "e" is a safe choice for the dropped frame. This can be
2986 * seen from the timings and the cadence tables. First, consider the timings.
2987 * If we have only one future frame, "e" is the only one whose PTS, comparing
2988 * to the film frames, allows dropping it safely. To see this, consider which
2989 * film frame needs to be rendered as each new input frame arrives. Secondly,
2990 * consider the cadence tables. It is ok to drop "e", because the same
2991 * film frame "1" is available also at the next PCN position "eab".
2992 * (As a side note, it is interesting that Vektor's filter drops "b".
2993 * See the TVTime sources.)
2995 * When the filter falls out of film mode, the timestamps of the incoming
2996 * frames are left untouched. Thus, the output from this filter has a
2997 * variable framerate: 4/5 of the input framerate when IVTC is active
2998 * (whether hard or soft), and the same framerate as input when it is not
2999 * (or when in emergency mode).
3002 * For other open-source IVTC codes, which may be a useful source for ideas,
3003 * see the following:
3005 * The classic filter by Billy Biggs (Vektor). Written in 2001-2003 for
3006 * TVTime, and adapted into Xine later. In xine-lib 1.1.19, it is at
3007 * src/post/deinterlace/pulldown.*. Also needed are tvtime.*, and speedy.*.
3009 * Transcode's ivtc->decimate->32detect chain by Thanassis Tsiodras.
3010 * Written in 2002, added in Transcode 0.6.12. This probably has something
3011 * to do with the same chain in MPlayer, considering that MPlayer acquired
3012 * an IVTC filter around the same time. In Transcode 1.1.5, the IVTC part is
3013 * at filter/filter_ivtc.c. Transcode 1.1.5 sources can be downloaded from
3014 * http://developer.berlios.de/project/showfiles.php?group_id=10094
3018 * Helper function: estimates "how much interlaced" the given field pair is.
3020 * It is allowed that p_pic_top == p_pic_bottom.
3022 * If p_pic_top != p_pic_bot (fields come from different pictures), you can use
3023 * ComposeFrame() to actually construct the picture if needed.
3025 * Number of planes, and number of lines in each plane, in p_pic_top and
3026 * p_pic_bot must match. If the visible pitches differ, only the compatible
3027 * (smaller) part will be tested.
3029 * Luma and chroma planes are tested in the same way. This is correct for
3030 * telecined input, where in the interlaced frames also chroma alternates
3031 * every chroma line, even if the chroma format is 4:2:0!
3033 * This is just a raw detector that produces a score. The overall score
3034 * indicating a progressive or interlaced frame may vary wildly, depending on
3035 * the material, especially in anime. The scores should be compared to
3036 * each other locally (in the temporal sense) to make meaningful decisions
3037 * about progressive or interlaced frames.
3039 * @param p_pic_top Picture to take the top field from.
3040 * @param p_pic_bot Picture to take the bottom field from (same or different).
3041 * @return Interlace score, >= 0. Higher values mean more interlaced.
3042 * @retval -1 Error: incompatible input pictures.
3044 * @see ComposeFrame()
static int CalculateInterlaceScore( const picture_t* p_pic_top,
                                    const picture_t* p_pic_bot )
/*
    We use the comb metric from the IVTC filter of Transcode 1.1.5.
    This was found to work better for the particular purpose of IVTC
    than RenderX()'s comb metric.

    Note that we *must not* subsample at all in order to catch interlacing
    in telecined frames with localized motion (e.g. anime with characters
    talking, where only mouths move and everything else stays still.)
*/
    assert( p_pic_top != NULL );
    assert( p_pic_bot != NULL );

    /* Incompatible pictures: per the function doc, the caller gets -1. */
    if( p_pic_top->i_planes != p_pic_bot->i_planes )

    unsigned u_cpu = vlc_CPU();

    /* Amount of bits must be known for MMX, thus int32_t.
       Doesn't hurt the C implementation. */
    int32_t i_score = 0;

#ifdef CAN_COMPILE_MMXEXT
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
        pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */

    for( int i_plane = 0 ; i_plane < p_pic_top->i_planes ; ++i_plane )
        /* Sanity check: the field sources must have matching line counts. */
        if( p_pic_top->p[i_plane].i_visible_lines !=
            p_pic_bot->p[i_plane].i_visible_lines )

        const int i_lasty = p_pic_top->p[i_plane].i_visible_lines-1;
        const int w = FFMIN( p_pic_top->p[i_plane].i_visible_pitch,
                             p_pic_bot->p[i_plane].i_visible_pitch );
        const int wm8 = w % 8;   /* remainder */
        const int w8  = w - wm8; /* part of width that is divisible by 8 */

        /* Current line / neighbouring lines picture pointers */
        const picture_t *cur = p_pic_bot;
        const picture_t *ngh = p_pic_top;
        int wc = cur->p[i_plane].i_pitch;
        int wn = ngh->p[i_plane].i_pitch;

        /* Transcode 1.1.5 only checks every other line. Checking every line
           works better for anime, which may contain horizontal,
           one pixel thick cartoon outlines.
        */
        for( int y = 1; y < i_lasty; ++y )

            uint8_t *p_c = &cur->p[i_plane].p_pixels[y*wc];     /* this line */
            uint8_t *p_p = &ngh->p[i_plane].p_pixels[(y-1)*wn]; /* prev line */
            uint8_t *p_n = &ngh->p[i_plane].p_pixels[(y+1)*wn]; /* next line */

            /* Threshold (value from Transcode 1.1.5) */

#ifdef CAN_COMPILE_MMXEXT
            /* Easy-to-read C version further below.

               Assumptions: 0 < T < 127
                            # of pixels < (2^32)/255
               Note: calculates score * 255
            */
            if( u_cpu & CPU_CAPABILITY_MMXEXT )

                static const mmx_t b0 = { .uq = 0x0000000000000000ULL };
                static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
                static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };

                for( ; x < w8; x += 8 )

                    /* Load 8 pixels each from current line and its
                       vertical neighbours in the other field. */
                    movq_m2r( *((int64_t*)p_c), mm0 );
                    movq_m2r( *((int64_t*)p_p), mm1 );
                    movq_m2r( *((int64_t*)p_n), mm2 );

                    /* Map unsigned bytes to the signed range (x - 128). */
                    psubb_m2r( b128, mm0 );
                    psubb_m2r( b128, mm1 );
                    psubb_m2r( b128, mm2 );

                    /* Saturating (P - C) and (N - C). */
                    psubsb_r2r( mm0, mm1 );
                    psubsb_r2r( mm0, mm2 );

                    pxor_r2r( mm3, mm3 );
                    pxor_r2r( mm4, mm4 );
                    pxor_r2r( mm5, mm5 );
                    pxor_r2r( mm6, mm6 );

                    /* Widen bytes to 16-bit words: low halves into mm3/mm4,
                       high halves into mm5/mm6. */
                    punpcklbw_r2r( mm1, mm3 );
                    punpcklbw_r2r( mm2, mm4 );
                    punpckhbw_r2r( mm1, mm5 );
                    punpckhbw_r2r( mm2, mm6 );

                    /* (P-C)*(N-C); positive when both neighbours differ from
                       the current line in the same direction ("combing"),
                       matching the C version's comb metric below. */
                    pmulhw_r2r( mm3, mm4 );
                    pmulhw_r2r( mm5, mm6 );

                    packsswb_r2r(mm4, mm6);
                    /* Compare against threshold T, then accumulate the hits
                       (score * 255, see note above). */
                    pcmpgtb_m2r( bT, mm6 );
                    psadbw_m2r( b0, mm6 );
                    paddd_r2r( mm6, mm7 );

            /* C implementation: remaining pixels / no MMXEXT. */
            /* Worst case: need 17 bits for "comb". */
            int_fast32_t C = *p_c;
            int_fast32_t P = *p_p;
            int_fast32_t N = *p_n;

            /* Comments in Transcode's filter_ivtc.c attribute this
               combing metric to Gunnar Thalin.

               The idea is that if the picture is interlaced, both
               expressions will have the same sign, and this comes
               up positive. The value T = 100 has been chosen such
               that a pixel difference of 10 (on average) will
               trigger the detector.
            */
            int_fast32_t comb = (P - C) * (N - C);

        /* Now the other field - swap current and neighbour pictures */
        const picture_t *tmp = cur;

#ifdef CAN_COMPILE_MMXEXT
    /* Retrieve the MMX accumulator into the C-side score. */
    if( u_cpu & CPU_CAPABILITY_MMXEXT )
        movd_r2m( mm7, i_score );
3208 * Internal helper function for EstimateNumBlocksWithMotion():
3209 * estimates whether there is motion in the given 8x8 block on one plane
3210 * between two images. The block as a whole and its fields are evaluated
3211 * separately, and use different motion thresholds.
3213 * This is a low-level function only used by EstimateNumBlocksWithMotion().
3214 * There is no need to call this function manually.
3216 * For interpretation of pi_top and pi_bot, it is assumed that the block
3217 * starts on an even-numbered line (belonging to the top field).
3219 * The b_mmx parameter avoids the need to call vlc_CPU() separately
3222 * @param[in] p_pix_p Base pointer to the block in previous picture
3223 * @param[in] p_pix_c Base pointer to the same block in current picture
3224 * @param i_pitch_prev i_pitch of previous picture
3225 * @param i_pitch_curr i_pitch of current picture
3226 * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
3227 * @param[out] pi_top 1 if top field of the block had motion, 0 if no
3228 * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
3229 * @return 1 if the block had motion, 0 if no
3230 * @see EstimateNumBlocksWithMotion()
static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c,
                                        int i_pitch_prev, int i_pitch_curr,
                                        int* pi_top, int* pi_bot )
    /* Pixel luma/chroma difference threshold to detect motion. */

    int32_t i_motion = 0;
    int32_t i_top_motion = 0;
    int32_t i_bot_motion = 0;

    /* See below for the C version to see more quickly what this does. */
#ifdef CAN_COMPILE_MMXEXT
        static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };
        pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */
        movq_m2r( bT, mm5 );

        pxor_r2r( mm3, mm3 ); /* score (top field) */
        pxor_r2r( mm4, mm4 ); /* score (bottom field) */
        for( int y = 0; y < 8; y+=2 )

            /* top field line: |curr - prev| via two unsigned
               saturating subtractions */
            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
            movq_r2r( mm0, mm2 );
            psubusb_r2r( mm1, mm2 );
            psubusb_r2r( mm0, mm1 );

            /* threshold against T, then sum the hits (counts 255 per
               changed pixel - see "score * 255" note below) */
            pcmpgtb_r2r( mm5, mm2 );
            pcmpgtb_r2r( mm5, mm1 );
            psadbw_r2r( mm6, mm2 );
            psadbw_r2r( mm6, mm1 );

            paddd_r2r( mm2, mm1 );
            paddd_r2r( mm1, mm3 ); /* add to top field score */

            p_pix_c += i_pitch_curr;
            p_pix_p += i_pitch_prev;

            /* bottom field - handling identical to top field, except... */
            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
            movq_r2r( mm0, mm2 );
            psubusb_r2r( mm1, mm2 );
            psubusb_r2r( mm0, mm1 );

            pcmpgtb_r2r( mm5, mm2 );
            pcmpgtb_r2r( mm5, mm1 );
            psadbw_r2r( mm6, mm2 );
            psadbw_r2r( mm6, mm1 );

            paddd_r2r( mm2, mm1 );
            paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */

            p_pix_c += i_pitch_curr;
            p_pix_p += i_pitch_prev;

        movq_r2r( mm3, mm7 ); /* score (total) */
        paddd_r2r( mm4, mm7 );
        movd_r2m( mm3, i_top_motion );
        movd_r2m( mm4, i_bot_motion );
        movd_r2m( mm7, i_motion );

        /* The loop counts actual score * 255. */
        i_top_motion /= 255;
        i_bot_motion /= 255;

    /* Plain C implementation (also the reference for the MMX path above). */
    for( int y = 0; y < 8; ++y )

        uint8_t *pc = p_pix_c;
        uint8_t *pp = p_pix_p;

        for( int x = 0; x < 8; ++x )

            /* per-pixel absolute difference between current and previous */
            int_fast16_t C = abs((*pc) - (*pp));

        /* even lines belong to the top field, odd ones to the bottom */
        i_top_motion += score;

        i_bot_motion += score;

        p_pix_c += i_pitch_curr;
        p_pix_p += i_pitch_prev;

    /* Field motion thresholds.

       Empirical value - works better in practice than the "4" that
       would be consistent with the full-block threshold.

       Especially the opening scene of The Third ep. 1 (just after the OP)
       works better with this. It also fixes some talking scenes in
       Stellvia ep. 1, where the cadence would otherwise catch on incorrectly,
       leading to more interlacing artifacts than by just using the emergency
       mode frame composer.
    */
    (*pi_top) = ( i_top_motion >= 8 );
    (*pi_bot) = ( i_bot_motion >= 8 );

    /* Full-block threshold = (8*8)/8: motion is detected if 1/8 of the block
       changes "enough". */
    return (i_motion >= 8);
3355 * Helper function: Estimates the number of 8x8 blocks which have motion
3356 * between the given pictures. Needed for various detectors in RenderIVTC().
3358 * Number of planes and visible lines in each plane, in the inputs must match.
3359 * If the visible pitches do not match, only the compatible (smaller)
3360 * part will be tested.
3362 * Note that the return value is NOT simply *pi_top + *pi_bot, because
3363 * the fields and the full block use different motion thresholds.
3365 * If you do not want the separate field scores, pass NULL for pi_top and
3366 * pi_bot. This does not affect computation speed, and is only provided as
3367 * a syntactic convenience.
3369 * Motion in each picture plane (Y, U, V) counts separately.
3370 * The sum of number of blocks with motion across all planes is returned.
3372 * For 4:2:0 chroma, even-numbered chroma lines make up the "top field" for
3373 * chroma, and odd-numbered chroma lines the "bottom field" for chroma.
3374 * This is correct for IVTC purposes.
3376 * @param[in] p_prev Previous picture
3377 * @param[in] p_curr Current picture
3378 * @param[out] pi_top Number of 8x8 blocks where top field has motion.
3379 * @param[out] pi_bot Number of 8x8 blocks where bottom field has motion.
3380 * @return Number of 8x8 blocks that have motion.
3381 * @retval -1 Error: incompatible input pictures.
3382 * @see TestForMotionInBlock()
static int EstimateNumBlocksWithMotion( const picture_t* p_prev,
                                        const picture_t* p_curr,
                                        int *pi_top, int *pi_bot)
    assert( p_prev != NULL );
    assert( p_curr != NULL );

    int i_score_top = 0;
    int i_score_bot = 0;

    /* Incompatible pictures: per the function doc, the caller gets -1. */
    if( p_prev->i_planes != p_curr->i_planes )

    /* We must tell our inline helper whether to use MMX acceleration. */
#ifdef CAN_COMPILE_MMXEXT
    bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );

    for( int i_plane = 0 ; i_plane < p_prev->i_planes ; i_plane++ )

        /* Sanity check: plane geometries must match. */
        if( p_prev->p[i_plane].i_visible_lines !=
            p_curr->p[i_plane].i_visible_lines )

        const int i_pitch_prev = p_prev->p[i_plane].i_pitch;
        const int i_pitch_curr = p_curr->p[i_plane].i_pitch;

        /* Last pixels and lines (which do not make whole blocks) are ignored.
           Shouldn't really matter for our purposes. */
        const int i_mby = p_prev->p[i_plane].i_visible_lines / 8;
        const int w = FFMIN( p_prev->p[i_plane].i_visible_pitch,
                             p_curr->p[i_plane].i_visible_pitch );
        const int i_mbx = w / 8;

        /* Walk the plane in 8x8 blocks. */
        for( int by = 0; by < i_mby; ++by )

            uint8_t *p_pix_p = &p_prev->p[i_plane].p_pixels[i_pitch_prev*8*by];
            uint8_t *p_pix_c = &p_curr->p[i_plane].p_pixels[i_pitch_curr*8*by];

            for( int bx = 0; bx < i_mbx; ++bx )

                int i_top_temp, i_bot_temp;
                i_score += TestForMotionInBlock( p_pix_p, p_pix_c,
                                                 i_pitch_prev, i_pitch_curr,
                                                 &i_top_temp, &i_bot_temp );
                i_score_top += i_top_temp;
                i_score_bot += i_bot_temp;

    /* Report optional per-field block counts (see function doc:
       pi_top/pi_bot may be NULL). */
    (*pi_top) = i_score_top;

    (*pi_bot) = i_score_bot;
/* Fasten your seatbelt - lots of IVTC constants follow... */

/**
 * IVTC filter modes.
 *
 * Hard telecine: burned into video stream.
 * Soft telecine: stream consists of progressive frames;
 *                telecining handled by stream flags.
 */
typedef enum { IVTC_MODE_DETECTING = 0,
               IVTC_MODE_TELECINED_NTSC_HARD = 1,
               IVTC_MODE_TELECINED_NTSC_SOFT = 2 } ivtc_mode;

/**
 * Field pair combinations from successive frames in the PCN stencil.
 * T = top, B = bottom, P = previous, C = current, N = next
 * These are used as array indices; hence the explicit numbering.
 */
typedef enum { FIELD_PAIR_TPBP = 0, FIELD_PAIR_TPBC = 1,
               FIELD_PAIR_TCBP = 2, FIELD_PAIR_TCBC = 3,
               FIELD_PAIR_TCBN = 4, FIELD_PAIR_TNBC = 5,
               FIELD_PAIR_TNBN = 6 } ivtc_field_pair;

/* Note: only valid ones count for NUM */
#define NUM_CADENCE_POS 9
/**
 * Cadence positions for the PCN stencil (PCN, Previous Current Next).
 *
 * Note that "dea" in both cadence tables and a pure progressive signal
 * are indistinguishable.
 *
 * Used as array indices except the -1.
 *
 * This is a combined raw position containing both i_cadence_pos
 * and telecine field dominance.
 * @see pi_detected_pos_to_cadence_pos
 * @see pi_detected_pos_to_tfd
 */
typedef enum { CADENCE_POS_INVALID = -1,
               CADENCE_POS_PROGRESSIVE = 0,
               CADENCE_POS_TFF_ABC = 1,
               CADENCE_POS_TFF_BCD = 2,
               CADENCE_POS_TFF_CDE = 3,
               CADENCE_POS_TFF_EAB = 4,
               CADENCE_POS_BFF_ABC = 5,
               CADENCE_POS_BFF_BCD = 6,
               CADENCE_POS_BFF_CDE = 7,
               CADENCE_POS_BFF_EAB = 8 } ivtc_cadence_pos;
/* First and one-past-end for TFF-only and BFF-only raw positions. */
#define CADENCE_POS_TFF_FIRST 1
#define CADENCE_POS_TFF_END 5
#define CADENCE_POS_BFF_FIRST 5
#define CADENCE_POS_BFF_END 9

/**
 * For the "vektor" cadence detector algorithm.
 *
 * The algorithm produces a set of possible positions instead of a unique
 * position, until it locks on. The set is represented as a bitmask.
 *
 * The bitmask is stored in a word, and its layout is:
 * blank blank BFF_CARRY BFF4 BFF3 BFF2 BFF1 BFF0  (high byte)
 * blank blank TFF_CARRY TFF4 TFF3 TFF2 TFF1 TFF0  (low byte)
 *
 * This allows predicting the next position by left-shifting the previous
 * result by one bit, copying the CARRY bits to the respective zeroth position,
 * and ANDing with 0x1F1F.
 *
 * This table is indexed with a valid ivtc_cadence_pos.
 * @see ivtc_cadence_pos
 */
3526 const int pi_detected_pos_to_bitmask[NUM_CADENCE_POS] = { 0x0808, /* prog. */
3527 0x0001, /* TFF ABC */
3528 0x0002, /* TFF BCD */
3529 0x0004, /* TFF CDE */
3530 0x0010, /* TFF EAB */
3531 0x0100, /* BFF ABC */
3532 0x0200, /* BFF BCD */
3533 0x0400, /* BFF CDE */
3534 0x1000, /* BFF EAB */
/* Masks for the "vektor" possible-position word (layout documented above):
   whole set, TFF-only (low byte) and BFF-only (high byte) parts, and the
   highest/lowest position bit of each part. */
#define VEKTOR_CADENCE_POS_ALL 0x1F1F
#define VEKTOR_CADENCE_POS_TFF 0x00FF
#define VEKTOR_CADENCE_POS_BFF 0xFF00
#define VEKTOR_CADENCE_POS_TFF_HIGH 0x0010
#define VEKTOR_CADENCE_POS_TFF_LOW 0x0001
#define VEKTOR_CADENCE_POS_BFF_HIGH 0x1000
#define VEKTOR_CADENCE_POS_BFF_LOW 0x0100

/* Telecine field dominance */
typedef enum { TFD_INVALID = -1, TFD_TFF = 0, TFD_BFF = 1 } ivtc_tfd;
3548 * Position detection table for the "scores" cadence detector algorithm.
3550 * These are the (only) field pair combinations that should give progressive
3551 * frames. There are three for each position.
3553 * First index: ivtc_cadence_pos
3555 static const ivtc_field_pair pi_best_field_pairs[NUM_CADENCE_POS][3] = {
3556 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* prog. */
3558 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP, FIELD_PAIR_TNBC}, /* TFF ABC */
3559 {FIELD_PAIR_TCBP, FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* TFF BCD */
3560 {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* TFF CDE */
3561 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* TFF EAB */
3563 {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC, FIELD_PAIR_TCBN}, /* BFF ABC */
3564 {FIELD_PAIR_TPBC, FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* BFF BCD */
3565 {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* BFF CDE */
3566 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* BFF EAB */
3570 * Alternative position detection table for the "scores" cadence detector
3573 * These field pair combinations should give only interlaced frames.
3574 * There are four for each position.
3576 * First index: ivtc_cadence_pos
3578 * Currently unused. During development it was tested that whether we detect
3579 * best or worst, the resulting detected cadence positions are identical
3580 * (neither strategy performs any different from the other).
3582 static const ivtc_field_pair pi_worst_field_pairs[NUM_CADENCE_POS][4] = {
3583 {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP,
3584 FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* prog. */
3586 {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC,
3587 FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF ABC */
3588 {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC,
3589 FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* TFF BCD */
3590 {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC,
3591 FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* TFF CDE */
3592 {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP,
3593 FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF EAB */
3595 {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC,
3596 FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF ABC */
3597 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP,
3598 FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* BFF BCD */
3599 {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP,
3600 FIELD_PAIR_TNBC, FIELD_PAIR_TCBN}, /* BFF CDE */
3601 {FIELD_PAIR_TCBP, FIELD_PAIR_TPBC,
3602 FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF EAB */
3606 * Table for extracting the i_cadence_pos part of detected cadence position
3607 * (ivtc_cadence_pos).
3609 * The counter goes from 0 to 4, where "abc" = 0, "bcd" = 1, ...
3611 * @see ivtc_cadence_pos
3613 static const int pi_detected_pos_to_cadence_pos[NUM_CADENCE_POS] = {
3626 * Table for extracting the telecine field dominance part of detected
3627 * cadence position (ivtc_cadence_pos).
3629 * The position "dea" does not provide TFF/BFF information, because it is
3630 * indistinguishable from progressive.
3632 * @see ivtc_cadence_pos
3634 static const int pi_detected_pos_to_tfd[NUM_CADENCE_POS] = {
3635 TFD_INVALID, /* prog. */
3636 TFD_TFF, /* TFF ABC */
3637 TFD_TFF, /* TFF BCD */
3638 TFD_TFF, /* TFF CDE */
3639 TFD_TFF, /* TFF EAB */
3640 TFD_BFF, /* BFF ABC */
3641 TFD_BFF, /* BFF BCD */
3642 TFD_BFF, /* BFF CDE */
3643 TFD_BFF, /* BFF EAB */
3646 /* Valid telecine sequences (TFF and BFF). Indices: [TFD][i_cadence_pos] */
3647 /* Currently unused and left here for documentation only.
3648 There is an easier way - just decode the i_cadence_pos part of the
3649 detected position using the pi_detected_pos_to_cadence_pos table,
3650 and check that it is successive mod 5. See IVTCCadenceAnalyze(). */
3651 /*static const int pi_valid_cadences[2][5] = { {CADENCE_POS_TFF_ABC,
3652 CADENCE_POS_TFF_BCD,
3653 CADENCE_POS_TFF_CDE,
3654 CADENCE_POS_PROGRESSIVE,
3655 CADENCE_POS_TFF_EAB},
3657 {CADENCE_POS_BFF_ABC,
3658 CADENCE_POS_BFF_BCD,
3659 CADENCE_POS_BFF_CDE,
3660 CADENCE_POS_PROGRESSIVE,
3661 CADENCE_POS_BFF_EAB},
3666 * Operations needed in film frame reconstruction.
/* Film frame reconstruction operations. COPY_N/COPY_C copy the next/current
   input frame as-is; the COMPOSE ops build a frame from the named fields
   (T = top, B = bottom, C = current, N = next — see the PCN stencil above).
   The COPY ops are referenced by the usage note below. */
typedef enum { IVTC_OP_DROP_FRAME,
               IVTC_OP_COPY_N,
               IVTC_OP_COPY_C,
               IVTC_OP_COMPOSE_TNBC,
               IVTC_OP_COMPOSE_TCBN } ivtc_op;
3674 /* Note: During hard IVTC, we must avoid COPY_C and do a compose instead.
3675 If we COPY_C, some subtitles will flicker badly, even if we use the
3676 cadence-based film frame reconstruction. Try the first scene in
3677 Kanon (2006) vol. 3 to see the problem.
3679 COPY_C can be used without problems when it is used consistently
3680 (not constantly mixed in with COPY_N and compose operations),
3681 for example in soft IVTC.
3684 * Operation table for film frame reconstruction depending on cadence position.
3685 * Indices: [TFD][i_cadence_pos]
3686 * @see pi_detected_pos_to_tfd
3687 * @see pi_detected_pos_to_cadence_pos
/* NOTE(review): each row should hold 5 ops (indexed by i_cadence_pos 0..4);
   the middle entries appear to be missing in this excerpt — restore from
   upstream before use. Per the note above, hard IVTC must prefer compose
   ops over COPY_C to avoid subtitle flicker. */
static const ivtc_op pi_reconstruction_ops[2][5] = { /* TFF */
                                                     {IVTC_OP_COMPOSE_TNBC,
                                                      IVTC_OP_COMPOSE_TNBC},
                                                     /* BFF */
                                                     {IVTC_OP_COMPOSE_TCBN,
                                                      IVTC_OP_COMPOSE_TCBN},
3705 * Timestamp mangling table.
3707 * This is used in the 29.97 -> 23.976 fps conversion.
3709 * Index: i_cadence_pos, 0..4.
3711 * Valid values are nonnegative. The -1 corresponds to the dropped frame
3712 * and is never used, except for a debug assert.
3714 * The unit of the values is 1/4 of frame duration.
3715 * See the function documentation of RenderIVTC() for an explanation.
3716 * @see ivtc_cadence_pos
3717 * @see pi_detected_pos_to_cadence_pos
3718 * @see pi_reconstruction_ops
/* PTS deltas in units of 1/4 frame duration, indexed by i_cadence_pos:
   current = "b" -> +1, "c" -> +2, "d" -> +3, "e" -> dropped (-1, never
   used), "a" -> 0. See the timestamp mangling diagram in the RenderIVTC
   documentation; the four deltas sum the five input durations into four. */
static const int pi_timestamp_deltas[5] = { 1, 2, 3, -1, 0 };
3724 * Internal helper function for RenderIVTC(): performs initialization
3725 * at the start of a new frame.
3727 * In practice, this slides detector histories.
3729 * This function should only perform initialization that does NOT require
3730 * the input frame history buffer. This runs at every frame, including
3733 * This is an internal function only used by RenderIVTC().
3734 * There is no need to call this function manually.
3736 * @param p_filter The filter instance.
static inline void IVTCFrameInit( filter_t *p_filter )
    assert( p_filter != NULL );

    filter_sys_t *p_sys = p_filter->p_sys;
    ivtc_sys_t *p_ivtc = &p_sys->ivtc;

    /* Slide detector histories one step, discarding the oldest entry. */
    for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )

        p_ivtc->pi_top_rep[i-1] = p_ivtc->pi_top_rep[i];
        p_ivtc->pi_bot_rep[i-1] = p_ivtc->pi_bot_rep[i];
        p_ivtc->pi_motion[i-1] = p_ivtc->pi_motion[i];

        p_ivtc->pi_s_cadence_pos[i-1] = p_ivtc->pi_s_cadence_pos[i];
        p_ivtc->pb_s_reliable[i-1] = p_ivtc->pb_s_reliable[i];
        p_ivtc->pi_v_cadence_pos[i-1] = p_ivtc->pi_v_cadence_pos[i];
        p_ivtc->pi_v_raw[i-1] = p_ivtc->pi_v_raw[i];
        p_ivtc->pb_v_reliable[i-1] = p_ivtc->pb_v_reliable[i];

        p_ivtc->pi_cadence_pos_history[i-1]
            = p_ivtc->pi_cadence_pos_history[i];

        p_ivtc->pb_all_progressives[i-1] = p_ivtc->pb_all_progressives[i];

    /* The latest position has not been detected yet. */
    p_ivtc->pi_s_cadence_pos[IVTC_LATEST] = CADENCE_POS_INVALID;
    p_ivtc->pb_s_reliable[IVTC_LATEST] = false;
    p_ivtc->pi_v_cadence_pos[IVTC_LATEST] = CADENCE_POS_INVALID;
    p_ivtc->pi_v_raw[IVTC_LATEST] = VEKTOR_CADENCE_POS_ALL;
    p_ivtc->pb_v_reliable[IVTC_LATEST] = false;
    p_ivtc->pi_cadence_pos_history[IVTC_LATEST] = CADENCE_POS_INVALID;
    p_ivtc->pi_top_rep[IVTC_LATEST] = 0;
    p_ivtc->pi_bot_rep[IVTC_LATEST] = 0;
    p_ivtc->pi_motion[IVTC_LATEST] = -1;
    p_ivtc->pb_all_progressives[IVTC_LATEST] = false;

    /* Slide history of field pair interlace scores */
    p_ivtc->pi_scores[FIELD_PAIR_TPBP] = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
    p_ivtc->pi_scores[FIELD_PAIR_TPBC] = p_ivtc->pi_scores[FIELD_PAIR_TCBN];
    p_ivtc->pi_scores[FIELD_PAIR_TCBP] = p_ivtc->pi_scores[FIELD_PAIR_TNBC];
    p_ivtc->pi_scores[FIELD_PAIR_TCBC] = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
    /* These have not been detected yet */
    p_ivtc->pi_scores[FIELD_PAIR_TCBN] = 0;
    p_ivtc->pi_scores[FIELD_PAIR_TNBC] = 0;
    p_ivtc->pi_scores[FIELD_PAIR_TNBN] = 0;
3788 * Internal helper function for RenderIVTC(): computes various raw detector
3789 * data at the start of a new frame.
3791 * This function requires the input frame history buffer.
3792 * IVTCFrameInit() must have been called first.
3793 * Last two frames must be available in the history buffer.
3795 * This is an internal function only used by RenderIVTC().
3796 * There is no need to call this function manually.
3798 * @param p_filter The filter instance.
3800 * @see IVTCFrameInit()
static inline void IVTCLowLevelDetect( filter_t *p_filter )
    assert( p_filter != NULL );

    filter_sys_t *p_sys = p_filter->p_sys;
    ivtc_sys_t *p_ivtc = &p_sys->ivtc;
    picture_t *p_curr = p_sys->pp_history[1];
    picture_t *p_next = p_sys->pp_history[2];

    assert( p_next != NULL );
    assert( p_curr != NULL );

    /* Compute interlace scores for TNBN, TNBC and TCBN.
       Note that p_next contains TNBN. */
    p_ivtc->pi_scores[FIELD_PAIR_TNBN] = CalculateInterlaceScore( p_next,
    p_ivtc->pi_scores[FIELD_PAIR_TNBC] = CalculateInterlaceScore( p_next,
    p_ivtc->pi_scores[FIELD_PAIR_TCBN] = CalculateInterlaceScore( p_curr,

    /* Motion estimate between current and next frame: number of changed
       8x8 blocks, also broken down per field. */
    int i_top = 0, i_bot = 0;
    int i_motion = EstimateNumBlocksWithMotion(p_curr, p_next, &i_top, &i_bot);
    p_ivtc->pi_motion[IVTC_LATEST] = i_motion;

    /* If one field changes "clearly more" than the other, we know the
       less changed one is a likely duplicate.

       Threshold 1/2 is too low for some scenes (e.g. pan of the space junk
       at beginning of The Third ep. 1, right after the OP). Thus, we use 2/3,
       which seems to work.
    */
    p_ivtc->pi_top_rep[IVTC_LATEST] = (i_top <= 2*i_bot/3);
    p_ivtc->pi_bot_rep[IVTC_LATEST] = (i_bot <= 2*i_top/3);
3839 * Internal helper function for RenderIVTC(): using raw detector data,
3840 * detect cadence position by an interlace scores based algorithm ("scores").
3842 * IVTCFrameInit() and IVTCLowLevelDetect() must have been called first.
3843 * Last frame must be available in the history buffer.
3845 * This is an internal function only used by RenderIVTC().
3846 * There is no need to call this function manually.
3848 * @param p_filter The filter instance.
3850 * @see IVTCFrameInit()
3851 * @see IVTCLowLevelDetect()
3852 * @see IVTCCadenceDetectFinalize()
static inline void IVTCCadenceDetectAlgoScores( filter_t *p_filter )
    assert( p_filter != NULL );

    filter_sys_t *p_sys = p_filter->p_sys;
    ivtc_sys_t *p_ivtc = &p_sys->ivtc;
    picture_t *p_next = p_sys->pp_history[2];

    assert( p_next != NULL );

    /* Detect likely cadence position according to the tables,
       using the tabulated combinations of all 7 available interlace scores.
    */
    int pi_ivtc_scores[NUM_CADENCE_POS];
    for( int i = 0; i < NUM_CADENCE_POS; i++ )
        pi_ivtc_scores[i] = p_ivtc->pi_scores[ pi_best_field_pairs[i][0] ]
                          + p_ivtc->pi_scores[ pi_best_field_pairs[i][1] ]
                          + p_ivtc->pi_scores[ pi_best_field_pairs[i][2] ];

    /* Find the minimum-score ("most progressive-looking") position. */
    int j = CADENCE_POS_PROGRESSIVE; /* valid regardless of TFD */
    int minscore = pi_ivtc_scores[j];
    /* A TFF (respectively BFF) stream may only have TFF (respectively BFF)
       telecine. Don't bother looking at the wrong table. */
    int imin = CADENCE_POS_TFF_FIRST; /* first TFF-only entry */
    int iend = CADENCE_POS_TFF_END; /* one past last TFF-only entry */
    if( !p_next->b_top_field_first )

        imin = CADENCE_POS_BFF_FIRST; /* first BFF-only entry */
        iend = CADENCE_POS_BFF_END; /* one past last BFF-only entry */

    for( int i = imin; i < iend; i++ )

        if( pi_ivtc_scores[i] < minscore )

            minscore = pi_ivtc_scores[i];

    /* Now "j" contains the most likely position according to the tables,
       accounting also for video TFF/BFF. */
    p_ivtc->pi_s_cadence_pos[IVTC_LATEST] = j;

    /* Estimate reliability of detector result.

       We do this by checking if the winner is an outlier at least
       to some extent. For anyone better versed in statistics,
       feel free to improve this.
    */

    /* Compute sample mean with the winner included and without.

       Sample mean is defined as mu = sum( x_i, i ) / N ,
       where N is the number of samples.
    */
    int mean = pi_ivtc_scores[CADENCE_POS_PROGRESSIVE];
    int mean_except_min = 0;
    if( j != CADENCE_POS_PROGRESSIVE )
        mean_except_min = pi_ivtc_scores[CADENCE_POS_PROGRESSIVE];
    for( int i = imin; i < iend; i++ )

        mean += pi_ivtc_scores[i];

            mean_except_min += pi_ivtc_scores[i];

    /* iend points one past end, but progressive counts as the +1. */
    mean /= (iend - imin + 1);
    mean_except_min /= (iend - imin);

    /* Check how much excluding the winner changes the mean. */
    double mean_ratio = (double)mean_except_min / (double)mean;

    /* Let's pretend that the detected position is a stochastic variable.
       Compute sample variance with the winner included and without.

       var = sum( (x_i - mu)^2, i ) / N ,

       where mu is the sample mean.

       Note that we really need int64_t; the numbers are pretty large.
    */
    int64_t diff = (int64_t)(pi_ivtc_scores[CADENCE_POS_PROGRESSIVE] - mean);
    int64_t var = diff*diff;
    int64_t var_except_min = 0;
    if( j != CADENCE_POS_PROGRESSIVE )

        int64_t diff_exm = (int64_t)(pi_ivtc_scores[CADENCE_POS_PROGRESSIVE]
        var_except_min = diff_exm*diff_exm;

    for( int i = imin; i < iend; i++ )

        diff = (int64_t)(pi_ivtc_scores[i] - mean);

            int64_t diff_exm = (int64_t)(pi_ivtc_scores[i] - mean_except_min);
            var_except_min += (diff_exm*diff_exm);

    /* iend points one past end, but progressive counts as the +1. */
    var /= (uint64_t)(iend - imin + 1);
    var_except_min /= (uint64_t)(iend - imin);

    /* Extract cadence counter part of detected positions for the
       last two frames.

       Note that for the previous frame, we use the final detected cadence
       position, which was not necessarily produced by this algorithm.
       It is the result that was judged the most reliable.
    */
    int j_curr = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-1];
    int pos_next = pi_detected_pos_to_cadence_pos[j];

    /* Be optimistic when unsure. We bias the detection toward accepting
       the next "correct" position, even if the variance check comes up bad.
    */
    bool b_expected = false;
    if( j_curr != CADENCE_POS_INVALID )

        int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
        /* cadence must advance successively mod 5 */
        b_expected = (pos_next == (pos_curr + 1) % 5);

    /* Use motion detect result as a final sanity check.
       If no motion, the result from this algorithm cannot be reliable.
    */
    int i_blocks_with_motion = p_ivtc->pi_motion[IVTC_LATEST];

    /* The numbers given here are empirical constants that have been tuned
       through trial and error. The test material used was NTSC anime DVDs.

       Easy-to-detect parts seem to give variance boosts of 40-70%, but
       hard-to-detect parts sometimes only 18%. Anything with a smaller boost
       in variance doesn't seem reliable for catching a new lock-on,

       Additionally, it seems that if the mean changes by less than 0.5%,
       the result is not reliable.

       Note that the numbers given are only valid for the pi_best_field_pairs
       table.

       For motion detection, the detector seems good enough so that
       we can threshold at zero.
    */
    bool b_result_reliable =
        ( i_blocks_with_motion > 0 &&
          mean_ratio > 1.005 &&
          ( b_expected || ( (double)var > 1.17*(double)var_except_min ) )
    p_ivtc->pb_s_reliable[IVTC_LATEST] = b_result_reliable;
4008 * Internal helper function for RenderIVTC(): using raw detector data,
4009 * detect cadence position by a hard field repeat based algorithm ("vektor").
4011 * This algorithm is inspired by the classic TVTime/Xine IVTC filter
4012 * by Billy Biggs (Vektor); hence the name. There are however some
4013 * differences between this and the TVTime/Xine filter.
4015 * IVTCFrameInit() and IVTCLowLevelDetect() must have been called first.
4016 * Last frame must be available in the history buffer.
4018 * This is an internal function only used by RenderIVTC().
4019 * There is no need to call this function manually.
4021 * @param p_filter The filter instance.
4023 * @see IVTCFrameInit()
4024 * @see IVTCLowLevelDetect()
4025 * @see IVTCCadenceDetectFinalize()
4027 static inline void IVTCCadenceDetectAlgoVektor( filter_t *p_filter )
/* Purpose: maintain a bitmask of *possible* cadence positions ("detected"),
   widened by observed hard field repeats, narrowed by stream TFF/BFF and by
   the prediction carried over from the previous frame (pi_v_raw). A unique
   surviving bit means lock-on. */
4029 assert( p_filter != NULL );
4031 filter_sys_t *p_sys = p_filter->p_sys;
4032 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4034 picture_t *p_next = p_sys->pp_history[2];
4036 assert( p_next != NULL );
4038 /* This algorithm is based on detecting hard-repeated fields (by motion
4039 detection), and conservatively estimating what the seen repeats could
4040 mean for the cadence position.
4042 "Conservative" means that we do not rule out possibilities if repeats
4043 are *not* seen, but only *add* possibilities based on what repeats
4044 *are* seen. This is important. Otherwise full-frame repeats in the
4045 original film (8fps or 12fps animation is very common in anime),
4046 causing spurious field repeats, would mess up the detection.
4047 With this strategy, spurious repeats will only slow down the lock-on,
4048 and will not break an existing lock-on once acquired.
4050 Several possibilities are kept open until the sequence gives enough
4051 information to make a unique detection. When the sequence becomes
4052 inconsistent (e.g. bad cut), the detector resets itself.
4054 The main ideas taken from the TVTime/Xine algorithm are:
4055 1) Conservatively using information from detected field repeats,
4056 2) Cadence counting the earlier detection results and combining with
4057 the new detection result, and
4058 3) The observation that video TFF/BFF uniquely determines TFD.
4060 The main differences are
4061 1) Different motion detection (see EstimateNumBlocksWithMotion()).
4062 Vektor's original estimates the average top/bottom field diff
4063 over the last 3 frames, while ours uses a block-based approach
4064 for diffing and just compares the field diffs between "curr" and
4065 "next" against each other (see IVTCLowLevelDetect()).
4066 Both approaches are adaptive, but in a different way.
4067 2) The specific detection logic used is a bit different (see both
4068 codes for details; the original is in xine-lib, function
4069 determine_pulldown_offset_short_history_new() in pulldown.c;
4070 ours is obviously given below). I think the one given here
4073 Note that we don't have to worry about getting a detection in all cases.
4074 It's enough if we work reliably, say, 99% of the time, and the other 1%
4075 of the time just admit that we don't know the cadence position.
4076 (This mostly happens after a bad cut, when the new scene has
4077 "difficult" motion characteristics, such as repeated film frames.)
4078 Our frame composer is built to handle also cases where we have no
4079 reliable detection of the cadence position; see IVTCOutputOrDropFrame().
4080 More important is to never lock on incorrectly, as this would both
4081 generate interlacing artifacts where none existed, and cause motion
4082 to stutter (because duplicate frames would be shown and unique ones
4086 /* Progressive requires no repeats, so it is always a possibility.
4087 Filtering will drop it out if we know that the current position
/* NOTE(review): the declaration/zero-initialization of the "detected"
   bitmask is not visible in this extract; it presumably precedes this
   line — confirm against the full file. */
4091 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_PROGRESSIVE ];
4093 /* Add in other possibilities depending on field repeats seen during the
4094 last three input frames (i.e. two transitions between input frames).
4095 See the "Dups." column in the cadence tables.
4097 bool b_top_rep = p_ivtc->pi_top_rep[IVTC_LATEST];
4098 bool b_bot_rep = p_ivtc->pi_bot_rep[IVTC_LATEST];
4099 bool b_old_top_rep = p_ivtc->pi_top_rep[IVTC_LATEST-1];
4100 bool b_old_bot_rep = p_ivtc->pi_bot_rep[IVTC_LATEST-1];
/* NOTE(review): each pair of "|=" lines below is guarded by a condition on
   the four repeat flags above; the guard lines are not visible in this
   extract — verify the flag-to-position mapping against the full file. */
4103 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_EAB ];
4104 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_BCD ];
4108 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_ABC ];
4109 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_CDE ];
4113 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_BCD ];
4114 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_EAB ];
4118 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_CDE ];
4119 detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_ABC ];
4122 /* A TFF stream may only have TFF telecine, and similarly for BFF.
4123 Discard the possibility we know to be incorrect for this stream.
4124 (The stream may flipflop between the possibilities if it contains
4125 soft-telecined sequences or lone field repeats, so we must keep
4126 detecting this for each incoming frame.)
4128 bool b_tff = p_next->b_top_field_first;
/* Keep only the half of the position space matching the stream's field
   order (the if/else guard on b_tff is not visible in this extract). */
4130 detected &= VEKTOR_CADENCE_POS_TFF;
4132 detected &= VEKTOR_CADENCE_POS_BFF;
4134 /* Predict possible next positions based on our last detection.
4135 Begin with a shift and carry. */
4136 int predicted = p_ivtc->pi_v_raw[IVTC_LATEST-1];
/* The wrap flags remember which "high" bits would fall off the left end of
   the shift, so they can be re-injected as the corresponding "low" bits. */
4137 bool b_wrap_tff = false;
4138 bool b_wrap_bff = false;
4139 if( predicted & VEKTOR_CADENCE_POS_TFF_HIGH )
4141 if( predicted & VEKTOR_CADENCE_POS_BFF_HIGH )
4143 /* bump to next position and keep only valid bits */
4144 predicted = (predicted << 1) & VEKTOR_CADENCE_POS_ALL;
4147 predicted |= VEKTOR_CADENCE_POS_TFF_LOW;
4149 predicted |= VEKTOR_CADENCE_POS_BFF_LOW;
4151 /* Filter: narrow down possibilities based on previous detection,
4152 if consistent. If not consistent, reset the detector.
4153 Reset works better than just using the latest raw detection.
4155 if( (detected & predicted) != 0 )
4156 detected = detected & predicted;
/* else branch (line not visible): inconsistent with prediction — reset to
   "anything possible". */
4158 detected = VEKTOR_CADENCE_POS_ALL;
4160 /* We're done. Save result to our internal storage so we can use it
4161 for prediction at the next frame.
4163 Note that the outgoing frame check in IVTCOutputOrDropFrame()
4164 has a veto right, resetting our state if it determines that
4165 the cadence has become broken.
4167 p_ivtc->pi_v_raw[IVTC_LATEST] = detected;
4169 /* See if the position has been detected uniquely.
4170 If so, we have acquired a lock-on. */
4171 ivtc_cadence_pos exact = CADENCE_POS_INVALID;
4174 for( int i = 0; i < NUM_CADENCE_POS; i++ )
4176 /* Note that we must use "&" instead of just equality to catch
4177 the progressive case, and also not to trigger on an incomplete
4179 if( detected == (detected & pi_detected_pos_to_bitmask[i]) )
/* NOTE(review): the loop body that counts matches and records "exact"
   (and presumably breaks on a second match) is not visible here. */
4187 /* If the result was unique, now "exact" contains the detected
4188 cadence position (and otherwise CADENCE_POS_INVALID).
4190 In practice, if the result from this algorithm is unique,
4191 it is always reliable.
4193 p_ivtc->pi_v_cadence_pos[IVTC_LATEST] = exact;
4194 p_ivtc->pb_v_reliable[IVTC_LATEST] = (exact != CADENCE_POS_INVALID);
4198 * Internal helper function for RenderIVTC(): decide the final detected
4199 * cadence position for the current position of the PCN stencil,
4200 * using the results of the different cadence detection algorithms.
4202 * Must be called after all IVTCCadenceDetectAlgo*() functions.
4204 * This is an internal function only used by RenderIVTC().
4205 * There is no need to call this function manually.
4207 * @param p_filter The filter instance.
4209 * @see IVTCCadenceDetectAlgoScores()
4210 * @see IVTCCadenceDetectAlgoVektor()
4212 static inline void IVTCCadenceDetectFinalize( filter_t *p_filter )
4214 assert( p_filter != NULL );
4216 filter_sys_t *p_sys = p_filter->p_sys;
4217 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4219 /* In practice "vektor" is more reliable than "scores", but it may
4220 take longer to lock on. Thus, we prefer "vektor" if its reliable bit
4221 is set, then "scores", and finally just give up.
4223 For progressive sequences, "vektor" outputs "3, -, 3, -, ...",
4224 because the repeated progressive position is an inconsistent prediction.
4225 In this case, "scores" fills in the blanks. (This particular task
4226 could also be done without another cadence detector, by just
4227 detecting the alternating pattern of "3" and no result.)
4229 int pos = CADENCE_POS_INVALID;
4230 if( p_ivtc->pb_v_reliable[IVTC_LATEST] )
4231 pos = p_ivtc->pi_v_cadence_pos[IVTC_LATEST];
4232 else if( p_ivtc->pb_s_reliable[IVTC_LATEST] )
4233 pos = p_ivtc->pi_s_cadence_pos[IVTC_LATEST];
4234 p_ivtc->pi_cadence_pos_history[IVTC_LATEST] = pos;
4238 * Internal helper function for RenderIVTC(): using stream flags,
4239 * detect soft telecine.
4241 * This function is different from the other detectors; it may enter or exit
4242 * IVTC_MODE_TELECINED_NTSC_SOFT, if it detects that soft telecine has just
4243 * been entered or exited.
4245 * Upon exit from soft telecine, the filter will resume operation in its
4246 * previous mode (which it had when soft telecine was entered).
4248 * Last three frames must be available in the history buffer.
4250 * This is an internal function only used by RenderIVTC().
4251 * There is no need to call this function manually.
4253 * @param p_filter The filter instance.
4256 static inline void IVTCSoftTelecineDetect( filter_t *p_filter )
/* Purpose: enter/exit IVTC_MODE_TELECINED_NTSC_SOFT based purely on the
   nb_fields flag pattern of the prev/curr/next frames; on exit, restore
   the mode saved in i_old_mode. */
4258 assert( p_filter != NULL );
4260 filter_sys_t *p_sys = p_filter->p_sys;
4261 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4262 picture_t *p_prev = p_sys->pp_history[0];
4263 picture_t *p_curr = p_sys->pp_history[1];
4264 picture_t *p_next = p_sys->pp_history[2];
4266 assert( p_next != NULL );
4267 assert( p_curr != NULL );
4268 assert( p_prev != NULL );
4270 /* Soft telecine can be detected from the flag pattern:
4271 nb_fields = 3,2,3,2,... and *video* TFF = true, false, false, true
4272 (TFF telecine) or false, true, true, false (BFF telecine).
4274 We don't particularly care which field goes first, because in soft TC
4275 we're working with progressive frames. And in any case, the video FDs
4276 of successive frames must match any field repeats in order for field
4277 renderers (such as traditional DVD player + CRT TV) to work correctly.
4278 Thus the video TFF/BFF flag provides no additional useful information
4279 for us on top of checking nb_fields.
4281 The only thing to *do* to soft telecine in an IVTC filter is to even
4282 out the outgoing PTS diffs to 2.5 fields each, so that we get
4283 a steady 24fps output. Thus, we can do this processing even if it turns
4284 out that we saw a lone field repeat (which are also sometimes used,
4285 such as in the Silent Mobius OP and in Sol Bianca). We can be aggressive
4286 and don't need to care about false positives - as long as we are equally
4287 aggressive about dropping out of soft telecine mode the moment a "2" is
4288 followed by another "2" and not a "3" as in soft TC.
4290 Finally, we conclude that the one-frame future buffer is enough for us
4291 to make soft TC decisions just in time for rendering the frame in the
4292 "current" position. The flag patterns given below constitute proof
4295 Soft telecine is relatively rare at least in anime, but it exists;
4296 e.g. Angel Links OP, Silent Mobius, and Stellvia of the Universe have
4297 sequences that are soft telecined. Stellvia, especially, alternates
4298 between soft and hard telecine all the time.
4301 /* Valid stream flag patterns for soft telecine. There are three: */
4303 /* Entering soft telecine at frame curr, or running inside it already */
4304 bool b_soft_telecine_1 = (p_prev->i_nb_fields == 2) &&
4305 (p_curr->i_nb_fields == 3) &&
4306 (p_next->i_nb_fields == 2);
4307 /* Running inside soft telecine */
4308 bool b_soft_telecine_2 = (p_prev->i_nb_fields == 3) &&
4309 (p_curr->i_nb_fields == 2) &&
4310 (p_next->i_nb_fields == 3);
4311 /* Exiting soft telecine at frame curr (curr is the last frame
4312 that should be handled as soft TC) */
4313 bool b_soft_telecine_3 = (p_prev->i_nb_fields == 3) &&
4314 (p_curr->i_nb_fields == 2) &&
4315 (p_next->i_nb_fields == 2);
4317 /* Soft telecine is very clear-cut - the moment we see or do not see
4318 a valid flag pattern, we can change the filter mode.
4320 if( b_soft_telecine_1 || b_soft_telecine_2 || b_soft_telecine_3 )
4322 if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_SOFT )
/* NOTE(review): the continuation of this msg_Dbg string (the line after
   the backslash) is not visible in this extract — do not reconstruct it
   from memory; keep the original bytes. */
4324 msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC soft telecine "\
/* Save the previous mode so we can return to it when soft TC ends.
   This runs only on the transition *into* soft telecine. */
4326 p_ivtc->i_old_mode = p_ivtc->i_mode;
4329 /* Valid flag pattern seen, this frame is soft telecined */
4330 p_ivtc->i_mode = IVTC_MODE_TELECINED_NTSC_SOFT;
4332 /* Only used during IVTC'ing hard telecine. */
4333 p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
4334 p_ivtc->i_tfd = TFD_INVALID;
4336 /* Note: no flag pattern match now */
4337 else if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_SOFT )
4339 msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC soft telecine ended. "\
4340 "Returning to previous mode." );
4342 /* No longer soft telecined, return filter to the mode it had earlier.
4343 This is needed to fix cases where we came in from hard telecine, and
4344 should go back, but can't catch a cadence in time before telecined
4345 frames slip through. Kickstarting back to hard IVTC, using the
4346 emergency frame composer until the cadence locks on again,
4347 fixes the problem. This happens a lot in Stellvia.
4349 p_ivtc->i_mode = p_ivtc->i_old_mode;
4350 p_ivtc->i_cadence_pos = 0; /* Wild guess. The film frame reconstruction
4351 will start in emergency mode, and this
4352 will be filled in by the detector ASAP.*/
4353 /* I suppose video field dominance no longer flipflops. */
4354 p_ivtc->i_tfd = !p_next->b_top_field_first; /* tff <=> TFD == 0 */
4359 * Internal helper function for RenderIVTC(): using the history of detected
4360 * cadence positions, analyze the cadence and enter or exit
4361 * IVTC_MODE_TELECINED_NTSC_HARD when appropriate.
4363 * This also updates b_sequence_valid.
4365 * Last three frames must be available in the history buffer.
4367 * This is an internal function only used by RenderIVTC().
4368 * There is no need to call this function manually.
4370 * @param p_filter The filter instance.
4373 static void IVTCCadenceAnalyze( filter_t *p_filter )
/* Purpose: validate the last three detected cadence positions (successive
   mod 5), vote on TFF/BFF, and enter/exit IVTC_MODE_TELECINED_NTSC_HARD
   (or fall back to IVTC_MODE_DETECTING on a confirmed progressive run). */
4375 assert( p_filter != NULL );
4377 filter_sys_t *p_sys = p_filter->p_sys;
4378 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4379 picture_t *p_prev = p_sys->pp_history[0];
4380 picture_t *p_curr = p_sys->pp_history[1];
4381 picture_t *p_next = p_sys->pp_history[2];
4383 assert( p_next != NULL );
4384 assert( p_curr != NULL );
4385 assert( p_prev != NULL );
4387 /* Determine which frames in the buffer qualify for analysis.
4389 Note that hard telecine always has nb_fields = 2 and
4390 video TFF = constant (i.e. the stream flags look no different from
4391 a true interlaced or true progressive stream). Basically, no one ever
4392 sets the progressive frame flag for the input frames d, e, and a -
4393 in practice they're all flagged as interlaced.
4395 A frame may qualify for hard TC analysis if it has no soft field repeat
4396 (i.e. it cannot be part of a soft telecine). The condition
4397 nb_fields == 2 must always match.
4399 Additionally, curr and next must have had motion with respect to the
4400 previous frame, to ensure that the different field combinations have
4401 produced unique pictures.
4403 Alternatively, if there was no motion, but the cadence position was
4404 reliably detected and it was the expected one, we qualify the frame
4405 for analysis (mainly, for TFD voting).
4407 We only proceed with the cadence analysis if all three frames
4408 in the buffer qualify.
4411 /* Note that these are the final detected positions
4412 produced by IVTCCadenceDetectFinalize(). */
4413 int j_next = p_ivtc->pi_cadence_pos_history[IVTC_LATEST];
4414 int j_curr = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-1];
4415 int j_prev = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-2];
/* b_expected: "next" landed exactly one cadence step after "curr". */
4417 bool b_expected = false;
4418 if( j_next != CADENCE_POS_INVALID && j_curr != CADENCE_POS_INVALID )
4420 int pos_next = pi_detected_pos_to_cadence_pos[j_next];
4421 int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
4422 b_expected = (pos_next == (pos_curr + 1) % 5);
/* b_old_expected: same check one step earlier ("curr" after "prev"). */
4424 bool b_old_expected = false;
4425 if( j_curr != CADENCE_POS_INVALID && j_prev != CADENCE_POS_INVALID )
4427 int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
4428 int pos_prev = pi_detected_pos_to_cadence_pos[j_prev];
4429 b_old_expected = (pos_curr == (pos_prev + 1) % 5);
4432 int i_motion = p_ivtc->pi_motion[IVTC_LATEST];
4433 int i_old_motion = p_ivtc->pi_motion[IVTC_LATEST-1];
4435 bool b_prev_valid = (p_prev->i_nb_fields == 2);
4436 bool b_curr_valid = (p_curr->i_nb_fields == 2) &&
4437 (i_old_motion > 0 || b_old_expected);
4438 bool b_next_valid = (p_next->i_nb_fields == 2) &&
4439 (i_motion > 0 || b_expected);
4440 bool b_no_invalids = (b_prev_valid && b_curr_valid && b_next_valid);
4442 /* Final sanity check: see that the detection history has been
4443 completely filled, i.e. the latest three positions of the stencil
4444 have given a result from the cadence detector.
4448 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
4450 const int i_detected_pos = p_ivtc->pi_cadence_pos_history[i];
4451 if( i_detected_pos == CADENCE_POS_INVALID )
4453 b_no_invalids = false;
4459 /* If still ok, do the analysis. */
4460 p_ivtc->b_sequence_valid = false; /* needed in frame reconstruction */
/* NOTE(review): the guard "if( b_no_invalids )" that wraps the rest of the
   analysis is not visible in this extract — confirm against the full file. */
4463 /* Convert the history elements to cadence position and TFD. */
4464 int pi_tfd[IVTC_DETECTION_HISTORY_SIZE];
4465 int pi_pos[IVTC_DETECTION_HISTORY_SIZE];
4466 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
4468 const int i_detected_pos = p_ivtc->pi_cadence_pos_history[i];
4469 pi_pos[i] = pi_detected_pos_to_cadence_pos[i_detected_pos];
4470 pi_tfd[i] = pi_detected_pos_to_tfd[i_detected_pos];
4473 /* See if the sequence is valid. The cadence positions must be
4474 successive mod 5. We can't say anything about TFF/BFF yet,
4475 because the progressive-looking position "dea" may be there.
4476 If the sequence otherwise looks valid, we handle that last
4479 We also test for a progressive signal here, so that we know
4480 when to exit IVTC_MODE_TELECINED_NTSC_HARD.
4482 p_ivtc->b_sequence_valid = true;
4483 bool b_all_progressive = (pi_pos[0] == 3);
/* NOTE(review): the seed of the running counter "j" (presumably
   "int j = pi_pos[0];") is not visible in this extract. */
4485 for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
4487 if( pi_pos[i] != (++j % 5) )
4488 p_ivtc->b_sequence_valid = false;
4489 if( pi_pos[i] != 3 )
4490 b_all_progressive = false;
4492 p_ivtc->pb_all_progressives[IVTC_LATEST] = b_all_progressive;
4494 if( p_ivtc->b_sequence_valid )
4496 /* Determine TFF/BFF. */
/* NOTE(review): the companion vote counters (i_vote_tff, i_vote_bff) and
   the increments inside the loop below are not visible in this extract. */
4497 int i_vote_invalid = 0;
4500 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
4502 if( pi_tfd[i] == TFD_INVALID )
4504 else if( pi_tfd[i] == TFD_TFF )
4506 else if( pi_tfd[i] == TFD_BFF )
4510 /* With three entries, two votes for any one item are enough
4511 to decide this conclusively. */
4512 int i_telecine_field_dominance = TFD_INVALID;
4513 if( i_vote_tff >= 2)
4514 i_telecine_field_dominance = TFD_TFF;
4515 else if( i_vote_bff >= 2)
4516 i_telecine_field_dominance = TFD_BFF;
4517 /* In all other cases, "invalid" won or no winner.
4518 This means no NTSC telecine detected. */
4520 /* Lock on to the cadence if it was valid and TFF/BFF was found.
4522 Also, aggressively update the cadence counter from the
4523 lock-on data whenever we can. In practice this has been found
4524 to be a reliable strategy (if the cadence detectors are
4527 if( i_telecine_field_dominance == TFD_TFF )
4529 if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_HARD )
4530 msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC TFF "\
4531 "hard telecine detected." );
4532 p_ivtc->i_mode = IVTC_MODE_TELECINED_NTSC_HARD;
4533 p_ivtc->i_cadence_pos = pi_pos[IVTC_LATEST];
4534 p_ivtc->i_tfd = TFD_TFF;
4536 else if( i_telecine_field_dominance == TFD_BFF )
4538 if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_HARD )
4539 msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC BFF "\
4540 "hard telecine detected." );
4541 p_ivtc->i_mode = IVTC_MODE_TELECINED_NTSC_HARD;
4542 p_ivtc->i_cadence_pos = pi_pos[IVTC_LATEST];
4543 p_ivtc->i_tfd = TFD_BFF;
4546 /* No telecine... maybe a progressive signal? */
4547 else if( b_all_progressive )
4549 /* It seems that in practice, three "3"s in a row can still be
4550 a fluke rather often. Four or five usually are not.
4551 This fixes the Stellvia OP. */
4553 bool b_really_all_progressive = true;
4554 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE ; i++ )
4556 if( p_ivtc->pb_all_progressives[i] == false )
4558 b_really_all_progressive = false;
4563 /* If we still think the signal is progressive... */
4564 if( b_really_all_progressive )
4566 /* ...exit film mode immediately. This does not break
4567 soft TC handling, because for soft TC at least one
4568 of the frames will not qualify (due to i_nb_fields == 3),
4569 and in that case this analysis will not run.
4571 if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_HARD )
4572 msg_Dbg( p_filter, "IVTC: 3:2 pulldown: progressive "\
4573 "signal detected." );
4574 p_ivtc->i_mode = IVTC_MODE_DETECTING;
4575 p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
4576 p_ivtc->i_tfd = TFD_INVALID;
4579 /* Final missing "else": no valid NTSC telecine sequence detected.
4581 Either there is no telecine, or the detector - although it produced
4582 results - had trouble finding it. In this case we do nothing,
4583 as it's not a good idea to act on unreliable data.
4585 Note that if we are already in IVTC_MODE_TELECINED_NTSC_HARD, this
4586 case means that we have lost the lock-on, but are still (probably)
4587 in a hard-telecined stream. This will start the emergency mode
4588 for film frame reconstruction. See IVTCOutputOrDropFrame().
4594 * Internal helper function for RenderIVTC(): render or drop frame,
4595 * whichever needs to be done. This also sets the output frame PTS.
4597 * Last two frames must be available in the history buffer.
4599 * This is an internal function only used by RenderIVTC().
4600 * There is no need to call this function manually.
4602 * @param p_filter The filter instance. Must be non-NULL.
4603 * @param[out] p_dst Frame will be rendered here. Must be non-NULL.
4604 * @return Whether a frame was constructed.
4605 * @retval true Yes, output frame is in p_dst.
4606 * @retval false No, this frame was dropped as part of normal IVTC operation.
4609 static bool IVTCOutputOrDropFrame( filter_t *p_filter, picture_t *p_dst )
/* Purpose: choose a reconstruction operation (copy/compose/drop) for the
   current stencil position, render it into p_dst, slide the outgoing
   interlace-score history, and apply PTS mangling. Returns false only on
   the cadence-driven frame drop. */
4611 assert( p_filter != NULL );
4612 assert( p_dst != NULL );
4614 filter_sys_t *p_sys = p_filter->p_sys;
4615 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4616 mtime_t t_final = VLC_TS_INVALID; /* for custom timestamp mangling */
4618 picture_t *p_curr = p_sys->pp_history[1];
4619 picture_t *p_next = p_sys->pp_history[2];
4621 assert( p_next != NULL );
4622 assert( p_curr != NULL );
4624 /* Perform IVTC if we're in film mode (either hard or soft telecine).
4626 Note that we don't necessarily have a lock-on, even if we are in
4627 IVTC_MODE_TELECINED_NTSC_HARD. We *may* be locked on, or alternatively,
4628 we have seen a valid cadence some time in the past, but lock-on has
4629 since been lost, and we have not seen a progressive signal after that.
4630 The latter case usually results from bad cuts, which interrupt
4633 Lock-on state is given by p_ivtc->b_sequence_valid.
/* NOTE(review): the declaration of "op" (the chosen reconstruction
   operation) is not visible in this extract; it presumably sits near this
   line — confirm against the full file. */
4635 int i_result_score = -1;
4637 if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_HARD )
4639 /* Decide what to do. The operation table is only enabled
4640 if the cadence seems reliable. Otherwise we use a backup strategy.
4642 if( p_ivtc->b_sequence_valid )
4644 assert( p_ivtc->i_cadence_pos != CADENCE_POS_INVALID );
4645 assert( p_ivtc->i_tfd != TFD_INVALID );
4647 /* Pick correct operation from the operation table. */
4648 op = pi_reconstruction_ops[p_ivtc->i_tfd][p_ivtc->i_cadence_pos];
4650 if( op == IVTC_OP_DROP_FRAME )
4652 /* Bump cadence counter into the next expected position */
/* NOTE(review): "x = ++x % 5" modifies i_cadence_pos twice without a
   sequence point — undefined behavior (C11 6.5p2). Should be
   "p_ivtc->i_cadence_pos = (p_ivtc->i_cadence_pos + 1) % 5;". */
4653 p_ivtc->i_cadence_pos = ++p_ivtc->i_cadence_pos % 5;
4655 /* Drop frame. We're done. */
/* NOTE(review): the "return false;" that implements the drop is not
   visible in this extract. */
4660 if( op == IVTC_OP_COPY_N )
4661 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
4662 else if( op == IVTC_OP_COPY_C )
4663 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
4664 else if( op == IVTC_OP_COMPOSE_TNBC )
4665 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBC];
4666 else if( op == IVTC_OP_COMPOSE_TCBN )
4667 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBN];
4669 /* Sanity check the result */
4671 /* Compute running mean of outgoing interlace score.
4672 See below for history mechanism. */
/* NOTE(review): the declaration/initialization of "i_avg" is not visible
   in this extract. */
4674 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++)
4675 i_avg += p_ivtc->pi_final_scores[i];
4676 i_avg /= IVTC_DETECTION_HISTORY_SIZE;
4678 /* Check if the score suddenly became "clearly larger".
4679 Also, filter out spurious peaks at the low end. */
4680 if( i_result_score > 1000 && i_result_score > 2*i_avg )
4682 /* Sequence wasn't reliable after all; we'll use
4683 the Transcode strategy for this frame. */
4684 p_ivtc->b_sequence_valid = false;
4685 msg_Dbg( p_filter, "Rejected cadence-based frame "\
4686 "construction: interlace score %d "\
4687 "(running average %d)",
4688 i_result_score, i_avg );
4690 /* We also reset the detector used in the "vektor"
4691 algorithm, as it depends on having a reliable previous
4692 position. In practice, we continue using the Transcode
4693 strategy until the cadence becomes locked on again.
4694 (At that point, b_sequence_valid will become true again,
4695 and we continue with this strategy.)
4697 p_ivtc->pi_v_raw[IVTC_LATEST] = VEKTOR_CADENCE_POS_ALL;
4702 /* Frame not dropped, and the cadence counter seems unreliable.
4704 Note that this is not an "else" to the previous case. This may
4705 begin with a valid sequence, and then the above logic decides
4706 that it wasn't valid after all.
4708 if( !p_ivtc->b_sequence_valid )
4710 /* In this case, we must proceed with no cadence information.
4711 We use a Transcode-like strategy.
4713 We check which field paired with TN or BN (accounting for
4714 the field dominance) gives the smallest interlace score,
4715 and declare that combination the resulting progressive frame.
4717 This strategy gives good results on average, but often fails
4718 in talking scenes in anime. Those can be handled more reliably
4719 with a locked-on cadence produced by the "vektor" algorithm.
4722 int tnbn = p_ivtc->pi_scores[FIELD_PAIR_TNBN]; /* TFF and BFF */
4723 int tnbc = p_ivtc->pi_scores[FIELD_PAIR_TNBC]; /* TFF only */
4724 int tcbn = p_ivtc->pi_scores[FIELD_PAIR_TCBN]; /* BFF only */
4726 if( p_next->b_top_field_first )
/* NOTE(review): the score-comparison guards (presumably tnbn vs tnbc,
   and tnbn vs tcbn in the BFF branch) are not visible in this extract. */
4730 op = IVTC_OP_COPY_N;
4731 i_result_score = tnbn;
4735 op = IVTC_OP_COMPOSE_TNBC;
4736 i_result_score = tnbc;
4743 op = IVTC_OP_COPY_N;
4744 i_result_score = tnbn;
4748 op = IVTC_OP_COMPOSE_TCBN;
4749 i_result_score = tcbn;
4754 /* Mangle timestamps when locked on.
4756 "Current" is the frame that is being extracted now. Use its original
4757 timestamp as the base.
4759 Note that this way there will be no extra delay compared to the
4760 raw stream, even though we look one frame into the future.
4762 if( p_ivtc->b_sequence_valid )
4764 /* Convert 29.97 -> 23.976 fps. We get to this point only if we
4765 didn't drop the frame, so we always get a valid delta.
4767 int i_timestamp_delta = pi_timestamp_deltas[p_ivtc->i_cadence_pos];
4768 assert( i_timestamp_delta >= 0 );
4770 /* FIXME: use field length as measured by Deinterlace()? */
4771 t_final = p_curr->date
4772 + (p_next->date - p_curr->date)*i_timestamp_delta/4;
4774 else /* Do not mangle timestamps (or drop frames, either) if cadence
4775 is not locked on. This causes one of five output frames - if
4776 all are reconstructed correctly - to be a duplicate, but in
4777 practice at least with anime (which is the kind of material
4778 that tends to have this problem) this is less noticeable than
4779 a sudden jump in the cadence. Especially, a consistently wrong
4780 lock-on will cause a very visible stutter, which we wish
4783 t_final = p_curr->date;
4786 /* Bump cadence counter into the next expected position. */
/* NOTE(review): same unsequenced-modification UB as above (C11 6.5p2);
   should be "(p_ivtc->i_cadence_pos + 1) % 5". */
4787 p_ivtc->i_cadence_pos = ++p_ivtc->i_cadence_pos % 5;
4789 else if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_SOFT )
4791 /* Soft telecine. We have the progressive frames already;
4792 even out PTS diffs only. */
4794 /* Pass through the "current" frame. We must choose the frame "current"
4795 in order to be able to detect soft telecine before we have to output
4796 the frame. See IVTCSoftTelecineDetect(). Also, this allows
4797 us to peek at the next timestamp to calculate the duration of
4800 op = IVTC_OP_COPY_C;
4801 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
4803 /* Timestamp mangling for soft telecine: bump "threes" forward by
4804 0.5 field durations. This is more forgiving for the renderer
4805 than bumping the "twos" back (which would require to render
4808 if( p_curr->i_nb_fields == 3 )
4810 /* Approximate field duration from the PTS difference. */
4811 /* FIXME: use field length as measured by Deinterlace()? */
4812 mtime_t i_half_field_dur = ( (p_next->date - p_curr->date)/3 ) / 2;
4813 t_final = p_curr->date + i_half_field_dur;
4815 else /* Otherwise, use original PTS of the outgoing frame. */
4817 t_final = p_curr->date;
4820 else /* Not film mode, timestamp mangling bypassed. */
4822 op = IVTC_OP_COPY_N;
4823 i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
4825 /* Preserve original PTS (note that now, in principle,
4826 "next" is the outgoing frame) */
4827 t_final = p_next->date;
4830 /* There is only one case where we should drop the frame,
4831 and it was already handled above. */
4832 assert( op != IVTC_OP_DROP_FRAME );
4834 /* Render into p_dst according to the final operation chosen. */
4835 if( op == IVTC_OP_COPY_N )
4836 picture_Copy( p_dst, p_next );
4837 else if( op == IVTC_OP_COPY_C )
4838 picture_Copy( p_dst, p_curr );
4839 else if( op == IVTC_OP_COMPOSE_TNBC )
4840 ComposeFrame( p_filter, p_dst, p_next, p_curr, CC_ALTLINE );
4841 else if( op == IVTC_OP_COMPOSE_TCBN )
4842 ComposeFrame( p_filter, p_dst, p_curr, p_next, CC_ALTLINE );
4844 /* Slide history of outgoing interlace scores. This must be done last,
4845 and only if the frame was not dropped, so we do it here.
4847 This is used during the reconstruction to get an idea of what is
4848 (in the temporally local sense) an acceptable interlace score
4849 for a correctly reconstructed frame. See above.
4851 for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
4852 p_ivtc->pi_final_scores[i-1] = p_ivtc->pi_final_scores[i];
4853 p_ivtc->pi_final_scores[IVTC_LATEST] = i_result_score;
4855 /* Note that picture_Copy() copies the PTS, too. Apply timestamp mangling
4856 now, if any was needed.
4858 if( t_final > VLC_TS_INVALID )
4859 p_dst->date = t_final;
/* NOTE(review): the trailing "return true;" (frame successfully produced)
   is not visible in this extract. */
4864 /* The top-level routine of the IVTC filter.
4866    See the lengthy comment above for function documentation.
/* NOTE(review): return-value contract as established by the code below:
   VLC_SUCCESS when p_dst has been filled with an output frame,
   VLC_EGENERIC when the caller should not expect an output frame
   (input history not ready, or the frame was deliberately dropped). */
4868 static int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_src )
4870     assert( p_filter != NULL );
4871     assert( p_src != NULL );
4872     assert( p_dst != NULL );
4874     filter_sys_t *p_sys = p_filter->p_sys;
4875     ivtc_sys_t *p_ivtc = &p_sys->ivtc;
/* Input frame history: index 0 is the oldest, index 2 the latest frame
   (the one just logged by Deinterlace()). */
4877     picture_t *p_prev = p_sys->pp_history[0];
4878     picture_t *p_curr = p_sys->pp_history[1];
4879     picture_t *p_next = p_sys->pp_history[2];
4881     /* If the history mechanism has failed, we have nothing to do. */
4883         return VLC_EGENERIC;
4885     /* Slide algorithm-specific histories */
4886     IVTCFrameInit( p_filter );
4888     /* Filter if we have all the pictures we need.
4889        Note that we always have p_next at this point. */
4890     if( p_prev && p_curr )
4892         /* Update raw data for motion, field repeats, interlace scores... */
4893         IVTCLowLevelDetect( p_filter );
4895         /* Detect soft telecine.
4897            Enter/exit IVTC_MODE_TELECINED_NTSC_SOFT when needed.
4899         IVTCSoftTelecineDetect( p_filter );
4901         /* Detect hard telecine.
4903            Enter/exit IVTC_MODE_TELECINED_NTSC_HARD when needed.
4905            If we happen to be running in IVTC_MODE_TELECINED_NTSC_SOFT,
4906            we nevertheless let the algorithms see for themselves that
4907            the stream is progressive. This doesn't break anything,
4908            and this way the full filter state gets updated at each frame.
4910            See the individual function docs for details.
4912         IVTCCadenceDetectAlgoScores( p_filter );
4913         IVTCCadenceDetectAlgoVektor( p_filter );
4914         IVTCCadenceDetectFinalize( p_filter ); /* pick winner */
4915         IVTCCadenceAnalyze( p_filter ); /* update filter state */
/* IVTCOutputOrDropFrame() renders into p_dst and reports whether an
   output frame was actually produced (it drops IVTC duplicates). */
4918         bool b_have_output_frame = IVTCOutputOrDropFrame( p_filter, p_dst );
4920         /* The next frame will get a custom timestamp, too. */
4921         p_sys->i_frame_offset = CUSTOM_PTS;
4923         if( b_have_output_frame )
4926             return VLC_EGENERIC; /* Signal the caller not to expect a frame */
4928     else if( !p_prev && !p_curr ) /* first frame */
4930         /* Render the first frame as-is, so that a picture appears immediately.
4932            We will also do some init for the filter. This score will become
4933            TPBP by the time the actual filter starts. Note that the sliding of
4934            final scores only starts when the filter has started (third frame).
4936         int i_score = CalculateInterlaceScore( p_next, p_next );
4937         p_ivtc->pi_scores[FIELD_PAIR_TNBN] = i_score;
4938         p_ivtc->pi_final_scores[0] = i_score;
4940         picture_Copy( p_dst, p_next );
4943     else /* second frame */
4945         /* If the history sliding mechanism works correctly,
4946            the only remaining possibility is that: */
4947         assert( p_curr && !p_prev );
4949         /* We need three frames for the cadence detector to work, so we just
4950            do some init for the detector and pass the frame through.
4951            Passthrough for second frame, too, works better than drop
4952            for some still-image DVD menus.
4954            Now that we have two frames, we can run a full IVTCLowLevelDetect().
4956            The interlace scores from here will become TCBC, TCBP and TPBC
4957            when the filter starts. The score for the current TCBC has already
4958            been computed at the first frame, and slid into place at the start
4959            of this frame (by IVTCFrameInit()).
4961         IVTCLowLevelDetect( p_filter );
4963         /* Note that the sliding mechanism for output scores only starts
4964            when the actual filter does.
4966         p_ivtc->pi_final_scores[1] = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
4968         /* At the next frame, the filter starts. The next frame will get
4969            a custom timestamp. */
4970         p_sys->i_frame_offset = CUSTOM_PTS;
4972         picture_Copy( p_dst, p_next );
4978 * Clears the inverse telecine subsystem state.
4980 * Used during initialization and uninitialization.
4982 * @param p_filter The filter instance.
4987 static void IVTCClearState( filter_t *p_filter )
4989 assert( p_filter != NULL );
4991 filter_sys_t *p_sys = p_filter->p_sys;
4992 ivtc_sys_t *p_ivtc = &p_sys->ivtc;
4994 p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
4995 p_ivtc->i_tfd = TFD_INVALID;
4996 p_ivtc->b_sequence_valid = false;
4997 p_ivtc->i_mode = IVTC_MODE_DETECTING;
4998 p_ivtc->i_old_mode = IVTC_MODE_DETECTING;
4999 for( int i = 0; i < IVTC_NUM_FIELD_PAIRS; i++ )
5000 p_ivtc->pi_scores[i] = 0;
5001 for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
5003 p_ivtc->pi_cadence_pos_history[i] = CADENCE_POS_INVALID;
5005 p_ivtc->pi_s_cadence_pos[i] = CADENCE_POS_INVALID;
5006 p_ivtc->pb_s_reliable[i] = false;
5007 p_ivtc->pi_v_cadence_pos[i] = CADENCE_POS_INVALID;
5008 p_ivtc->pb_v_reliable[i] = false;
5010 p_ivtc->pi_v_raw[i] = VEKTOR_CADENCE_POS_ALL;
5012 p_ivtc->pi_top_rep[i] = 0;
5013 p_ivtc->pi_bot_rep[i] = 0;
5014 p_ivtc->pi_motion[i] = -1;
5016 p_ivtc->pb_all_progressives[i] = false;
5018 p_ivtc->pi_final_scores[i] = 0;
5022 /*****************************************************************************
5023 * video filter2 functions
5024 *****************************************************************************/
5025 #define DEINTERLACE_DST_SIZE 3
/**
 * Top-level deinterlace callback (pf_video_filter).
 *
 * Consumes p_pic (released on every path seen below) and renders the
 * output picture(s) with the currently selected algorithm. Framerate
 * doublers chain up to DEINTERLACE_DST_SIZE outputs via p_dst[i-1]->p_next.
 * NOTE(review): presumably returns p_dst[0] on success and NULL when no
 * output frame is produced (allocation failure or dropped frame) -- the
 * return statements are not visible here; confirm against the full source.
 */
5026 static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
5028     filter_sys_t *p_sys = p_filter->p_sys;
5029     picture_t *p_dst[DEINTERLACE_DST_SIZE];
5031     /* Request output picture */
5032     p_dst[0] = filter_NewPicture( p_filter );
5033     if( p_dst[0] == NULL )
5035         picture_Release( p_pic );
5038     picture_CopyProperties( p_dst[0], p_pic );
5040     /* Any unused p_dst pointers must be NULL, because they are used to check how many output frames we have. */
5041     for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
5044     /* Update the input frame history, if the currently active algorithm needs it. */
5045     if( p_sys->b_use_frame_history )
5047         /* Duplicate the picture
5048          * TODO when the vout rework is finished, picture_Hold() might be enough
5049          * but becarefull, the pitches must match */
5050         picture_t *p_dup = picture_NewFromFormat( &p_pic->format );
5052             picture_Copy( p_dup, p_pic );
5054         /* Slide the history */
5055         if( p_sys->pp_history[0] )
5056             picture_Release( p_sys->pp_history[0] );
5057         for( int i = 1; i < HISTORY_SIZE; i++ )
5058             p_sys->pp_history[i-1] = p_sys->pp_history[i];
5059         p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
5062     /* Slide the metadata history. */
5063     for( int i = 1; i < METADATA_SIZE; i++ )
5065         p_sys->meta.pi_date[i-1] = p_sys->meta.pi_date[i];
5066         p_sys->meta.pi_nb_fields[i-1] = p_sys->meta.pi_nb_fields[i];
5067         p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
5069     /* The last element corresponds to the current input frame. */
5070     p_sys->meta.pi_date[METADATA_SIZE-1] = p_pic->date;
5071     p_sys->meta.pi_nb_fields[METADATA_SIZE-1] = p_pic->i_nb_fields;
5072     p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;
5074     /* Remember the frame offset that we should use for this frame.
5075        The value in p_sys will be updated to reflect the correct value
5076        for the *next* frame when we call the renderer. */
5077     int i_frame_offset = p_sys->i_frame_offset;
5078     int i_meta_idx = (METADATA_SIZE-1) - i_frame_offset;
5080     /* These correspond to the current *outgoing* frame. */
5081     bool b_top_field_first;
5083     if( i_frame_offset != CUSTOM_PTS )
5085         /* Pick the correct values from the history. */
5086         b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
5087         i_nb_fields = p_sys->meta.pi_nb_fields[i_meta_idx];
5091         /* Framerate doublers must not request CUSTOM_PTS, as they need the original field timings,
5092            and need Deinterlace() to allocate the correct number of output frames. */
5093         assert( !p_sys->b_double_rate );
5095         /* NOTE: i_nb_fields is only used for framerate doublers, so it is unused in this case.
5096            b_top_field_first is only passed to the algorithm. We assume that algorithms that
5097            request CUSTOM_PTS will, if necessary, extract the TFF/BFF information themselves.
5099         b_top_field_first = p_pic->b_top_field_first; /* this is not guaranteed to be meaningful */
5100         i_nb_fields = p_pic->i_nb_fields; /* unused */
5103     /* For framerate doublers, determine field duration and allocate output frames. */
5104     mtime_t i_field_dur = 0;
5105     int i_double_rate_alloc_end = 0; /* One past last for allocated output frames in p_dst[].
5106                                         Used only for framerate doublers. Will be inited below.
5107                                         Declared here because the PTS logic needs the result. */
5108     if( p_sys->b_double_rate )
5110         /* Calculate one field duration. */
5112         int iend = METADATA_SIZE-1;
5113         /* Find oldest valid logged date. Note: the current input frame doesn't count. */
5114         for( ; i < iend; i++ )
5115             if( p_sys->meta.pi_date[i] > VLC_TS_INVALID )
5119         /* Count how many fields the valid history entries (except the new frame) represent. */
5120         int i_fields_total = 0;
5121         for( int j = i ; j < iend; j++ )
5122             i_fields_total += p_sys->meta.pi_nb_fields[j];
5123         /* One field took this long. */
5124         i_field_dur = (p_pic->date - p_sys->meta.pi_date[i]) / i_fields_total;
5126         /* Note that we default to field duration 0 if it could not be determined.
5127            This behaves the same as the old code - leaving the extra output frame
5128            dates the same as p_pic->date if the last cached date was not valid.
5131         i_double_rate_alloc_end = i_nb_fields;
5132         if( i_nb_fields > DEINTERLACE_DST_SIZE )
5134             /* Note that the effective buffer size depends also on the constant private_picture in vout_wrapper.c,
5135                since that determines the maximum number of output pictures filter_NewPicture() will successfully
5136                allocate for one input frame.
5138             msg_Err( p_filter, "Framerate doubler: output buffer too small; fields = %d, buffer size = %d. Dropping the remaining fields.", i_nb_fields, DEINTERLACE_DST_SIZE );
5139             i_double_rate_alloc_end = DEINTERLACE_DST_SIZE;
5142         /* Allocate output frames. */
5143         for( int i = 1; i < i_double_rate_alloc_end ; ++i )
5145             p_dst[i-1]->p_next =
5146             p_dst[i]           = filter_NewPicture( p_filter );
5149                 picture_CopyProperties( p_dst[i], p_pic );
5153                 msg_Err( p_filter, "Framerate doubler: could not allocate output frame %d", i+1 );
5154                 i_double_rate_alloc_end = i; /* Inform the PTS logic about the correct end position. */
5155                 break; /* If this happens, the rest of the allocations aren't likely to work, either... */
5158         /* Now we have allocated *up to* the correct number of frames; normally, exactly the correct number.
5159            Upon alloc failure, we may have succeeded in allocating *some* output frames, but fewer than
5160            were desired. In such a case, as many will be rendered as were successfully allocated.
5162            Note that now p_dst[i] != NULL for 0 <= i < i_double_rate_alloc_end. */
5164     assert( p_sys->b_double_rate || p_dst[1] == NULL );
5165     assert( i_nb_fields > 2 || p_dst[2] == NULL );
/* Dispatch to the renderer of the currently selected mode. The 2x modes
   render one output per field; the others render a single frame. */
5168     switch( p_sys->i_mode )
5170         case DEINTERLACE_DISCARD:
5171             RenderDiscard( p_filter, p_dst[0], p_pic, 0 );
5174         case DEINTERLACE_BOB:
5175             RenderBob( p_filter, p_dst[0], p_pic, !b_top_field_first );
5177                 RenderBob( p_filter, p_dst[1], p_pic, b_top_field_first );
5179                 RenderBob( p_filter, p_dst[2], p_pic, !b_top_field_first );
5182         case DEINTERLACE_LINEAR:
5183             RenderLinear( p_filter, p_dst[0], p_pic, !b_top_field_first );
5185                 RenderLinear( p_filter, p_dst[1], p_pic, b_top_field_first );
5187                 RenderLinear( p_filter, p_dst[2], p_pic, !b_top_field_first );
5190         case DEINTERLACE_MEAN:
5191             RenderMean( p_filter, p_dst[0], p_pic );
5194         case DEINTERLACE_BLEND:
5195             RenderBlend( p_filter, p_dst[0], p_pic );
5199             RenderX( p_dst[0], p_pic );
5202         case DEINTERLACE_YADIF:
5203             if( RenderYadif( p_filter, p_dst[0], p_pic, 0, 0 ) )
5207         case DEINTERLACE_YADIF2X:
5208             if( RenderYadif( p_filter, p_dst[0], p_pic, 0, !b_top_field_first ) )
5211                 RenderYadif( p_filter, p_dst[1], p_pic, 1, b_top_field_first );
5213                 RenderYadif( p_filter, p_dst[2], p_pic, 2, !b_top_field_first );
5216         case DEINTERLACE_PHOSPHOR:
5217             if( RenderPhosphor( p_filter, p_dst[0], p_pic, 0,
5218                                 !b_top_field_first ) )
5221                 RenderPhosphor( p_filter, p_dst[1], p_pic, 1,
5222                                 b_top_field_first );
5224                 RenderPhosphor( p_filter, p_dst[2], p_pic, 2,
5225                                 !b_top_field_first );
5228         case DEINTERLACE_IVTC:
5229             /* Note: RenderIVTC will automatically drop the duplicate frames
5230                produced by IVTC. This is part of normal operation. */
5231             if( RenderIVTC( p_filter, p_dst[0], p_pic ) )
5236     /* Set output timestamps, if the algorithm didn't request CUSTOM_PTS for this frame. */
5237     assert( i_frame_offset <= METADATA_SIZE || i_frame_offset == CUSTOM_PTS );
5238     if( i_frame_offset != CUSTOM_PTS )
5240         mtime_t i_base_pts = p_sys->meta.pi_date[i_meta_idx];
5242         /* Note: in the usual case (i_frame_offset = 0 and b_double_rate = false),
5243            this effectively does nothing. This is needed to correct the timestamp
5244            when i_frame_offset > 0. */
5245         p_dst[0]->date = i_base_pts;
5247         if( p_sys->b_double_rate )
5249             /* Processing all actually allocated output frames. */
5250             for( int i = 1; i < i_double_rate_alloc_end; ++i )
5252                 /* XXX it's not really good especially for the first picture, but
5253                  * I don't think that delaying by one frame is worth it */
5254                 if( i_base_pts > VLC_TS_INVALID )
5255                     p_dst[i]->date = i_base_pts + i * i_field_dur;
5257                     p_dst[i]->date = VLC_TS_INVALID;
/* The outputs are deinterlaced by construction: mark them progressive. */
5262     for( int i = 0; i < DEINTERLACE_DST_SIZE; ++i )
5266         p_dst[i]->b_progressive = true;
5267         p_dst[i]->i_nb_fields = 2;
5271     picture_Release( p_pic );
/* NOTE(review): the releases below appear to be the shared drop/error
   cleanup path (all allocated outputs plus the input are released) --
   confirm the label placement against the full source. */
5275     picture_Release( p_dst[0] );
5276     for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
5279             picture_Release( p_dst[i] );
5281     picture_Release( p_pic );
5285 static void Flush( filter_t *p_filter )
5287 filter_sys_t *p_sys = p_filter->p_sys;
5289 for( int i = 0; i < METADATA_SIZE; i++ )
5291 p_sys->meta.pi_date[i] = VLC_TS_INVALID;
5292 p_sys->meta.pi_nb_fields[i] = 2;
5293 p_sys->meta.pb_top_field_first[i] = true;
5295 p_sys->i_frame_offset = 0; /* reset to default value (first frame after flush cannot have offset) */
5296 for( int i = 0; i < HISTORY_SIZE; i++ )
5298 if( p_sys->pp_history[i] )
5299 picture_Release( p_sys->pp_history[i] );
5300 p_sys->pp_history[i] = NULL;
5302 IVTCClearState( p_filter );
5305 static int Mouse( filter_t *p_filter,
5306 vlc_mouse_t *p_mouse, const vlc_mouse_t *p_old, const vlc_mouse_t *p_new )
5310 if( p_filter->p_sys->b_half_height )
5316 /*****************************************************************************
5318 *****************************************************************************/
/**
 * Open: initialise the deinterlace filter.
 *
 * Checks chroma support, allocates the private data, selects the fastest
 * available line-merge routine for this CPU, parses the filter options,
 * configures the selected mode (including Phosphor tunables), validates
 * the output format, and installs the filter callbacks.
 *
 * @param p_this The filter object (a filter_t in disguise).
 * @return VLC_SUCCESS on success, VLC_EGENERIC on unsupported chroma or
 *         disallowed output format change.
 */
5319 static int Open( vlc_object_t *p_this )
5321     filter_t *p_filter = (filter_t*)p_this;
5322     filter_sys_t *p_sys;
5324     if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
5325         return VLC_EGENERIC;
/* NOTE(review): confirm the malloc result is checked (VLC_ENOMEM path)
   before p_sys is dereferenced below. */
5328     p_sys = p_filter->p_sys = malloc( sizeof( *p_sys ) );
/* Defaults; SetFilterMethod() below overrides them per the "mode" option. */
5332     p_sys->i_mode = DEINTERLACE_BLEND;
5333     p_sys->b_double_rate = false;
5334     p_sys->b_half_height = true;
5335     p_sys->b_use_frame_history = false;
5336     for( int i = 0; i < METADATA_SIZE; i++ )
5338         p_sys->meta.pi_date[i] = VLC_TS_INVALID;
5339         p_sys->meta.pi_nb_fields[i] = 2;
5340         p_sys->meta.pb_top_field_first[i] = true;
5342     p_sys->i_frame_offset = 0; /* start with default value (first-ever frame cannot have offset) */
5343     for( int i = 0; i < HISTORY_SIZE; i++ )
5344         p_sys->pp_history[i] = NULL;
5346     IVTCClearState( p_filter );
/* Select the merge implementation for this CPU. MergeGeneric is the
   fallback. NOTE(review): presumably these blocks form an if/else
   cascade so the first matching capability wins -- confirm chaining
   in the full source. */
5348 #if defined(CAN_COMPILE_C_ALTIVEC)
5349     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
5351         p_sys->pf_merge = MergeAltivec;
5352         p_sys->pf_end_merge = NULL;
5356 #if defined(CAN_COMPILE_SSE)
5357     if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
5359         p_sys->pf_merge = MergeSSE2;
5360         p_sys->pf_end_merge = EndMMX;
5364 #if defined(CAN_COMPILE_MMXEXT)
5365     if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
5367         p_sys->pf_merge = MergeMMXEXT;
5368         p_sys->pf_end_merge = EndMMX;
5372 #if defined(CAN_COMPILE_3DNOW)
5373     if( vlc_CPU() & CPU_CAPABILITY_3DNOW )
5375         p_sys->pf_merge = Merge3DNow;
5376         p_sys->pf_end_merge = End3DNow;
5380 #if defined __ARM_NEON__
5381     if( vlc_CPU() & CPU_CAPABILITY_NEON )
5383         p_sys->pf_merge = MergeNEON;
5384         p_sys->pf_end_merge = NULL;
5389         p_sys->pf_merge = MergeGeneric;
5390         p_sys->pf_end_merge = NULL;
/* Read the user's mode selection and configure the filter accordingly. */
5394     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
5397     char *psz_mode = var_GetNonEmptyString( p_filter, FILTER_CFG_PREFIX "mode" );
5398     SetFilterMethod( p_filter, psz_mode, p_filter->fmt_in.video.i_chroma );
/* Phosphor mode has two tunables; validate both and fall back to sane
   defaults when unset or out of range. */
5401     if( p_sys->i_mode == DEINTERLACE_PHOSPHOR )
5403         int i_c420 = var_GetInteger( p_filter,
5404                                      FILTER_CFG_PREFIX "phosphor-chroma" );
5405         if( i_c420 != PC_LATEST  &&  i_c420 != PC_ALTLINE  &&
5406             i_c420 != PC_BLEND   && i_c420 != PC_UPCONVERT )
5408             msg_Dbg( p_filter, "Phosphor 4:2:0 input chroma mode not set"\
5409                                "or out of range (valid: 1, 2, 3 or 4), "\
5411             i_c420 = PC_ALTLINE;
5413         msg_Dbg( p_filter, "using Phosphor 4:2:0 input chroma mode %d",
5415         /* This maps directly to the phosphor_chroma_t enum. */
5416         p_sys->phosphor.i_chroma_for_420 = i_c420;
5418         int i_dimmer = var_GetInteger( p_filter,
5419                                        FILTER_CFG_PREFIX "phosphor-dimmer" );
5420         if( i_dimmer < 1  ||  i_dimmer > 4 )
5422             msg_Dbg( p_filter, "Phosphor dimmer strength not set "\
5423                                "or out of range (valid: 1, 2, 3 or 4), "\
5425             i_dimmer = 2; /* low */
5427         msg_Dbg( p_filter, "using Phosphor dimmer strength %d", i_dimmer );
5428         /* The internal value ranges from 0 to 3. */
5429         p_sys->phosphor.i_dimmer_strength = i_dimmer - 1;
5433         p_sys->phosphor.i_chroma_for_420 = PC_ALTLINE;
5434         p_sys->phosphor.i_dimmer_strength = 1;
/* Compute the output format; refuse modes that would change chroma or
   height when the owner does not allow an output format change. */
5439     GetOutputFormat( p_filter, &fmt, &p_filter->fmt_in.video );
5440     if( !p_filter->b_allow_fmt_out_change &&
5441         ( fmt.i_chroma != p_filter->fmt_in.video.i_chroma ||
5442           fmt.i_height != p_filter->fmt_in.video.i_height ) )
5444         Close( VLC_OBJECT(p_filter) );
5445         return VLC_EGENERIC;
5447     p_filter->fmt_out.video = fmt;
5448     p_filter->fmt_out.i_codec = fmt.i_chroma;
5449     p_filter->pf_video_filter = Deinterlace;
5450     p_filter->pf_video_flush  = Flush;
5451     p_filter->pf_video_mouse  = Mouse;
5453     msg_Dbg( p_filter, "deinterlacing" );
5458 /*****************************************************************************
5459 * Close: clean up the filter
5460 *****************************************************************************/
5461 static void Close( vlc_object_t *p_this )
5463 filter_t *p_filter = (filter_t*)p_this;
5466 free( p_filter->p_sys );