1 /*****************************************************************************
2 * helpers.c : Generic helper functions for the VLC deinterlacer
3 *****************************************************************************
4 * Copyright (C) 2011 the VideoLAN team
7 * Author: Juha Jeronen <juha.jeronen@jyu.fi>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #ifdef CAN_COMPILE_MMXEXT
35 #include <vlc_common.h>
37 #include <vlc_filter.h>
38 #include <vlc_picture.h>
40 #include "deinterlace.h" /* definition of p_sys, needed for Merge() */
41 #include "common.h" /* FFMIN3 et al. */
46 /*****************************************************************************
48 *****************************************************************************/
51 * This internal function converts a normal (full frame) plane_t into a field plane_t.
54 * Field plane_t's can be used e.g. for a weaving copy operation from two
55 * source frames into one destination frame.
57 * The pixels themselves will not be touched; only the metadata is generated.
58 * The same pixel data is shared by both the original plane_t and the field
59 * plane_t. Note, however, that the bottom field's data starts from the
60 * second line, so for the bottom field, the actual pixel pointer value
61 * does not exactly match the original plane pixel pointer value. (It points
62 * one line further down.)
64 * The caller must allocate p_dst (creating a local variable is fine).
66 * @param p_dst Field plane_t is written here. Must be non-NULL.
67 * @param p_src Original full-frame plane_t. Must be non-NULL.
68 * @param i_field Extract which field? 0 = top field, 1 = bottom field.
69 * @see plane_CopyPixels()
71 * @see RenderPhosphor()
// FieldFromPlane: describes one field of *p_src in the caller-provided *p_dst.
// Visible effects in this listing: p_dst->i_visible_lines is halved and
// p_dst->p_pixels is advanced by p_src->i_pitch (one source line).
// NOTE(review): the metadata copy and the pitch doubling described in the
// comment below, a guard restricting the pointer advance to i_field == 1,
// and the braces are not visible in this listing - confirm against the
// complete file before relying on this text.
73 static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field )
// Preconditions: both pointers are non-NULL and i_field is 0 or 1.
75 assert( p_dst != NULL );
76 assert( p_src != NULL );
77 assert( i_field == 0 || i_field == 1 );
79 /* Start with a copy of the metadata, and then update it to refer
82 We utilize the fact that plane_CopyPixels() differentiates between
83 visible_pitch and pitch.
85 The other field will be defined as the "margin" by doubling the pitch.
86 The visible pitch will be left as in the original.
90 p_dst->i_visible_lines /= 2;
92 /* For the bottom field, skip the first line in the pixel data. */
94 p_dst->p_pixels += p_src->i_pitch;
98 * Internal helper function for EstimateNumBlocksWithMotion():
99 * estimates whether there is motion in the given 8x8 block on one plane
100 * between two images. The block as a whole and its fields are evaluated
101 * separately, and use different motion thresholds.
103 * This is a low-level function only used by EstimateNumBlocksWithMotion().
104 * There is no need to call this function manually.
106 * For interpretation of pi_top and pi_bot, it is assumed that the block
107 * starts on an even-numbered line (belonging to the top field).
109 * The b_mmx parameter avoids the need to call vlc_CPU() separately for each block.
112 * @param[in] p_pix_p Base pointer to the block in previous picture
113 * @param[in] p_pix_c Base pointer to the same block in current picture
114 * @param i_pitch_prev i_pitch of previous picture
115 * @param i_pitch_curr i_pitch of current picture
116 * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
117 * @param[out] pi_top 1 if top field of the block had motion, 0 if no
118 * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
119 * @return 1 if the block had motion, 0 if no
120 * @see EstimateNumBlocksWithMotion()
// TestForMotionInBlock: compares one 8x8 block between the previous picture
// (p_pix_p) and the current picture (p_pix_c). Three scores are accumulated:
// one for the lines treated as top field, one for the lines treated as
// bottom field, and one for the whole block. *pi_top, *pi_bot and the return
// value are each 1 when the corresponding score is at least 8, else 0.
// NOTE(review): the doc comment refers to a b_mmx parameter that is not
// visible in this signature. The definition of the threshold T, the load of
// the threshold into mm5, the braces, the preprocessor else/endif and the
// scaling of the MMX scores also appear to be missing from this listing -
// confirm against the complete file before editing.
122 static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c,
123 int i_pitch_prev, int i_pitch_curr,
125 int* pi_top, int* pi_bot )
127 /* Pixel luma/chroma difference threshold to detect motion. */
130 int32_t i_motion = 0;
131 int32_t i_top_motion = 0;
132 int32_t i_bot_motion = 0;
134 /* See below for the C version to see more quickly what this does. */
135 #ifdef CAN_COMPILE_MMXEXT
138 static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };
139 pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */
142 pxor_r2r( mm3, mm3 ); /* score (top field) */
143 pxor_r2r( mm4, mm4 ); /* score (bottom field) */
// MMX path: every iteration handles two consecutive lines of 8 pixels,
// the first one counted as top field and the second one as bottom field.
144 for( int y = 0; y < 8; y+=2 )
// First line of the pair: load current and previous pixels, subtract in both
// directions (psubusb), compare against mm5 (pcmpgtb), sum the bytes of each
// result (psadbw against the zeroed mm6) and accumulate into mm3.
147 movq_m2r( *((uint64_t*)p_pix_c), mm0 );
148 movq_m2r( *((uint64_t*)p_pix_p), mm1 );
149 movq_r2r( mm0, mm2 );
150 psubusb_r2r( mm1, mm2 );
151 psubusb_r2r( mm0, mm1 );
153 pcmpgtb_r2r( mm5, mm2 );
154 pcmpgtb_r2r( mm5, mm1 );
155 psadbw_r2r( mm6, mm2 );
156 psadbw_r2r( mm6, mm1 );
158 paddd_r2r( mm2, mm1 );
159 paddd_r2r( mm1, mm3 ); /* add to top field score */
// Step both pointers to the next line, each with its own pitch.
161 p_pix_c += i_pitch_curr;
162 p_pix_p += i_pitch_prev;
164 /* bottom field - handling identical to top field, except... */
165 movq_m2r( *((uint64_t*)p_pix_c), mm0 );
166 movq_m2r( *((uint64_t*)p_pix_p), mm1 );
167 movq_r2r( mm0, mm2 );
168 psubusb_r2r( mm1, mm2 );
169 psubusb_r2r( mm0, mm1 );
171 pcmpgtb_r2r( mm5, mm2 );
172 pcmpgtb_r2r( mm5, mm1 );
173 psadbw_r2r( mm6, mm2 );
174 psadbw_r2r( mm6, mm1 );
176 paddd_r2r( mm2, mm1 );
177 paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */
179 p_pix_c += i_pitch_curr;
180 p_pix_p += i_pitch_prev;
// Block total is the sum of both field scores; the three registers are then
// stored into the C variables.
182 movq_r2r( mm3, mm7 ); /* score (total) */
183 paddd_r2r( mm4, mm7 );
184 movd_r2m( mm3, i_top_motion );
185 movd_r2m( mm4, i_bot_motion );
186 movd_r2m( mm7, i_motion );
// NOTE(review): the statements that undo the factor of 255 mentioned below
// are not visible in this listing.
188 /* The loop counts actual score * 255. */
// Plain C path: walk all 8 lines of the block, 8 pixels per line.
198 for( int y = 0; y < 8; ++y )
200 uint8_t *pc = p_pix_c;
201 uint8_t *pp = p_pix_p;
203 for( int x = 0; x < 8; ++x )
// C is the absolute difference between the current and previous pixel.
205 int_fast16_t C = abs((*pc) - (*pp));
// NOTE(review): the declaration and update of score, and the condition that
// selects the top or bottom accumulator, are not visible in this listing.
215 i_top_motion += score;
217 i_bot_motion += score;
219 p_pix_c += i_pitch_curr;
220 p_pix_p += i_pitch_prev;
224 /* Field motion thresholds.
226 Empirical value - works better in practice than the "4" that
227 would be consistent with the full-block threshold.
229 Especially the opening scene of The Third ep. 1 (just after the OP)
230 works better with this. It also fixes some talking scenes in
231 Stellvia ep. 1, where the cadence would otherwise catch on incorrectly,
232 leading to more interlacing artifacts than by just using the emergency
235 (*pi_top) = ( i_top_motion >= 8 );
236 (*pi_bot) = ( i_bot_motion >= 8 );
238 /* Full-block threshold = (8*8)/8: motion is detected if 1/8 of the block
240 return (i_motion >= 8);
244 /*****************************************************************************
246 *****************************************************************************/
248 /* See header for function doc. */
// ComposeFrame: fills p_outpic plane by plane from two input pictures,
// taking the top field from p_inpic_top and the bottom field from
// p_inpic_bottom. Luma, 4:2:2 chroma and CC_ALTLINE chroma are copied field
// by field with FieldFromPlane() and plane_CopyPixels(). Other 4:2:0 chroma
// is handled according to i_output_chroma:
//   CC_UPCONVERT     - each whole input chroma plane fills one output field
//   CC_SOURCE_TOP    - chroma plane copied from p_inpic_top only
//   CC_SOURCE_BOTTOM - chroma plane copied from p_inpic_bottom only
//   CC_MERGE         - both input chroma planes combined line by line via Merge()
// NOTE(review): the declarations of i_out_plane and of the plane_t
// temporaries (dst_top, dst_bottom, src_top, src_bottom), the braces and the
// else keywords are not visible in this listing - confirm against the
// complete file before editing.
249 void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
250 picture_t *p_inpic_top, picture_t *p_inpic_bottom,
251 compose_chroma_t i_output_chroma )
253 assert( p_filter != NULL );
254 assert( p_outpic != NULL );
255 assert( p_inpic_top != NULL );
256 assert( p_inpic_bottom != NULL );
258 /* Valid 4:2:0 chroma handling modes. */
259 assert( i_output_chroma == CC_ALTLINE ||
260 i_output_chroma == CC_UPCONVERT ||
261 i_output_chroma == CC_SOURCE_TOP ||
262 i_output_chroma == CC_SOURCE_BOTTOM ||
263 i_output_chroma == CC_MERGE );
// Upconversion is only performed when the input is not already 4:2:2 and the
// caller asked for CC_UPCONVERT.
265 const int i_chroma = p_filter->fmt_in.video.i_chroma;
266 const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
267 i_chroma == VLC_CODEC_J422;
268 const bool b_upconvert_chroma = ( !b_i422 &&
269 i_output_chroma == CC_UPCONVERT );
// The plane count of p_inpic_top drives the loop.
271 for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
273 bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );
275 /* YV12 is YVU, but I422 is YUV. For such input, swap chroma planes
276 in output when converting to 4:2:2. */
278 if( b_is_chroma_plane && b_upconvert_chroma &&
279 i_chroma == VLC_CODEC_YV12 )
281 if( i_plane == U_PLANE )
282 i_out_plane = V_PLANE;
284 i_out_plane = U_PLANE;
// Without the swap, the output plane index equals the input plane index.
288 i_out_plane = i_plane;
291 /* Copy luma or chroma, alternating between input fields. */
292 if( !b_is_chroma_plane || b_i422 || i_output_chroma == CC_ALTLINE )
294 /* Do an alternating line copy. This is always done for luma,
295 and for 4:2:2 chroma. It can be requested for 4:2:0 chroma
296 using CC_ALTLINE (see function doc).
298 Note that when we get here, the number of lines matches
305 FieldFromPlane( &dst_top, &p_outpic->p[i_out_plane], 0 );
306 FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );
307 FieldFromPlane( &src_top, &p_inpic_top->p[i_plane], 0 );
308 FieldFromPlane( &src_bottom, &p_inpic_bottom->p[i_plane], 1 );
310 /* Copy each field from the corresponding source. */
311 plane_CopyPixels( &dst_top, &src_top );
312 plane_CopyPixels( &dst_bottom, &src_bottom );
314 else /* Input 4:2:0, on a chroma plane, and not in altline mode. */
316 if( i_output_chroma == CC_UPCONVERT )
318 /* Upconverting copy - use all data from both input fields.
320 This produces an output picture with independent chroma
321 for each field. It can be used for general input when
322 the two input frames are different.
324 The output is 4:2:2, but the input is 4:2:0. Thus the output
325 has twice the lines of the input, and each full chroma plane
326 in the input corresponds to a field chroma plane in the
331 FieldFromPlane( &dst_top, &p_outpic->p[i_out_plane], 0 );
332 FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );
334 /* Copy each field from the corresponding source. */
335 plane_CopyPixels( &dst_top, &p_inpic_top->p[i_plane] );
336 plane_CopyPixels( &dst_bottom, &p_inpic_bottom->p[i_plane] );
338 else if( i_output_chroma == CC_SOURCE_TOP )
340 /* Copy chroma of input top field. Ignore chroma of input
341 bottom field. Input and output are both 4:2:0, so we just
342 copy the whole plane. */
343 plane_CopyPixels( &p_outpic->p[i_out_plane],
344 &p_inpic_top->p[i_plane] );
346 else if( i_output_chroma == CC_SOURCE_BOTTOM )
348 /* Copy chroma of input bottom field. Ignore chroma of input
349 top field. Input and output are both 4:2:0, so we just
350 copy the whole plane. */
351 plane_CopyPixels( &p_outpic->p[i_out_plane],
352 &p_inpic_bottom->p[i_plane] );
354 else /* i_output_chroma == CC_MERGE */
356 /* Average the chroma of the input fields.
357 Input and output are both 4:2:0. */
358 uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
359 p_in_top = p_inpic_top->p[i_plane].p_pixels;
360 p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
361 p_out = p_outpic->p[i_out_plane].p_pixels;
// The end pointer covers the visible lines of the output plane.
362 p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
363 * p_outpic->p[i_out_plane].i_visible_lines;
// Merge width is the smallest visible pitch of the three planes involved.
// NOTE(review): the output plane is indexed with i_plane here while the
// surrounding lines use i_out_plane. The swap above requires CC_UPCONVERT,
// so both indices should be equal in this branch - consider i_out_plane for
// consistency.
365 int w = FFMIN3( p_inpic_top->p[i_plane].i_visible_pitch,
366 p_inpic_bottom->p[i_plane].i_visible_pitch,
367 p_outpic->p[i_plane].i_visible_pitch );
// One Merge() call per output line; each pointer advances by its own pitch.
369 for( ; p_out < p_out_end ; )
371 Merge( p_out, p_in_top, p_in_bottom, w );
372 p_out += p_outpic->p[i_out_plane].i_pitch;
373 p_in_top += p_inpic_top->p[i_plane].i_pitch;
374 p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
382 /* See header for function doc. */
// EstimateNumBlocksWithMotion: walks every plane of p_prev and p_curr in
// whole 8x8 blocks and calls TestForMotionInBlock() on each block. The
// return values are summed into i_score and the per-field flags into
// i_score_top and i_score_bot, which are written to *pi_top and *pi_bot.
// NOTE(review): the declarations of the three score variables, the actions
// taken when the plane counts or line counts differ, the per-block advance
// of the pixel pointers, the b_mmx argument of the helper call and the
// return statement are not visible in this listing - confirm against the
// complete file before editing.
383 int EstimateNumBlocksWithMotion( const picture_t* p_prev,
384 const picture_t* p_curr,
385 int *pi_top, int *pi_bot)
387 assert( p_prev != NULL );
388 assert( p_curr != NULL );
// Both pictures are expected to have the same number of planes.
393 if( p_prev->i_planes != p_curr->i_planes )
396 /* We must tell our inline helper whether to use MMX acceleration. */
397 #ifdef CAN_COMPILE_MMXEXT
398 bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );
404 for( int i_plane = 0 ; i_plane < p_prev->i_planes ; i_plane++ )
// Both pictures are expected to have the same visible height on this plane.
407 if( p_prev->p[i_plane].i_visible_lines !=
408 p_curr->p[i_plane].i_visible_lines )
411 const int i_pitch_prev = p_prev->p[i_plane].i_pitch;
412 const int i_pitch_curr = p_curr->p[i_plane].i_pitch;
414 /* Last pixels and lines (which do not make whole blocks) are ignored.
415 Shouldn't really matter for our purposes. */
// Block counts: rows from the visible lines of p_prev, columns from the
// smaller visible pitch of the two pictures.
416 const int i_mby = p_prev->p[i_plane].i_visible_lines / 8;
417 const int w = FFMIN( p_prev->p[i_plane].i_visible_pitch,
418 p_curr->p[i_plane].i_visible_pitch );
419 const int i_mbx = w / 8;
421 for( int by = 0; by < i_mby; ++by )
// Start of block row by in each picture, using that picture's own pitch.
423 uint8_t *p_pix_p = &p_prev->p[i_plane].p_pixels[i_pitch_prev*8*by];
424 uint8_t *p_pix_c = &p_curr->p[i_plane].p_pixels[i_pitch_curr*8*by];
426 for( int bx = 0; bx < i_mbx; ++bx )
428 int i_top_temp, i_bot_temp;
429 i_score += TestForMotionInBlock( p_pix_p, p_pix_c,
430 i_pitch_prev, i_pitch_curr,
432 &i_top_temp, &i_bot_temp );
433 i_score_top += i_top_temp;
434 i_score_bot += i_bot_temp;
// Report the per-field block counts through the output pointers.
443 (*pi_top) = i_score_top;
445 (*pi_bot) = i_score_bot;
450 /* See header for function doc. */
451 int CalculateInterlaceScore( const picture_t* p_pic_top,
452 const picture_t* p_pic_bot )
455 We use the comb metric from the IVTC filter of Transcode 1.1.5.
456 This was found to work better for the particular purpose of IVTC
457 than RenderX()'s comb metric.
459 Note that we *must not* subsample at all in order to catch interlacing
460 in telecined frames with localized motion (e.g. anime with characters
461 talking, where only mouths move and everything else stays still.)
464 assert( p_pic_top != NULL );
465 assert( p_pic_bot != NULL );
467 if( p_pic_top->i_planes != p_pic_bot->i_planes )
470 unsigned u_cpu = vlc_CPU();
472 /* Amount of bits must be known for MMX, thus int32_t.
473 Doesn't hurt the C implementation. */
476 #ifdef CAN_COMPILE_MMXEXT
477 if( u_cpu & CPU_CAPABILITY_MMXEXT )
478 pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */
481 for( int i_plane = 0 ; i_plane < p_pic_top->i_planes ; ++i_plane )
484 if( p_pic_top->p[i_plane].i_visible_lines !=
485 p_pic_bot->p[i_plane].i_visible_lines )
488 const int i_lasty = p_pic_top->p[i_plane].i_visible_lines-1;
489 const int w = FFMIN( p_pic_top->p[i_plane].i_visible_pitch,
490 p_pic_bot->p[i_plane].i_visible_pitch );
491 const int wm8 = w % 8; /* remainder */
492 const int w8 = w - wm8; /* part of width that is divisible by 8 */
494 /* Current line / neighbouring lines picture pointers */
495 const picture_t *cur = p_pic_bot;
496 const picture_t *ngh = p_pic_top;
497 int wc = cur->p[i_plane].i_pitch;
498 int wn = ngh->p[i_plane].i_pitch;
500 /* Transcode 1.1.5 only checks every other line. Checking every line
501 works better for anime, which may contain horizontal,
502 one pixel thick cartoon outlines.
504 for( int y = 1; y < i_lasty; ++y )
506 uint8_t *p_c = &cur->p[i_plane].p_pixels[y*wc]; /* this line */
507 uint8_t *p_p = &ngh->p[i_plane].p_pixels[(y-1)*wn]; /* prev line */
508 uint8_t *p_n = &ngh->p[i_plane].p_pixels[(y+1)*wn]; /* next line */
512 /* Threshold (value from Transcode 1.1.5) */
514 #ifdef CAN_COMPILE_MMXEXT
515 /* Easy-to-read C version further below.
517 Assumptions: 0 < T < 127
518 # of pixels < (2^32)/255
519 Note: calculates score * 255
521 if( u_cpu & CPU_CAPABILITY_MMXEXT )
523 static const mmx_t b0 = { .uq = 0x0000000000000000ULL };
524 static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
525 static const mmx_t bT = { .ub = { T, T, T, T, T, T, T, T } };
527 for( ; x < w8; x += 8 )
529 movq_m2r( *((int64_t*)p_c), mm0 );
530 movq_m2r( *((int64_t*)p_p), mm1 );
531 movq_m2r( *((int64_t*)p_n), mm2 );
533 psubb_m2r( b128, mm0 );
534 psubb_m2r( b128, mm1 );
535 psubb_m2r( b128, mm2 );
537 psubsb_r2r( mm0, mm1 );
538 psubsb_r2r( mm0, mm2 );
540 pxor_r2r( mm3, mm3 );
541 pxor_r2r( mm4, mm4 );
542 pxor_r2r( mm5, mm5 );
543 pxor_r2r( mm6, mm6 );
545 punpcklbw_r2r( mm1, mm3 );
546 punpcklbw_r2r( mm2, mm4 );
547 punpckhbw_r2r( mm1, mm5 );
548 punpckhbw_r2r( mm2, mm6 );
550 pmulhw_r2r( mm3, mm4 );
551 pmulhw_r2r( mm5, mm6 );
553 packsswb_r2r(mm4, mm6);
554 pcmpgtb_m2r( bT, mm6 );
555 psadbw_m2r( b0, mm6 );
556 paddd_r2r( mm6, mm7 );
566 /* Worst case: need 17 bits for "comb". */
567 int_fast32_t C = *p_c;
568 int_fast32_t P = *p_p;
569 int_fast32_t N = *p_n;
571 /* Comments in Transcode's filter_ivtc.c attribute this
572 combing metric to Gunnar Thalin.
574 The idea is that if the picture is interlaced, both
575 expressions will have the same sign, and this comes
576 up positive. The value T = 100 has been chosen such
577 that a pixel difference of 10 (on average) will
578 trigger the detector.
580 int_fast32_t comb = (P - C) * (N - C);
589 /* Now the other field - swap current and neighbour pictures */
590 const picture_t *tmp = cur;
599 #ifdef CAN_COMPILE_MMXEXT
600 if( u_cpu & CPU_CAPABILITY_MMXEXT )
602 movd_r2m( mm7, i_score );