git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace/helpers.c

   1 /*****************************************************************************
   2  * helpers.c : Generic helper functions for the VLC deinterlacer
   3  *****************************************************************************
   4  * Copyright (C) 2011 the VideoLAN team
   5  * $Id$
   6  *
   7  * Author: Juha Jeronen <juha.jeronen@jyu.fi>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  22  *****************************************************************************/
  23
  24 #ifdef HAVE_CONFIG_H
  25 #   include "config.h"
  26 #endif
  27
  28 #ifdef CAN_COMPILE_MMXEXT
  29 #   include "mmx.h"
  30 #endif
  31
  32 #include <stdint.h>
  33 #include <assert.h>
  34
  35 #include <vlc_common.h>
  36 #include <vlc_cpu.h>
  37 #include <vlc_filter.h>
  38 #include <vlc_picture.h>
  39
  40 #include "deinterlace.h" /* definition of p_sys, needed for Merge() */
  41 #include "common.h"      /* FFMIN3 et al. */
  42 #include "merge.h"
  43
  44 #include "helpers.h"
  45
  46 /*****************************************************************************
  47  * Internal functions
  48  *****************************************************************************/
  49
  50 /**
  51  * This internal function converts a normal (full frame) plane_t into a
  52  * field plane_t.
  53  *
  54  * Field plane_t's can be used e.g. for a weaving copy operation from two
  55  * source frames into one destination frame.
  56  *
  57  * The pixels themselves will not be touched; only the metadata is generated.
  58  * The same pixel data is shared by both the original plane_t and the field
  59  * plane_t. Note, however, that the bottom field's data starts from the
  60  * second line, so for the bottom field, the actual pixel pointer value
  61  * does not exactly match the original plane pixel pointer value. (It points
  62  * one line further down.)
  63  *
  64  * The caller must allocate p_dst (creating a local variable is fine).
  65  *
  66  * @param p_dst Field plane_t is written here. Must be non-NULL.
  67  * @param p_src Original full-frame plane_t. Must be non-NULL.
  68  * @param i_field Extract which field? 0 = top field, 1 = bottom field.
  69  * @see plane_CopyPixels()
  70  * @see ComposeFrame()
  71  * @see RenderPhosphor()
  72  */
  73 static void FieldFromPlane( plane_t *p_dst, const plane_t *p_src, int i_field )
  74 {
  75     assert( p_dst != NULL );
  76     assert( p_src != NULL );
  77     assert( i_field == 0  ||  i_field == 1 );
  78
  79     /* Start with a copy of the metadata, and then update it to refer
  80        to one field only.
  81
  82        We utilize the fact that plane_CopyPixels() differentiates between
  83        visible_pitch and pitch.
  84
  85        The other field will be defined as the "margin" by doubling the pitch.
  86        The visible pitch will be left as in the original.
  87     */
  88     (*p_dst) = (*p_src);
  89     p_dst->i_lines /= 2;
  90     p_dst->i_visible_lines /= 2;
  91     p_dst->i_pitch *= 2;
  92     /* For the bottom field, skip the first line in the pixel data. */
  93     if( i_field == 1 )
  94         p_dst->p_pixels += p_src->i_pitch;
  95 }
  96
  97 /**
  98  * Internal helper function for EstimateNumBlocksWithMotion():
  99  * estimates whether there is motion in the given 8x8 block on one plane
 100  * between two images. The block as a whole and its fields are evaluated
 101  * separately, and use different motion thresholds.
 102  *
 103  * This is a low-level function only used by EstimateNumBlocksWithMotion().
 104  * There is no need to call this function manually.
 105  *
 106  * For interpretation of pi_top and pi_bot, it is assumed that the block
 107  * starts on an even-numbered line (belonging to the top field).
 108  *
 109  * The b_mmx parameter avoids the need to call vlc_CPU() separately
 110  * for each block.
 111  *
 112  * @param[in] p_pix_p Base pointer to the block in previous picture
 113  * @param[in] p_pix_c Base pointer to the same block in current picture
 114  * @param i_pitch_prev i_pitch of previous picture
 115  * @param i_pitch_curr i_pitch of current picture
 116  * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
 117  * @param[out] pi_top 1 if top field of the block had motion, 0 if no
 118  * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
 119  * @return 1 if the block had motion, 0 if no
 120  * @see EstimateNumBlocksWithMotion()
 121  */
 122 static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c,
 123                                         int i_pitch_prev, int i_pitch_curr,
 124                                         bool b_mmx,
 125                                         int* pi_top, int* pi_bot )
 126 {
 127 /* Pixel luma/chroma difference threshold to detect motion. */
 128 #define T 10
 129
 130     int32_t i_motion = 0;
 131     int32_t i_top_motion = 0;
 132     int32_t i_bot_motion = 0;
 133
 134 /* See below for the C version to see more quickly what this does. */
 135 #ifdef CAN_COMPILE_MMXEXT
 136     if( b_mmx )
 137     {
 138         static const mmx_t bT   = { .ub = { T, T, T, T, T, T, T, T } };
 139         pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */
 140         movq_m2r( bT,  mm5 );
 141
 142         pxor_r2r( mm3, mm3 ); /* score (top field) */
 143         pxor_r2r( mm4, mm4 ); /* score (bottom field) */
 144         for( int y = 0; y < 8; y+=2 )
 145         {
 146             /* top field */
 147             movq_m2r( *((uint64_t*)p_pix_c), mm0 );
 148             movq_m2r( *((uint64_t*)p_pix_p), mm1 );
 149             movq_r2r( mm0, mm2 );
 150             psubusb_r2r( mm1, mm2 );
 151             psubusb_r2r( mm0, mm1 );
 152
 153             pcmpgtb_r2r( mm5, mm2 );
 154             pcmpgtb_r2r( mm5, mm1 );
 155             psadbw_r2r(  mm6, mm2 );
 156             psadbw_r2r(  mm6, mm1 );
 157
 158             paddd_r2r( mm2, mm1 );
 159             paddd_r2r( mm1, mm3 ); /* add to top field score */
 160
 161             p_pix_c += i_pitch_curr;
 162             p_pix_p += i_pitch_prev;
 163
 164             /* bottom field - handling identical to top field, except... */
 165             movq_m2r( *((uint64_t*)p_pix_c), mm0 );
 166             movq_m2r( *((uint64_t*)p_pix_p), mm1 );
 167             movq_r2r( mm0, mm2 );
 168             psubusb_r2r( mm1, mm2 );
 169             psubusb_r2r( mm0, mm1 );
 170
 171             pcmpgtb_r2r( mm5, mm2 );
 172             pcmpgtb_r2r( mm5, mm1 );
 173             psadbw_r2r(  mm6, mm2 );
 174             psadbw_r2r(  mm6, mm1 );
 175
 176             paddd_r2r( mm2, mm1 );
 177             paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */
 178
 179             p_pix_c += i_pitch_curr;
 180             p_pix_p += i_pitch_prev;
 181         }
 182         movq_r2r(  mm3, mm7 ); /* score (total) */
 183         paddd_r2r( mm4, mm7 );
 184         movd_r2m( mm3, i_top_motion );
 185         movd_r2m( mm4, i_bot_motion );
 186         movd_r2m( mm7, i_motion );
 187
 188         /* The loop counts actual score * 255. */
 189         i_top_motion /= 255;
 190         i_bot_motion /= 255;
 191         i_motion     /= 255;
 192
 193         emms();
 194     }
 195     else
 196 #endif
 197     {
 198         for( int y = 0; y < 8; ++y )
 199         {
 200             uint8_t *pc = p_pix_c;
 201             uint8_t *pp = p_pix_p;
 202             int score = 0;
 203             for( int x = 0; x < 8; ++x )
 204             {
 205                 int_fast16_t C = abs((*pc) - (*pp));
 206                 if( C > T )
 207                     ++score;
 208
 209                 ++pc;
 210                 ++pp;
 211             }
 212
 213             i_motion += score;
 214             if( y % 2 == 0 )
 215                 i_top_motion += score;
 216             else
 217                 i_bot_motion += score;
 218
 219             p_pix_c += i_pitch_curr;
 220             p_pix_p += i_pitch_prev;
 221         }
 222     }
 223
 224     /* Field motion thresholds.
 225
 226        Empirical value - works better in practice than the "4" that
 227        would be consistent with the full-block threshold.
 228
 229        Especially the opening scene of The Third ep. 1 (just after the OP)
 230        works better with this. It also fixes some talking scenes in
 231        Stellvia ep. 1, where the cadence would otherwise catch on incorrectly,
 232        leading to more interlacing artifacts than by just using the emergency
 233        mode frame composer.
 234     */
 235     (*pi_top) = ( i_top_motion >= 8 );
 236     (*pi_bot) = ( i_bot_motion >= 8 );
 237
 238     /* Full-block threshold = (8*8)/8: motion is detected if 1/8 of the block
 239        changes "enough". */
 240     return (i_motion >= 8);
 241 }
 242 #undef T
 243
 244 /*****************************************************************************
 245  * Public functions
 246  *****************************************************************************/
 247
 248 /* See header for function doc. */
 249 void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
 250                    picture_t *p_inpic_top, picture_t *p_inpic_bottom,
 251                    compose_chroma_t i_output_chroma )
 252 {
 253     assert( p_filter != NULL );
 254     assert( p_outpic != NULL );
 255     assert( p_inpic_top != NULL );
 256     assert( p_inpic_bottom != NULL );
 257
 258     /* Valid 4:2:0 chroma handling modes. */
 259     assert( i_output_chroma == CC_ALTLINE       ||
 260             i_output_chroma == CC_UPCONVERT     ||
 261             i_output_chroma == CC_SOURCE_TOP    ||
 262             i_output_chroma == CC_SOURCE_BOTTOM ||
 263             i_output_chroma == CC_MERGE );
 264
 265     const int i_chroma = p_filter->fmt_in.video.i_chroma;
 266     const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
 267                         i_chroma == VLC_CODEC_J422;
 268     const bool b_upconvert_chroma = ( !b_i422  &&
 269                                       i_output_chroma == CC_UPCONVERT );
 270
 271     for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
 272     {
 273         bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );
 274
 275         /* YV12 is YVU, but I422 is YUV. For such input, swap chroma planes
 276            in output when converting to 4:2:2. */
 277         int i_out_plane;
 278         if( b_is_chroma_plane  &&  b_upconvert_chroma  &&
 279             i_chroma == VLC_CODEC_YV12 )
 280         {
 281             if( i_plane == U_PLANE )
 282                 i_out_plane = V_PLANE;
 283             else /* V_PLANE */
 284                 i_out_plane = U_PLANE;
 285         }
 286         else
 287         {
 288             i_out_plane = i_plane;
 289         }
 290
 291         /* Copy luma or chroma, alternating between input fields. */
 292         if( !b_is_chroma_plane  ||  b_i422  ||  i_output_chroma == CC_ALTLINE )
 293         {
 294             /* Do an alternating line copy. This is always done for luma,
 295                and for 4:2:2 chroma. It can be requested for 4:2:0 chroma
 296                using CC_ALTLINE (see function doc).
 297
 298                Note that when we get here, the number of lines matches
 299                in input and output.
 300             */
 301             plane_t dst_top;
 302             plane_t dst_bottom;
 303             plane_t src_top;
 304             plane_t src_bottom;
 305             FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane],   0 );
 306             FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane],   1 );
 307             FieldFromPlane( &src_top,    &p_inpic_top->p[i_plane],    0 );
 308             FieldFromPlane( &src_bottom, &p_inpic_bottom->p[i_plane], 1 );
 309
 310             /* Copy each field from the corresponding source. */
 311             plane_CopyPixels( &dst_top,    &src_top    );
 312             plane_CopyPixels( &dst_bottom, &src_bottom );
 313         }
 314         else /* Input 4:2:0, on a chroma plane, and not in altline mode. */
 315         {
 316             if( i_output_chroma == CC_UPCONVERT )
 317             {
 318                 /* Upconverting copy - use all data from both input fields.
 319
 320                    This produces an output picture with independent chroma
 321                    for each field. It can be used for general input when
 322                    the two input frames are different.
 323
 324                    The output is 4:2:2, but the input is 4:2:0. Thus the output
 325                    has twice the lines of the input, and each full chroma plane
 326                    in the input corresponds to a field chroma plane in the
 327                    output.
 328                 */
 329                 plane_t dst_top;
 330                 plane_t dst_bottom;
 331                 FieldFromPlane( &dst_top,    &p_outpic->p[i_out_plane], 0 );
 332                 FieldFromPlane( &dst_bottom, &p_outpic->p[i_out_plane], 1 );
 333
 334                 /* Copy each field from the corresponding source. */
 335                 plane_CopyPixels( &dst_top,    &p_inpic_top->p[i_plane]    );
 336                 plane_CopyPixels( &dst_bottom, &p_inpic_bottom->p[i_plane] );
 337             }
 338             else if( i_output_chroma == CC_SOURCE_TOP )
 339             {
 340                 /* Copy chroma of input top field. Ignore chroma of input
 341                    bottom field. Input and output are both 4:2:0, so we just
 342                    copy the whole plane. */
 343                 plane_CopyPixels( &p_outpic->p[i_out_plane],
 344                                   &p_inpic_top->p[i_plane] );
 345             }
 346             else if( i_output_chroma == CC_SOURCE_BOTTOM )
 347             {
 348                 /* Copy chroma of input bottom field. Ignore chroma of input
 349                    top field. Input and output are both 4:2:0, so we just
 350                    copy the whole plane. */
 351                 plane_CopyPixels( &p_outpic->p[i_out_plane],
 352                                   &p_inpic_bottom->p[i_plane] );
 353             }
 354             else /* i_output_chroma == CC_MERGE */
 355             {
 356                 /* Average the chroma of the input fields.
 357                    Input and output are both 4:2:0. */
 358                 uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
 359                 p_in_top    = p_inpic_top->p[i_plane].p_pixels;
 360                 p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
 361                 p_out = p_outpic->p[i_out_plane].p_pixels;
 362                 p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
 363                                   * p_outpic->p[i_out_plane].i_visible_lines;
 364
 365                 int w = FFMIN3( p_inpic_top->p[i_plane].i_visible_pitch,
 366                                 p_inpic_bottom->p[i_plane].i_visible_pitch,
 367                                 p_outpic->p[i_plane].i_visible_pitch );
 368
 369                 for( ; p_out < p_out_end ; )
 370                 {
 371                     Merge( p_out, p_in_top, p_in_bottom, w );
 372                     p_out       += p_outpic->p[i_out_plane].i_pitch;
 373                     p_in_top    += p_inpic_top->p[i_plane].i_pitch;
 374                     p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
 375                 }
 376                 EndMerge();
 377             }
 378         }
 379     }
 380 }
 381
 382 /* See header for function doc. */
 383 int EstimateNumBlocksWithMotion( const picture_t* p_prev,
 384                                  const picture_t* p_curr,
 385                                  int *pi_top, int *pi_bot)
 386 {
 387     assert( p_prev != NULL );
 388     assert( p_curr != NULL );
 389
 390     int i_score_top = 0;
 391     int i_score_bot = 0;
 392
 393     if( p_prev->i_planes != p_curr->i_planes )
 394         return -1;
 395
 396     /* We must tell our inline helper whether to use MMX acceleration. */
 397 #ifdef CAN_COMPILE_MMXEXT
 398     bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );
 399 #else
 400     bool b_mmx = false;
 401 #endif
 402
 403     int i_score = 0;
 404     for( int i_plane = 0 ; i_plane < p_prev->i_planes ; i_plane++ )
 405     {
 406         /* Sanity check */
 407         if( p_prev->p[i_plane].i_visible_lines !=
 408             p_curr->p[i_plane].i_visible_lines )
 409             return -1;
 410
 411         const int i_pitch_prev = p_prev->p[i_plane].i_pitch;
 412         const int i_pitch_curr = p_curr->p[i_plane].i_pitch;
 413
 414         /* Last pixels and lines (which do not make whole blocks) are ignored.
 415            Shouldn't really matter for our purposes. */
 416         const int i_mby = p_prev->p[i_plane].i_visible_lines / 8;
 417         const int w = FFMIN( p_prev->p[i_plane].i_visible_pitch,
 418                              p_curr->p[i_plane].i_visible_pitch );
 419         const int i_mbx = w / 8;
 420
 421         for( int by = 0; by < i_mby; ++by )
 422         {
 423             uint8_t *p_pix_p = &p_prev->p[i_plane].p_pixels[i_pitch_prev*8*by];
 424             uint8_t *p_pix_c = &p_curr->p[i_plane].p_pixels[i_pitch_curr*8*by];
 425
 426             for( int bx = 0; bx < i_mbx; ++bx )
 427             {
 428                 int i_top_temp, i_bot_temp;
 429                 i_score += TestForMotionInBlock( p_pix_p, p_pix_c,
 430                                                  i_pitch_prev, i_pitch_curr,
 431                                                  b_mmx,
 432                                                  &i_top_temp, &i_bot_temp );
 433                 i_score_top += i_top_temp;
 434                 i_score_bot += i_bot_temp;
 435
 436                 p_pix_p += 8;
 437                 p_pix_c += 8;
 438             }
 439         }
 440     }
 441
 442     if( pi_top )
 443         (*pi_top) = i_score_top;
 444     if( pi_bot )
 445         (*pi_bot) = i_score_bot;
 446
 447     return i_score;
 448 }
 449
 450 /* See header for function doc. */
 451 int CalculateInterlaceScore( const picture_t* p_pic_top,
 452                              const picture_t* p_pic_bot )
 453 {
 454     /*
 455         We use the comb metric from the IVTC filter of Transcode 1.1.5.
 456         This was found to work better for the particular purpose of IVTC
 457         than RenderX()'s comb metric.
 458
 459         Note that we *must not* subsample at all in order to catch interlacing
 460         in telecined frames with localized motion (e.g. anime with characters
 461         talking, where only mouths move and everything else stays still.)
 462     */
 463
 464     assert( p_pic_top != NULL );
 465     assert( p_pic_bot != NULL );
 466
 467     if( p_pic_top->i_planes != p_pic_bot->i_planes )
 468         return -1;
 469
 470     unsigned u_cpu = vlc_CPU();
 471
 472     /* Amount of bits must be known for MMX, thus int32_t.
 473        Doesn't hurt the C implementation. */
 474     int32_t i_score = 0;
 475
 476 #ifdef CAN_COMPILE_MMXEXT
 477     if( u_cpu & CPU_CAPABILITY_MMXEXT )
 478         pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */
 479 #endif
 480
 481     for( int i_plane = 0 ; i_plane < p_pic_top->i_planes ; ++i_plane )
 482     {
 483         /* Sanity check */
 484         if( p_pic_top->p[i_plane].i_visible_lines !=
 485             p_pic_bot->p[i_plane].i_visible_lines )
 486             return -1;
 487
 488         const int i_lasty = p_pic_top->p[i_plane].i_visible_lines-1;
 489         const int w = FFMIN( p_pic_top->p[i_plane].i_visible_pitch,
 490                              p_pic_bot->p[i_plane].i_visible_pitch );
 491         const int wm8 = w % 8;   /* remainder */
 492         const int w8  = w - wm8; /* part of width that is divisible by 8 */
 493
 494         /* Current line / neighbouring lines picture pointers */
 495         const picture_t *cur = p_pic_bot;
 496         const picture_t *ngh = p_pic_top;
 497         int wc = cur->p[i_plane].i_pitch;
 498         int wn = ngh->p[i_plane].i_pitch;
 499
 500         /* Transcode 1.1.5 only checks every other line. Checking every line
 501            works better for anime, which may contain horizontal,
 502            one pixel thick cartoon outlines.
 503         */
 504         for( int y = 1; y < i_lasty; ++y )
 505         {
 506             uint8_t *p_c = &cur->p[i_plane].p_pixels[y*wc];     /* this line */
 507             uint8_t *p_p = &ngh->p[i_plane].p_pixels[(y-1)*wn]; /* prev line */
 508             uint8_t *p_n = &ngh->p[i_plane].p_pixels[(y+1)*wn]; /* next line */
 509
 510             int x = 0;
 511
 512 /* Threshold (value from Transcode 1.1.5) */
 513 #define T 100
 514 #ifdef CAN_COMPILE_MMXEXT
 515             /* Easy-to-read C version further below.
 516
 517                Assumptions: 0 < T < 127
 518                             # of pixels < (2^32)/255
 519                Note: calculates score * 255
 520             */
 521             if( u_cpu & CPU_CAPABILITY_MMXEXT )
 522             {
 523                 static const mmx_t b0   = { .uq = 0x0000000000000000ULL };
 524                 static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
 525                 static const mmx_t bT   = { .ub = { T, T, T, T, T, T, T, T } };
 526
 527                 for( ; x < w8; x += 8 )
 528                 {
 529                     movq_m2r( *((int64_t*)p_c), mm0 );
 530                     movq_m2r( *((int64_t*)p_p), mm1 );
 531                     movq_m2r( *((int64_t*)p_n), mm2 );
 532
 533                     psubb_m2r( b128, mm0 );
 534                     psubb_m2r( b128, mm1 );
 535                     psubb_m2r( b128, mm2 );
 536
 537                     psubsb_r2r( mm0, mm1 );
 538                     psubsb_r2r( mm0, mm2 );
 539
 540                     pxor_r2r( mm3, mm3 );
 541                     pxor_r2r( mm4, mm4 );
 542                     pxor_r2r( mm5, mm5 );
 543                     pxor_r2r( mm6, mm6 );
 544
 545                     punpcklbw_r2r( mm1, mm3 );
 546                     punpcklbw_r2r( mm2, mm4 );
 547                     punpckhbw_r2r( mm1, mm5 );
 548                     punpckhbw_r2r( mm2, mm6 );
 549
 550                     pmulhw_r2r( mm3, mm4 );
 551                     pmulhw_r2r( mm5, mm6 );
 552
 553                     packsswb_r2r(mm4, mm6);
 554                     pcmpgtb_m2r( bT, mm6 );
 555                     psadbw_m2r( b0, mm6 );
 556                     paddd_r2r( mm6, mm7 );
 557
 558                     p_c += 8;
 559                     p_p += 8;
 560                     p_n += 8;
 561                 }
 562             }
 563 #endif
 564             for( ; x < w; ++x )
 565             {
 566                 /* Worst case: need 17 bits for "comb". */
 567                 int_fast32_t C = *p_c;
 568                 int_fast32_t P = *p_p;
 569                 int_fast32_t N = *p_n;
 570
 571                 /* Comments in Transcode's filter_ivtc.c attribute this
 572                    combing metric to Gunnar Thalin.
 573
 574                     The idea is that if the picture is interlaced, both
 575                     expressions will have the same sign, and this comes
 576                     up positive. The value T = 100 has been chosen such
 577                     that a pixel difference of 10 (on average) will
 578                     trigger the detector.
 579                 */
 580                 int_fast32_t comb = (P - C) * (N - C);
 581                 if( comb > T )
 582                     ++i_score;
 583
 584                 ++p_c;
 585                 ++p_p;
 586                 ++p_n;
 587             }
 588
 589             /* Now the other field - swap current and neighbour pictures */
 590             const picture_t *tmp = cur;
 591             cur = ngh;
 592             ngh = tmp;
 593             int tmp_pitch = wc;
 594             wc = wn;
 595             wn = tmp_pitch;
 596         }
 597     }
 598
 599 #ifdef CAN_COMPILE_MMXEXT
 600     if( u_cpu & CPU_CAPABILITY_MMXEXT )
 601     {
 602         movd_r2m( mm7, i_score );
 603         emms();
 604         i_score /= 255;
 605     }
 606 #endif
 607
 608     return i_score;
 609 }
 610 #undef T