git.sesse.net Git - x264/blob - encoder/macroblock.c

   1 /*****************************************************************************
   2  * macroblock.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27
  28 #include "../common/common.h"
  29 #include "macroblock.h"
  30
  31
  32 static const uint8_t block_idx_x[16] =
  33 {
  34     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
  35 };
  36 static const uint8_t block_idx_y[16] =
  37 {
  38     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
  39 };
  40 static const uint8_t block_idx_xy[4][4] =
  41 {
  42     { 0, 2, 8,  10},
  43     { 1, 3, 9,  11},
  44     { 4, 6, 12, 14},
  45     { 5, 7, 13, 15}
  46 };
  47
  48 static const int quant_mf[6][4][4] =
  49 {
  50     {  { 13107, 8066, 13107, 8066}, {  8066, 5243,  8066, 5243},
  51        { 13107, 8066, 13107, 8066}, {  8066, 5243,  8066, 5243}  },
  52     {  { 11916, 7490, 11916, 7490}, {  7490, 4660,  7490, 4660},
  53        { 11916, 7490, 11916, 7490}, {  7490, 4660,  7490, 4660}  },
  54     {  { 10082, 6554, 10082, 6554}, {  6554, 4194,  6554, 4194},
  55        { 10082, 6554, 10082, 6554}, {  6554, 4194,  6554, 4194}  },
  56     {  {  9362, 5825,  9362, 5825}, {  5825, 3647,  5825, 3647},
  57        {  9362, 5825,  9362, 5825}, {  5825, 3647,  5825, 3647}  },
  58     {  {  8192, 5243,  8192, 5243}, {  5243, 3355,  5243, 3355},
  59        {  8192, 5243,  8192, 5243}, {  5243, 3355,  5243, 3355}  },
  60     {  {  7282, 4559,  7282, 4559}, {  4559, 2893,  4559, 2893},
  61        {  7282, 4559,  7282, 4559}, {  4559, 2893,  4559, 2893}  }
  62 };
  63
  64 static const int i_chroma_qp_table[52] =
  65 {
  66      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
  67     10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
  68     20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
  69     29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
  70     36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
  71     39, 39
  72 };
  73
  74 /****************************************************************************
  75  * Scan and Quant functions
  76  ****************************************************************************/
  77 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
  78 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
  79
  80 static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
  81 {
  82     level[0] = dct[0][0];
  83     level[1] = dct[0][1];
  84     level[2] = dct[1][0];
  85     level[3] = dct[2][0];
  86     level[4] = dct[1][1];
  87     level[5] = dct[0][2];
  88     level[6] = dct[0][3];
  89     level[7] = dct[1][2];
  90     level[8] = dct[2][1];
  91     level[9] = dct[3][0];
  92     level[10] = dct[3][1];
  93     level[11] = dct[2][2];
  94     level[12] = dct[1][3];
  95     level[13] = dct[2][3];
  96     level[14] = dct[3][2];
  97     level[15] = dct[3][3];
  98 #if 0
  99     int i;
 100     for( i = 0; i < 16; i++ )
 101     {
 102         level[i] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
 103     }
 104 #endif
 105 }
 106 static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
 107 {
 108     level[0] = dct[0][1];
 109     level[1] = dct[1][0];
 110     level[2] = dct[2][0];
 111     level[3] = dct[1][1];
 112     level[4] = dct[0][2];
 113     level[5] = dct[0][3];
 114     level[6] = dct[1][2];
 115     level[7] = dct[2][1];
 116     level[8] = dct[3][0];
 117     level[9] = dct[3][1];
 118     level[10] = dct[2][2];
 119     level[11] = dct[1][3];
 120     level[12] = dct[2][3];
 121     level[13] = dct[3][2];
 122     level[14] = dct[3][3];
 123 #if 0
 124     int i;
 125     for( i = 1; i < 16; i++ )
 126     {
 127         level[i - 1] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
 128     }
 129 #endif
 130 }
 131
 132 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
 133 {
 134     level[0] = dct[0][0];
 135     level[1] = dct[0][1];
 136     level[2] = dct[1][0];
 137     level[3] = dct[1][1];
 138 }
 139
 140
 141 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
 142 {
 143     const int i_qbits = 15 + i_qscale / 6;
 144     const int i_mf = i_qscale % 6;
 145     const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 146
 147     int x,y;
 148     for( y = 0; y < 4; y++ )
 149     {
 150         for( x = 0; x < 4; x++ )
 151         {
 152             if( dct[y][x] > 0 )
 153             {
 154                 dct[y][x] =( f + dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits;
 155             }
 156             else
 157             {
 158                 dct[y][x] = - ( ( f - dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits );
 159             }
 160         }
 161     }
 162 }
 163 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
 164 {
 165     const int i_qbits = 15 + i_qscale / 6;
 166     const int f2 = ( 2 << i_qbits ) / 3;
 167     const int i_qmf = quant_mf[i_qscale%6][0][0];
 168     int x,y;
 169
 170     for( y = 0; y < 4; y++ )
 171     {
 172         for( x = 0; x < 4; x++ )
 173         {
 174             if( dct[y][x] > 0 )
 175             {
 176                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 177             }
 178             else
 179             {
 180                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 181             }
 182         }
 183     }
 184 }
 185 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
 186 {
 187     int const i_qbits = 15 + i_qscale / 6;
 188     const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
 189     const int i_qmf = quant_mf[i_qscale%6][0][0];
 190
 191     int x,y;
 192     for( y = 0; y < 2; y++ )
 193     {
 194         for( x = 0; x < 2; x++ )
 195         {
 196             if( dct[y][x] > 0 )
 197             {
 198                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 199             }
 200             else
 201             {
 202                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 203             }
 204         }
 205     }
 206 }
#if 0
/* DISABLED CODE: everything up to the matching #endif is compiled out.
 *
 * It holds alternative versions of quant_4x4 / quant_4x4_dc / quant_2x2_dc
 * in which the rounding offset is not a fixed 1/3 or 1/6 of the quantiser
 * step but a per-position fraction num/den read from the f_deadzone_*
 * tables below.  The functions have the same names and signatures as the
 * active ones, so this region cannot be enabled without disabling those. */
/* From a JVT doc */
static const int f_deadzone_intra[4][4][2] = /* [num][den] */
{
    { {1,2}, {3,7}, {2,5}, {1,3} },
    { {3,7}, {2,5}, {1,3}, {1,4} },
    { {2,5}, {1,3}, {1,4}, {1,5} },
    { {1,3}, {1,4}, {1,5}, {1,5} }
};
static const int f_deadzone_inter[4][4][2] = /* [num][den] */
{
    { {1,3}, {2,7}, {4,15},{2,9} },
    { {2,7}, {4,15},{2,9}, {1,6} },
    { {4,15},{2,9}, {1,6}, {1,7} },
    { {2,9}, {1,6}, {1,7}, {2,15} }
};


/* Disabled variant: rounding offset taken per coefficient position from the
 * intra or inter deadzone table. */
static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
{
    const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
    const int i_qbits = 15 + i_qscale / 6;
    const int i_mf = i_qscale % 6;

    int x,y;
    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {
#if 0
            const int f = b_intra ?
                          (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
                          :
                          (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
#else
            /* f = (num / den) of one quantiser step for this position */
            const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
#endif

            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f + dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits;
            }
            else
            {
                dct[y][x] = - ( ( f - dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits );
            }
        }
    }
}

/* Disabled variant: uses the [0][0] intra deadzone fraction for every
 * luma DC coefficient. */
static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
{
    const int i_qbits = 15 + i_qscale / 6;
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
    int x,y;

    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {

            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
            }
            else
            {
                dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
            }
        }
    }
}

/* Disabled variant: uses the [0][0] intra or inter deadzone fraction for
 * every chroma DC coefficient. */
static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
{
    int const i_qbits = 15 + i_qscale / 6;
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    const int f2 = b_intra ?
                   (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
                   :
                   (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
    int x,y;
    for( y = 0; y < 2; y++ )
    {
        for( x = 0; x < 2; x++ )
        {
            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
            }
            else
            {
                dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
            }
        }
    }
}


#endif
 308
 309 static inline int array_non_zero_count( int *v, int i_count )
 310 {
 311     int i;
 312     int i_nz;
 313
 314     for( i = 0, i_nz = 0; i < i_count; i++ )
 315     {
 316         if( v[i] )
 317         {
 318             i_nz++;
 319         }
 320     }
 321     return i_nz;
 322 }
 323
 324 /* (ref: JVT-B118)
 325  * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
 326  * to 0 (low score means set it to null)
 327  * Used in inter macroblock (luma and chroma)
 328  *  luma: for a 8x8 block: if score < 4 -> null
 329  *        for the complete mb: if score < 6 -> null
 330  *  chroma: for the complete mb: if score < 7 -> null
 331  */
 332 static int x264_mb_decimate_score( int *dct, int i_max )
 333 {
 334     static const int i_ds_table[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 335
 336     int i_score = 0;
 337     int idx = i_max - 1;
 338
 339     while( idx >= 0 && dct[idx] == 0 )
 340     {
 341         idx--;
 342     }
 343
 344     while( idx >= 0 )
 345     {
 346         int i_run;
 347
 348         if( abs( dct[idx--] ) > 1 )
 349         {
 350             return 9;
 351         }
 352
 353         i_run = 0;
 354         while( idx >= 0 && dct[idx] == 0 )
 355         {
 356             idx--;
 357             i_run++;
 358         }
 359         i_score += i_ds_table[i_run];
 360     }
 361
 362     return i_score;
 363 }
 364
 365 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
 366 {
 367     const int i_stride = h->mb.pic.i_stride[0];
 368     uint8_t  *p_src = &h->mb.pic.p_fenc[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
 369     uint8_t  *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
 370
 371     int16_t dct4x4[4][4];
 372
 373     h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 374
 375     quant_4x4( dct4x4, i_qscale, 1 );
 376
 377     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
 378
 379     x264_mb_dequant_4x4( dct4x4, i_qscale );
 380
 381     /* output samples to fdec */
 382     h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
 383 }
 384
 385 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
 386 {
 387     const int i_stride = h->mb.pic.i_stride[0];
 388     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 389     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 390
 391     int16_t dct4x4[16+1][4][4];
 392
 393     int i;
 394
 395     h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
 396     for( i = 0; i < 16; i++ )
 397     {
 398         /* copy dc coeff */
 399         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
 400
 401         /* quant/scan/dequant */
 402         quant_4x4( dct4x4[1+i], i_qscale, 1 );
 403         scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
 404         x264_mb_dequant_4x4( dct4x4[1+i], i_qscale );
 405     }
 406
 407     h->dctf.dct4x4dc( dct4x4[0] );
 408     quant_4x4_dc( dct4x4[0], i_qscale );
 409     scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
 410
 411     /* output samples to fdec */
 412     h->dctf.idct4x4dc( dct4x4[0] );
 413     x264_mb_dequant_4x4_dc( dct4x4[0], i_qscale );  /* XXX not inversed */
 414
 415     /* calculate dct coeffs */
 416     for( i = 0; i < 16; i++ )
 417     {
 418         /* copy dc coeff */
 419         dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
 420     }
 421     /* put pixels to fdec */
 422     h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
 423 }
 424
 425 static void x264_mb_encode_8x8( x264_t *h, int b_inter, int i_qscale )
 426 {
 427     int i, ch;
 428
 429     for( ch = 0; ch < 2; ch++ )
 430     {
 431         const int i_stride = h->mb.pic.i_stride[1+ch];
 432         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 433         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 434         int i_decimate_score = 0;
 435
 436         int16_t dct2x2[2][2];
 437         int16_t dct4x4[4][4][4];
 438
 439         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 440         /* calculate dct coeffs */
 441         for( i = 0; i < 4; i++ )
 442         {
 443             /* copy dc coeff */
 444             dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
 445
 446             quant_4x4( dct4x4[i], i_qscale, b_inter ? 0 : 1 );
 447             scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
 448             x264_mb_dequant_4x4( dct4x4[i], i_qscale );
 449
 450             if( b_inter )
 451             {
 452                 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
 453             }
 454         }
 455
 456         h->dctf.dct2x2dc( dct2x2 );
 457         quant_2x2_dc( dct2x2, i_qscale, b_inter ? 0 : 1 );
 458         scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
 459
 460         /* output samples to fdec */
 461         h->dctf.idct2x2dc( dct2x2 );
 462         x264_mb_dequant_2x2_dc( dct2x2, i_qscale );  /* XXX not inversed */
 463
 464         if( b_inter && i_decimate_score < 7 )
 465         {
 466             /* Near null chroma 8x8 block so make it null (bits saving) */
 467             for( i = 0; i < 4; i++ )
 468             {
 469                 int x, y;
 470                 for( x = 0; x < 15; x++ )
 471                 {
 472                     h->dct.block[16+i+ch*4].residual_ac[x] = 0;
 473                 }
 474                 for( x = 0; x < 4; x++ )
 475                 {
 476                     for( y = 0; y < 4; y++ )
 477                     {
 478                         dct4x4[i][x][y] = 0;
 479                     }
 480                 }
 481             }
 482         }
 483
 484         /* calculate dct coeffs */
 485         for( i = 0; i < 4; i++ )
 486         {
 487             /* copy dc coeff */
 488             dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
 489         }
 490         h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
 491     }
 492 }
 493
 494 static void x264_macroblock_encode_skip( x264_t *h )
 495 {
 496     int i;
 497     h->mb.i_cbp_luma = 0x00;
 498     h->mb.i_cbp_chroma = 0x00;
 499
 500     for( i = 0; i < 16+8; i++ )
 501     {
 502         h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
 503     }
 504
 505     /* store cbp */
 506     h->mb.cbp[h->mb.i_mb_xy] = 0;
 507 }
 508
 509 /*****************************************************************************
 510  * x264_macroblock_encode_pskip:
 511  *  Encode an already marked skip block
 512  *****************************************************************************/
 513 void x264_macroblock_encode_pskip( x264_t *h )
 514 {
 515     const int mvx = h->mb.cache.mv[0][x264_scan8[0]][0];
 516     const int mvy = h->mb.cache.mv[0][x264_scan8[0]][1];
 517
 518     /* Motion compensation XXX probably unneeded */
 519     h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
 520                     h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
 521                     mvx, mvy, 16, 16 );
 522
 523     /* Chroma MC */
 524     h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1], h->mb.pic.i_stride[1],
 525                       h->mb.pic.p_fdec[1],       h->mb.pic.i_stride[1],
 526                       mvx, mvy, 8, 8 );
 527
 528     h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][2], h->mb.pic.i_stride[2],
 529                       h->mb.pic.p_fdec[2],       h->mb.pic.i_stride[2],
 530                       mvx, mvy, 8, 8 );
 531
 532     x264_macroblock_encode_skip( h );
 533 }
 534
 535 /*****************************************************************************
 536  * x264_macroblock_encode:
 537  *****************************************************************************/
 538 void x264_macroblock_encode( x264_t *h )
 539 {
 540     int i_cbp_dc = 0;
 541     int i_qscale;
 542     int i;
 543
 544     if( h->mb.i_type == P_SKIP )
 545     {
 546         /* A bit special */
 547         x264_macroblock_encode_pskip( h );
 548         return;
 549     }
 550     if( h->mb.i_type == B_SKIP )
 551     {
 552         /* XXX motion compensation is probably unneeded */
 553         x264_mb_mc( h );
 554         x264_macroblock_encode_skip( h );
 555         return;
 556     }
 557
 558     /* quantification scale */
 559     i_qscale = h->mb.qp[h->mb.i_mb_xy];
 560
 561     if( h->mb.i_type == I_16x16 )
 562     {
 563         const int i_mode = h->mb.i_intra16x16_pred_mode;
 564         /* do the right prediction */
 565         h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 566
 567         /* encode the 16x16 macroblock */
 568         x264_mb_encode_i16x16( h, i_qscale );
 569
 570         /* fix the pred mode value */
 571         h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
 572     }
 573     else if( h->mb.i_type == I_4x4 )
 574     {
 575         for( i = 0; i < 16; i++ )
 576         {
 577             const int i_dst = h->mb.pic.i_stride[0];
 578             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
 579             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
 580
 581             /* Do the right prediction */
 582             h->predict_4x4[i_mode]( p_dst, i_dst );
 583
 584             /* encode one 4x4 block */
 585             x264_mb_encode_i4x4( h, i, i_qscale );
 586
 587             /* fix the pred mode value */
 588             h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix[i_mode];
 589         }
 590     }
 591     else    /* Inter MB */
 592     {
 593         int16_t dct4x4[16][4][4];
 594
 595         int i8x8, i4x4, idx;
 596         int i_decimate_mb = 0;
 597
 598         /* Motion compensation */
 599         x264_mb_mc( h );
 600
 601         h->dctf.sub16x16_dct( dct4x4,
 602                               h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 603                               h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 604
 605         for( i8x8 = 0; i8x8 < 4; i8x8++ )
 606         {
 607             int i_decimate_8x8;
 608
 609             /* encode one 4x4 block */
 610             i_decimate_8x8 = 0;
 611             for( i4x4 = 0; i4x4 < 4; i4x4++ )
 612             {
 613                 idx = i8x8 * 4 + i4x4;
 614
 615                 quant_4x4( dct4x4[idx], i_qscale, 0 );
 616                 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
 617                 x264_mb_dequant_4x4( dct4x4[idx], i_qscale );
 618
 619                 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
 620             }
 621
 622             /* decimate this 8x8 block */
 623             i_decimate_mb += i_decimate_8x8;
 624             if( i_decimate_8x8 < 4 )
 625             {
 626                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
 627                 {
 628                     int x, y;
 629                     idx = i8x8 * 4 + i4x4;
 630                     for( i = 0; i < 16; i++ )
 631                     {
 632                         h->dct.block[idx].luma4x4[i] = 0;
 633                     }
 634                     for( x = 0; x < 4; x++ )
 635                     {
 636                         for( y = 0; y < 4; y++ )
 637                         {
 638                             dct4x4[idx][x][y] = 0;
 639                         }
 640                     }
 641                 }
 642             }
 643         }
 644
 645         if( i_decimate_mb < 6 )
 646         {
 647             for( idx = 0; idx < 16; idx++ )
 648             {
 649                 for( i = 0; i < 16; i++ )
 650                 {
 651                     h->dct.block[idx].luma4x4[i] = 0;
 652                 }
 653             }
 654         }
 655         else
 656         {
 657             h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
 658         }
 659     }
 660
 661     /* encode chroma */
 662     i_qscale = i_chroma_qp_table[x264_clip3( i_qscale + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 663     if( IS_INTRA( h->mb.i_type ) )
 664     {
 665         const int i_mode = h->mb.i_chroma_pred_mode;
 666         /* do the right prediction */
 667         h->predict_8x8[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
 668         h->predict_8x8[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
 669
 670         /* fix the pred mode value */
 671         h->mb.i_chroma_pred_mode = x264_mb_pred_mode8x8_fix[i_mode];
 672     }
 673
 674     /* encode the 8x8 blocks */
 675     x264_mb_encode_8x8( h, !IS_INTRA( h->mb.i_type ), i_qscale );
 676
 677     /* Calculate the Luma/Chroma patern and non_zero_count */
 678     if( h->mb.i_type == I_16x16 )
 679     {
 680         h->mb.i_cbp_luma = 0x00;
 681         for( i = 0; i < 16; i++ )
 682         {
 683             const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
 684             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 685             if( nz > 0 )
 686             {
 687                 h->mb.i_cbp_luma = 0x0f;
 688             }
 689         }
 690     }
 691     else
 692     {
 693         h->mb.i_cbp_luma = 0x00;
 694         for( i = 0; i < 16; i++ )
 695         {
 696             const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
 697             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 698             if( nz > 0 )
 699             {
 700                 h->mb.i_cbp_luma |= 1 << (i/4);
 701             }
 702         }
 703     }
 704
 705     /* Calculate the chroma patern */
 706     h->mb.i_cbp_chroma = 0x00;
 707     for( i = 0; i < 8; i++ )
 708     {
 709         const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
 710         h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
 711         if( nz > 0 )
 712         {
 713             h->mb.i_cbp_chroma = 0x02;    /* dc+ac (we can't do only ac) */
 714         }
 715     }
 716     if( h->mb.i_cbp_chroma == 0x00 &&
 717         ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
 718     {
 719         h->mb.i_cbp_chroma = 0x01;    /* dc only */
 720     }
 721
 722     if( h->param.b_cabac )
 723     {
 724         if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
 725             i_cbp_dc = 0x01;
 726         else
 727             i_cbp_dc = 0x00;
 728
 729         if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
 730             i_cbp_dc |= 0x02;
 731         if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
 732             i_cbp_dc |= 0x04;
 733     }
 734
 735     /* store cbp */
 736     h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
 737
 738     if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
 739     {
 740         /* It won'y change anything at the decoder side but it is needed else the
 741          * decoder will fail to read the next QP */
 742         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
 743     }
 744
 745
 746     /* Check for P_SKIP
 747      * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
 748      *      (if multiple mv give same result)*/
 749     if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
 750         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
 751     {
 752         if( h->mb.cache.ref[0][x264_scan8[0]] == 0 )
 753         {
 754             int mvp[2];
 755
 756             x264_mb_predict_mv_pskip( h, mvp );
 757             if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
 758                 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
 759             {
 760                 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = P_SKIP;
 761                 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 762             }
 763         }
 764     }
 765
 766     /* Check for B_SKIP */
 767     if( h->mb.i_type == B_DIRECT &&
 768         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
 769     {
 770         h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = B_SKIP;
 771         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 772     }
 773 }
 774
 775 /*****************************************************************************
 776  * x264_macroblock_probe_skip:
 777  *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 778  *  the previous QP
 779  *****************************************************************************/
 780 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
 781 {
 782     DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
 783     DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
 784     DECLARE_ALIGNED( int,     dctscan[16], 16 );
 785
 786     int i_qp;
 787     int mvp[2];
 788     int ch;
 789     int n;
 790
 791     int i8x8, i4x4;
 792     int i_decimate_mb;
 793
 794     /* quantization scale */
 795     i_qp = h->mb.qp[h->mb.i_mb_xy];
 796
 797     if( !b_bidir )
 798     {
 799         /* Get the MV */
 800         x264_mb_predict_mv_pskip( h, mvp );
 801
 802         /* Special case, need to clip the vector */
 803         n = 16 * h->mb.i_mb_x + mvp[0];
 804         if( n < -24 )
 805             mvp[0] = -24 - 16*h->mb.i_mb_x;
 806         else if( n > 16 * h->sps->i_mb_width + 24 )
 807             mvp[0] = 16 * ( h->sps->i_mb_width - h->mb.i_mb_x ) + 24;
 808
 809         n = 16 * h->mb.i_mb_y + mvp[1];
 810         if( n < -24 )
 811             mvp[1] = -24 - 16*h->mb.i_mb_y;
 812         else if( n > 16 * h->sps->i_mb_height + 8 )
 813             mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8;
 814
 815
 816         /* Motion compensation */
 817         h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
 818                         h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
 819                         mvp[0], mvp[1], 16, 16 );
 820     }
 821
 822     /* get luma diff */
 823     h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 824                                   h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 825
 826     for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
 827     {
 828         /* encode one 4x4 block */
 829         for( i4x4 = 0; i4x4 < 4; i4x4++ )
 830         {
 831             const int idx = i8x8 * 4 + i4x4;
 832
 833             quant_4x4( dct4x4[idx], i_qp, 0 );
 834             scan_zigzag_4x4full( dctscan, dct4x4[idx] );
 835
 836             i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
 837
 838             if( i_decimate_mb >= 6 )
 839             {
 840                 /* not as P_SKIP */
 841                 return 0;
 842             }
 843         }
 844     }
 845
 846     /* encode chroma */
 847     i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 848
 849     for( ch = 0; ch < 2; ch++ )
 850     {
 851         const int i_stride = h->mb.pic.i_stride[1+ch];
 852         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 853         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 854
 855         if( !b_bidir )
 856         {
 857             h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
 858                               h->mb.pic.p_fdec[1+ch],       i_stride,
 859                               mvp[0], mvp[1], 8, 8 );
 860         }
 861
 862         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 863
 864         /* calculate dct DC */
 865         dct2x2[0][0] = dct4x4[0][0][0];
 866         dct2x2[0][1] = dct4x4[1][0][0];
 867         dct2x2[1][0] = dct4x4[2][0][0];
 868         dct2x2[1][1] = dct4x4[3][0][0];
 869         h->dctf.dct2x2dc( dct2x2 );
 870         quant_2x2_dc( dct2x2, i_qp, 0 );
 871         if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1]  )
 872         {
 873             /* can't be */
 874             return 0;
 875         }
 876
 877         /* calculate dct coeffs */
 878         for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
 879         {
 880             quant_4x4( dct4x4[i4x4], i_qp, 0 );
 881             scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
 882
 883             i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
 884             if( i_decimate_mb >= 7 )
 885             {
 886                 return 0;
 887             }
 888         }
 889     }
 890
 891     return 1;
 892 }