git.sesse.net Git - x264/blob - encoder/macroblock.c

   1 /*****************************************************************************
   2  * macroblock.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdio.h>
  25 #include <string.h>
  26
  27 #include "common/common.h"
  28 #include "macroblock.h"
  29
  30
  31 #define ZIG(i,y,x) level[i] = dct[x][y];
  32 static inline void zigzag_scan_2x2_dc( int level[4], int16_t dct[2][2] )
  33 {
  34     ZIG(0,0,0)
  35     ZIG(1,0,1)
  36     ZIG(2,1,0)
  37     ZIG(3,1,1)
  38 }
  39 #undef ZIG
  40
  41 /* (ref: JVT-B118)
  42  * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
  43  * to 0 (low score means set it to null)
  44  * Used in inter macroblock (luma and chroma)
  45  *  luma: for a 8x8 block: if score < 4 -> null
  46  *        for the complete mb: if score < 6 -> null
  47  *  chroma: for the complete mb: if score < 7 -> null
  48  */
  49 static int x264_mb_decimate_score( int *dct, int i_max )
  50 {
  51     static const int i_ds_table4[16] = {
  52         3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
  53     static const int i_ds_table8[64] = {
  54         3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
  55         1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
  56         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  57         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
  58
  59     const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
  60     int i_score = 0;
  61     int idx = i_max - 1;
  62
  63     while( idx >= 0 && dct[idx] == 0 )
  64         idx--;
  65
  66     while( idx >= 0 )
  67     {
  68         int i_run;
  69
  70         if( abs( dct[idx--] ) > 1 )
  71             return 9;
  72
  73         i_run = 0;
  74         while( idx >= 0 && dct[idx] == 0 )
  75         {
  76             idx--;
  77             i_run++;
  78         }
  79         i_score += ds_table[i_run];
  80     }
  81
  82     return i_score;
  83 }
  84
  85 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
  86 {
  87     int x = 4 * block_idx_x[idx];
  88     int y = 4 * block_idx_y[idx];
  89     uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
  90     uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
  91     DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
  92
  93     if( h->mb.b_lossless )
  94     {
  95         h->zigzagf.sub_4x4( h->dct.block[idx].luma4x4, p_src, p_dst );
  96         return;
  97     }
  98
  99     h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
 100
 101     if( h->mb.b_trellis )
 102         x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
 103     else
 104         h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
 105
 106     h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4 );
 107     h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
 108
 109     /* output samples to fdec */
 110     h->dctf.add4x4_idct( p_dst, dct4x4 );
 111 }
 112
 113 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
 114 {
 115     int x = 8 * (idx&1);
 116     int y = 8 * (idx>>1);
 117     uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
 118     uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
 119     DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
 120
 121     h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
 122
 123     if( h->mb.b_trellis )
 124         x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
 125     else
 126         h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8IY][i_qscale], h->quant8_bias[CQM_8IY][i_qscale] );
 127
 128     h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
 129     h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
 130     h->dctf.add8x8_idct8( p_dst, dct8x8 );
 131 }
 132
 133 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
 134 {
 135     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 136     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 137
 138     DECLARE_ALIGNED( int16_t, dct4x4[16+1][4][4], 16 );
 139
 140     int i;
 141
 142     if( h->mb.b_lossless )
 143     {
 144         for( i = 0; i < 16; i++ )
 145         {
 146             int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
 147             int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
 148             h->zigzagf.sub_4x4ac( h->dct.block[i].residual_ac, p_src+oe, p_dst+od );
 149             dct4x4[0][block_idx_x[i]][block_idx_y[i]] = p_src[oe] - p_dst[od];
 150             p_dst[od] = p_src[oe];
 151         }
 152         h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct4x4[0] );
 153         return;
 154     }
 155
 156     h->dctf.sub16x16_dct( &dct4x4[1], p_src, p_dst );
 157     for( i = 0; i < 16; i++ )
 158     {
 159         /* copy dc coeff */
 160         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
 161
 162         /* quant/scan/dequant */
 163         if( h->mb.b_trellis )
 164             x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
 165         else
 166             h->quantf.quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
 167
 168         h->zigzagf.scan_4x4ac( h->dct.block[i].residual_ac, dct4x4[1+i] );
 169         h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
 170     }
 171
 172     h->dctf.dct4x4dc( dct4x4[0] );
 173     h->quantf.quant_4x4_dc( dct4x4[0], h->quant4_mf[CQM_4IY][i_qscale][0]>>1, h->quant4_bias[CQM_4IY][i_qscale][0]<<1 );
 174     h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct4x4[0] );
 175
 176     /* output samples to fdec */
 177     h->dctf.idct4x4dc( dct4x4[0] );
 178     x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale );  /* XXX not inversed */
 179
 180     /* calculate dct coeffs */
 181     for( i = 0; i < 16; i++ )
 182     {
 183         /* copy dc coeff */
 184         dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
 185     }
 186     /* put pixels to fdec */
 187     h->dctf.add16x16_idct( p_dst, &dct4x4[1] );
 188 }
 189
 190 void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
 191 {
 192     int i, ch;
 193     int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
 194
 195     for( ch = 0; ch < 2; ch++ )
 196     {
 197         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 198         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 199         int i_decimate_score = 0;
 200
 201         DECLARE_ALIGNED( int16_t, dct2x2[2][2] , 16 );
 202         DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
 203
 204         if( h->mb.b_lossless )
 205         {
 206             for( i = 0; i < 4; i++ )
 207             {
 208                 int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
 209                 int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
 210                 h->zigzagf.sub_4x4ac( h->dct.block[16+i+ch*4].residual_ac, p_src+oe, p_dst+od );
 211                 h->dct.chroma_dc[ch][i] = p_src[oe] - p_dst[od];
 212                 p_dst[od] = p_src[oe];
 213             }
 214             continue;
 215         }
 216
 217         h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
 218         /* calculate dct coeffs */
 219         for( i = 0; i < 4; i++ )
 220         {
 221             /* copy dc coeff */
 222             dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
 223
 224             /* no trellis; it doesn't seem to help chroma noticeably */
 225             h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] );
 226             h->zigzagf.scan_4x4ac( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
 227
 228             if( b_decimate )
 229             {
 230                 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
 231             }
 232         }
 233
 234         h->dctf.dct2x2dc( dct2x2 );
 235         h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qscale][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qscale][0]<<1 );
 236         zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
 237
 238         /* output samples to fdec */
 239         h->dctf.idct2x2dc( dct2x2 );
 240         x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale );  /* XXX not inversed */
 241
 242         if( b_decimate && i_decimate_score < 7 )
 243         {
 244             /* Near null chroma 8x8 block so make it null (bits saving) */
 245             memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
 246             if( !array_non_zero( dct2x2 ) )
 247                 continue;
 248             memset( dct4x4, 0, sizeof( dct4x4 ) );
 249         }
 250         else
 251         {
 252             for( i = 0; i < 4; i++ )
 253                 h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
 254         }
 255
 256         for( i = 0; i < 4; i++ )
 257             dct4x4[i][0][0] = dct2x2[0][i];
 258         h->dctf.add8x8_idct( p_dst, dct4x4 );
 259     }
 260
 261     /* coded block pattern */
 262     h->mb.i_cbp_chroma = 0;
 263     for( i = 0; i < 8; i++ )
 264     {
 265         int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
 266         h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
 267         h->mb.i_cbp_chroma |= nz;
 268     }
 269     if( h->mb.i_cbp_chroma )
 270         h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
 271     else if( array_non_zero( h->dct.chroma_dc ) )
 272         h->mb.i_cbp_chroma = 1;    /* dc only */
 273 }
 274
 275 static void x264_macroblock_encode_skip( x264_t *h )
 276 {
 277     int i;
 278     h->mb.i_cbp_luma = 0x00;
 279     h->mb.i_cbp_chroma = 0x00;
 280
 281     for( i = 0; i < 16+8; i++ )
 282     {
 283         h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
 284     }
 285
 286     /* store cbp */
 287     h->mb.cbp[h->mb.i_mb_xy] = 0;
 288 }
 289
 290 /*****************************************************************************
 291  * x264_macroblock_encode_pskip:
 292  *  Encode an already marked skip block
 293  *****************************************************************************/
 294 void x264_macroblock_encode_pskip( x264_t *h )
 295 {
 296     const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
 297                                 h->mb.mv_min[0], h->mb.mv_max[0] );
 298     const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
 299                                 h->mb.mv_min[1], h->mb.mv_max[1] );
 300
 301     /* Motion compensation XXX probably unneeded */
 302     h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 303                    h->mb.pic.p_fdec[0],    FDEC_STRIDE,
 304                    mvx, mvy, 16, 16 );
 305
 306     /* Chroma MC */
 307     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
 308                      h->mb.pic.p_fdec[1],       FDEC_STRIDE,
 309                      mvx, mvy, 8, 8 );
 310
 311     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
 312                      h->mb.pic.p_fdec[2],       FDEC_STRIDE,
 313                      mvx, mvy, 8, 8 );
 314
 315     x264_macroblock_encode_skip( h );
 316 }
 317
 318 /*****************************************************************************
 319  * x264_macroblock_encode:
 320  *****************************************************************************/
 321 void x264_macroblock_encode( x264_t *h )
 322 {
 323     int i_cbp_dc = 0;
 324     int i_qp = h->mb.i_qp;
 325     int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
 326     int b_force_no_skip = 0;
 327     int i;
 328
 329     if( h->sh.b_mbaff
 330         && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
 331         && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
 332     {
 333         /* The first skip is predicted to be a frame mb pair.
 334          * We don't yet support the aff part of mbaff, so force it to non-skip
 335          * so that we can pick the aff flag. */
 336         b_force_no_skip = 1;
 337         if( IS_SKIP(h->mb.i_type) )
 338         {
 339             if( h->mb.i_type == P_SKIP )
 340                 h->mb.i_type = P_L0;
 341             else if( h->mb.i_type == B_SKIP )
 342                 h->mb.i_type = B_DIRECT;
 343         }
 344     }
 345
 346     if( h->mb.i_type == P_SKIP )
 347     {
 348         /* A bit special */
 349         x264_macroblock_encode_pskip( h );
 350         return;
 351     }
 352     if( h->mb.i_type == B_SKIP )
 353     {
 354         /* XXX motion compensation is probably unneeded */
 355         x264_mb_mc( h );
 356         x264_macroblock_encode_skip( h );
 357         return;
 358     }
 359
 360     if( h->mb.i_type == I_16x16 )
 361     {
 362         const int i_mode = h->mb.i_intra16x16_pred_mode;
 363         h->mb.b_transform_8x8 = 0;
 364         /* do the right prediction */
 365         h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
 366
 367         /* encode the 16x16 macroblock */
 368         x264_mb_encode_i16x16( h, i_qp );
 369     }
 370     else if( h->mb.i_type == I_8x8 )
 371     {
 372         DECLARE_ALIGNED( uint8_t, edge[33], 8 );
 373         h->mb.b_transform_8x8 = 1;
 374         for( i = 0; i < 4; i++ )
 375         {
 376             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
 377             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
 378
 379             x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
 380             h->predict_8x8[i_mode]( p_dst, edge );
 381             x264_mb_encode_i8x8( h, i, i_qp );
 382         }
 383     }
 384     else if( h->mb.i_type == I_4x4 )
 385     {
 386         h->mb.b_transform_8x8 = 0;
 387         for( i = 0; i < 16; i++ )
 388         {
 389             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
 390             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
 391
 392             if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
 393                 /* emulate missing topright samples */
 394                 *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
 395
 396             h->predict_4x4[i_mode]( p_dst );
 397             x264_mb_encode_i4x4( h, i, i_qp );
 398         }
 399     }
 400     else    /* Inter MB */
 401     {
 402         int i8x8, i4x4, idx;
 403         int i_decimate_mb = 0;
 404
 405         /* Motion compensation */
 406         x264_mb_mc( h );
 407
 408         if( h->mb.b_lossless )
 409         {
 410             for( i4x4 = 0; i4x4 < 16; i4x4++ )
 411             {
 412                 int x = 4*block_idx_x[i4x4];
 413                 int y = 4*block_idx_y[i4x4];
 414                 h->zigzagf.sub_4x4( h->dct.block[i4x4].luma4x4,
 415                                     h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
 416                                     h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
 417             }
 418         }
 419         else if( h->mb.b_transform_8x8 )
 420         {
 421             DECLARE_ALIGNED( int16_t, dct8x8[4][8][8], 16 );
 422             int nnz8x8[4] = {1,1,1,1};
 423             b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
 424             h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
 425
 426             for( idx = 0; idx < 4; idx++ )
 427             {
 428                 if( h->mb.b_noise_reduction )
 429                     x264_denoise_dct( h, (int16_t*)dct8x8[idx] );
 430                 if( h->mb.b_trellis )
 431                     x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
 432                 else
 433                     h->quantf.quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
 434
 435                 h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
 436
 437                 if( b_decimate )
 438                 {
 439                     int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
 440                     i_decimate_mb += i_decimate_8x8;
 441                     if( i_decimate_8x8 < 4 )
 442                     {
 443                         memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
 444                         memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
 445                         nnz8x8[idx] = 0;
 446                     }
 447                 }
 448                 else
 449                     nnz8x8[idx] = array_non_zero( dct8x8[idx] );
 450             }
 451
 452             if( i_decimate_mb < 6 && b_decimate )
 453                 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
 454             else
 455             {
 456                 for( idx = 0; idx < 4; idx++ )
 457                     if( nnz8x8[idx] )
 458                     {
 459                         h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
 460                         h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
 461                     }
 462             }
 463         }
 464         else
 465         {
 466             DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
 467             int nnz8x8[4] = {1,1,1,1};
 468             h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
 469
 470             for( i8x8 = 0; i8x8 < 4; i8x8++ )
 471             {
 472                 int i_decimate_8x8;
 473
 474                 /* encode one 4x4 block */
 475                 i_decimate_8x8 = 0;
 476                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
 477                 {
 478                     idx = i8x8 * 4 + i4x4;
 479
 480                     if( h->mb.b_noise_reduction )
 481                         x264_denoise_dct( h, (int16_t*)dct4x4[idx] );
 482                     if( h->mb.b_trellis )
 483                         x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
 484                     else
 485                         h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 486
 487                     h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4[idx] );
 488
 489                     if( b_decimate )
 490                         i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
 491                 }
 492
 493                 /* decimate this 8x8 block */
 494                 i_decimate_mb += i_decimate_8x8;
 495                 if( i_decimate_8x8 < 4 && b_decimate )
 496                 {
 497                     memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
 498                     memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
 499                     nnz8x8[i8x8] = 0;
 500                 }
 501             }
 502
 503             if( i_decimate_mb < 6 && b_decimate )
 504                 memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
 505             else
 506             {
 507                 for( i8x8 = 0; i8x8 < 4; i8x8++ )
 508                     if( nnz8x8[i8x8] )
 509                     {
 510                         for( i = 0; i < 4; i++ )
 511                             h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
 512                         h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
 513                     }
 514             }
 515         }
 516     }
 517
 518     /* encode chroma */
 519     if( IS_INTRA( h->mb.i_type ) )
 520     {
 521         const int i_mode = h->mb.i_chroma_pred_mode;
 522         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
 523         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
 524     }
 525
 526     /* encode the 8x8 blocks */
 527     x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
 528
 529     /* coded block pattern and non_zero_count */
 530     h->mb.i_cbp_luma = 0x00;
 531     if( h->mb.i_type == I_16x16 )
 532     {
 533         for( i = 0; i < 16; i++ )
 534         {
 535             const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
 536             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 537             if( nz > 0 )
 538                 h->mb.i_cbp_luma = 0x0f;
 539         }
 540     }
 541     else if( h->mb.b_transform_8x8 )
 542     {
 543         /* coded_block_flag is enough for CABAC.
 544          * the full non_zero_count is done only in CAVLC. */
 545         for( i = 0; i < 4; i++ )
 546         {
 547             const int nz = array_non_zero( h->dct.luma8x8[i] );
 548             int j;
 549             for( j = 0; j < 4; j++ )
 550                 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
 551             if( nz > 0 )
 552                 h->mb.i_cbp_luma |= 1 << i;
 553         }
 554     }
 555     else
 556     {
 557         for( i = 0; i < 16; i++ )
 558         {
 559             const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
 560             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 561             if( nz > 0 )
 562                 h->mb.i_cbp_luma |= 1 << (i/4);
 563         }
 564     }
 565
 566     if( h->param.b_cabac )
 567     {
 568         i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc ) )
 569                  | array_non_zero( h->dct.chroma_dc[0] ) << 1
 570                  | array_non_zero( h->dct.chroma_dc[1] ) << 2;
 571     }
 572
 573     /* store cbp */
 574     h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
 575
 576     /* Check for P_SKIP
 577      * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
 578      *      (if multiple mv give same result)*/
 579     if( !b_force_no_skip )
 580     {
 581         if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
 582             h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma == 0x00 &&
 583             h->mb.cache.mv[0][x264_scan8[0]][0] == h->mb.cache.pskip_mv[0] &&
 584             h->mb.cache.mv[0][x264_scan8[0]][1] == h->mb.cache.pskip_mv[1] &&
 585             h->mb.cache.ref[0][x264_scan8[0]] == 0 )
 586         {
 587             h->mb.i_type = P_SKIP;
 588         }
 589
 590         /* Check for B_SKIP */
 591         if( h->mb.i_type == B_DIRECT &&
 592             h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
 593         {
 594             h->mb.i_type = B_SKIP;
 595         }
 596     }
 597 }
 598
 599 /*****************************************************************************
 600  * x264_macroblock_probe_skip:
 601  *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 602  *  the previous QP
 603  *****************************************************************************/
 604 int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
 605 {
 606     DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
 607     DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
 608     DECLARE_ALIGNED( int,     dctscan[16], 16 );
 609
 610     int i_qp = h->mb.i_qp;
 611     int mvp[2];
 612     int ch;
 613
 614     int i8x8, i4x4;
 615     int i_decimate_mb;
 616
 617     if( !b_bidir )
 618     {
 619         /* Get the MV */
 620         mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
 621         mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
 622
 623         /* Motion compensation */
 624         h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 625                        h->mb.pic.p_fdec[0],    FDEC_STRIDE,
 626                        mvp[0], mvp[1], 16, 16 );
 627     }
 628
 629     /* get luma diff */
 630     h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0],
 631                                   h->mb.pic.p_fdec[0] );
 632
 633     for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
 634     {
 635         /* encode one 4x4 block */
 636         for( i4x4 = 0; i4x4 < 4; i4x4++ )
 637         {
 638             const int idx = i8x8 * 4 + i4x4;
 639
 640             h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 641             h->zigzagf.scan_4x4( dctscan, dct4x4[idx] );
 642
 643             i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
 644
 645             if( i_decimate_mb >= 6 )
 646             {
 647                 /* not as P_SKIP */
 648                 return 0;
 649             }
 650         }
 651     }
 652
 653     /* encode chroma */
 654     i_qp = h->mb.i_chroma_qp;
 655
 656     for( ch = 0; ch < 2; ch++ )
 657     {
 658         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 659         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 660
 661         if( !b_bidir )
 662         {
 663             h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
 664                              h->mb.pic.p_fdec[1+ch],       FDEC_STRIDE,
 665                              mvp[0], mvp[1], 8, 8 );
 666         }
 667
 668         h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
 669
 670         /* calculate dct DC */
 671         dct2x2[0][0] = dct4x4[0][0][0];
 672         dct2x2[0][1] = dct4x4[1][0][0];
 673         dct2x2[1][0] = dct4x4[2][0][0];
 674         dct2x2[1][1] = dct4x4[3][0][0];
 675         h->dctf.dct2x2dc( dct2x2 );
 676         h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 );
 677         if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1]  )
 678         {
 679             /* can't be */
 680             return 0;
 681         }
 682
 683         /* calculate dct coeffs */
 684         for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
 685         {
 686             h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
 687             h->zigzagf.scan_4x4ac( dctscan, dct4x4[i4x4] );
 688
 689             i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
 690             if( i_decimate_mb >= 7 )
 691             {
 692                 return 0;
 693             }
 694         }
 695     }
 696
 697     return 1;
 698 }
 699
 700 /****************************************************************************
 701  * DCT-domain noise reduction / adaptive deadzone
 702  * from libavcodec
 703  ****************************************************************************/
 704
 705 void x264_noise_reduction_update( x264_t *h )
 706 {
 707     int cat, i;
 708     for( cat = 0; cat < 2; cat++ )
 709     {
 710         int size = cat ? 64 : 16;
 711         const int *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
 712
 713         if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
 714         {
 715             for( i = 0; i < size; i++ )
 716                 h->nr_residual_sum[cat][i] >>= 1;
 717             h->nr_count[cat] >>= 1;
 718         }
 719
 720         for( i = 0; i < size; i++ )
 721             h->nr_offset[cat][i] =
 722                 ((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
 723                  + h->nr_residual_sum[cat][i]/2)
 724               / ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
 725     }
 726 }
 727
 728 void x264_denoise_dct( x264_t *h, int16_t *dct )
 729 {
 730     const int cat = h->mb.b_transform_8x8;
 731     int i;
 732
 733     h->nr_count[cat]++;
 734
 735     for( i = (cat ? 63 : 15); i >= 1; i-- )
 736     {
 737         int level = dct[i];
 738         if( level )
 739         {
 740             if( level > 0 )
 741             {
 742                 h->nr_residual_sum[cat][i] += level;
 743                 level -= h->nr_offset[cat][i];
 744                 if( level < 0 )
 745                     level = 0;
 746             }
 747             else
 748             {
 749                 h->nr_residual_sum[cat][i] -= level;
 750                 level += h->nr_offset[cat][i];
 751                 if( level > 0 )
 752                     level = 0;
 753             }
 754             dct[i] = level;
 755         }
 756     }
 757 }
 758
 759 /*****************************************************************************
 760  * RD only; 4 calls to this do not make up for one macroblock_encode.
 761  * doesn't transform chroma dc.
 762  *****************************************************************************/
 763 void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 764 {
 765     int i_qp = h->mb.i_qp;
 766     uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
 767     uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
 768     int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
 769     int nnz8x8;
 770     int ch;
 771
 772     x264_mb_mc_8x8( h, i8 );
 773
 774     if( h->mb.b_transform_8x8 )
 775     {
 776         DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
 777         h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
 778         h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
 779         h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
 780
 781         if( b_decimate )
 782             nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 );
 783         else
 784             nnz8x8 = array_non_zero( dct8x8 );
 785
 786         if( nnz8x8 )
 787         {
 788             h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
 789             h->dctf.add8x8_idct8( p_fdec, dct8x8 );
 790         }
 791     }
 792     else
 793     {
 794         int i4;
 795         DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
 796         h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
 797         h->quantf.quant_4x4( dct4x4[0], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 798         h->quantf.quant_4x4( dct4x4[1], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 799         h->quantf.quant_4x4( dct4x4[2], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 800         h->quantf.quant_4x4( dct4x4[3], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
 801         for( i4 = 0; i4 < 4; i4++ )
 802             h->zigzagf.scan_4x4( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] );
 803
 804         if( b_decimate )
 805         {
 806             int i_decimate_8x8 = 0;
 807             for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ )
 808                 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 );
 809             nnz8x8 = 4 <= i_decimate_8x8;
 810         }
 811         else
 812             nnz8x8 = array_non_zero( dct4x4 );
 813
 814         if( nnz8x8 )
 815         {
 816             for( i4 = 0; i4 < 4; i4++ )
 817                 h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
 818             h->dctf.add8x8_idct( p_fdec, dct4x4 );
 819         }
 820     }
 821
 822     i_qp = h->mb.i_chroma_qp;
 823
 824     for( ch = 0; ch < 2; ch++ )
 825     {
 826         DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
 827         p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
 828         p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
 829
 830         h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
 831         h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
 832         h->zigzagf.scan_4x4ac( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 );
 833         if( array_non_zero( dct4x4 ) )
 834         {
 835             h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
 836             h->dctf.add4x4_idct( p_fdec, dct4x4 );
 837         }
 838     }
 839
 840     if( nnz8x8 )
 841         h->mb.i_cbp_luma |= (1 << i8);
 842     else
 843         h->mb.i_cbp_luma &= ~(1 << i8);
 844     h->mb.i_cbp_chroma = 0x02;
 845 }