git.sesse.net Git - x264/blob - encoder/macroblock.c

   1 /*****************************************************************************
   2  * macroblock.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27
  28 #include "common/common.h"
  29 #include "macroblock.h"
  30
  31
  32 static const uint8_t block_idx_x[16] =
  33 {
  34     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
  35 };
  36 static const uint8_t block_idx_y[16] =
  37 {
  38     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
  39 };
  40 static const uint8_t block_idx_xy[4][4] =
  41 {
  42     { 0, 2, 8,  10 },
  43     { 1, 3, 9,  11 },
  44     { 4, 6, 12, 14 },
  45     { 5, 7, 13, 15 }
  46 };
  47
  48 /* def_quant4_mf only for probe_skip; actual encoding uses matrices from set.c */
  49 /* FIXME this seems to make better decisions with cqm=jvt, but could screw up
  50  * with general custom matrices. */
  51 static const int def_quant4_mf[6][4][4] =
  52 {
  53     { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
  54       { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
  55     { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
  56       { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
  57     { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
  58       { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
  59     { {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 },
  60       {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 } },
  61     { {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 },
  62       {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 } },
  63     { {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 },
  64       {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 } }
  65 };
  66
  67 static const int i_chroma_qp_table[52] =
  68 {
  69      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
  70     10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
  71     20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
  72     29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
  73     36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
  74     39, 39
  75 };
  76
  77 /****************************************************************************
  78  * Scan and Quant functions
  79  ****************************************************************************/
  80 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
  81 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
  82
  83 #define ZIG(i,y,x) level[i] = dct[y][x];
  84 static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
  85 {
  86     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
  87     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
  88     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
  89     ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
  90     ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
  91     ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
  92     ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
  93     ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
  94     ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
  95     ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
  96     ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
  97     ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
  98     ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
  99     ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
 100     ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
 101     ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
 102 }
 103 static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
 104 {
 105     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
 106     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
 107     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
 108     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 109 }
 110 static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
 111 {
 112                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
 113     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
 114     ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
 115     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 116 }
 117 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
 118 {
 119     ZIG(0,0,0)
 120     ZIG(1,0,1)
 121     ZIG(2,1,0)
 122     ZIG(3,1,1)
 123 }
 124 #undef ZIG
 125
 126 #define ZIG(i,y,x) {\
 127     int o = x+y*i_stride;\
 128     level[i] = p_src[o] - p_dst[o];\
 129     p_dst[o] = p_src[o];\
 130 }
 131 static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
 132 {
 133     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
 134     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
 135     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
 136     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 137 }
 138 static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
 139 {
 140                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
 141     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
 142     ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
 143     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 144 }
 145 #undef ZIG
 146
 147 static void quant_8x8( int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
 148 {
 149     const int i_qbits = 16 + i_qscale / 6;
 150     const int i_mf = i_qscale % 6;
 151     const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 152
 153     int x,y;
 154     for( y = 0; y < 8; y++ )
 155     {
 156         for( x = 0; x < 8; x++ )
 157         {
 158             if( dct[y][x] > 0 )
 159                 dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
 160             else
 161                 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
 162         }
 163     }
 164 }
 165 static void quant_4x4( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
 166 {
 167     const int i_qbits = 15 + i_qscale / 6;
 168     const int i_mf = i_qscale % 6;
 169     const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 170
 171     int x,y;
 172     for( y = 0; y < 4; y++ )
 173     {
 174         for( x = 0; x < 4; x++ )
 175         {
 176             if( dct[y][x] > 0 )
 177                 dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
 178             else
 179                 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
 180         }
 181     }
 182 }
 183 static void quant_4x4_dc( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
 184 {
 185     const int i_qbits = 15 + i_qscale / 6;
 186     const int f2 = ( 2 << i_qbits ) / 3;
 187     const int i_qmf = quant_mf[i_qscale%6][0][0];
 188     int x,y;
 189
 190     for( y = 0; y < 4; y++ )
 191     {
 192         for( x = 0; x < 4; x++ )
 193         {
 194             if( dct[y][x] > 0 )
 195                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 196             else
 197                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 198         }
 199     }
 200 }
 201 static void quant_2x2_dc( int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
 202 {
 203     int const i_qbits = 15 + i_qscale / 6;
 204     const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
 205     const int i_qmf = quant_mf[i_qscale%6][0][0];
 206
 207     int x,y;
 208     for( y = 0; y < 2; y++ )
 209     {
 210         for( x = 0; x < 2; x++ )
 211         {
 212             if( dct[y][x] > 0 )
 213                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 214             else
 215                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 216         }
 217     }
 218 }
 219 #if 0
     /* Everything from here to the matching #endif is compiled out.
      * It holds alternative versions of quant_4x4, quant_4x4_dc and quant_2x2_dc
      * whose rounding offset is taken from the deadzone fraction tables below
      * instead of the fixed 2^qbits/3 and 2^qbits/6 offsets of the live
      * functions.
      * NOTE(review): these versions read `quant_mf` although it is not one of
      * their parameters, and no file-scope object of that name is visible in
      * this part of the file -- presumably the block must be updated before it
      * could be re-enabled; confirm. */
 220 /* From a JVT doc */
     /* Each table entry is a { numerator, denominator } pair, indexed [y][x]. */
 221 static const int f_deadzone_intra[4][4][2] = /* [num][den] */
 222 {
 223     { {1,2}, {3,7}, {2,5}, {1,3} },
 224     { {3,7}, {2,5}, {1,3}, {1,4} },
 225     { {2,5}, {1,3}, {1,4}, {1,5} },
 226     { {1,3}, {1,4}, {1,5}, {1,5} }
 227 };
 228 static const int f_deadzone_inter[4][4][2] = /* [num][den] */
 229 {
 230     { {1,3}, {2,7}, {4,15},{2,9} },
 231     { {2,7}, {4,15},{2,9}, {1,6} },
 232     { {4,15},{2,9}, {1,6}, {1,7} },
 233     { {2,9}, {1,6}, {1,7}, {2,15} }
 234 };
 235
 236
     /* 4x4 quantizer with a per-position rounding offset
      * f = num * 2^i_qbits / den taken from the intra or inter table. */
 237 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
 238 {
 239     const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
 240     const int i_qbits = 15 + i_qscale / 6;
 241     const int i_mf = i_qscale % 6;
 242
 243     int x,y;
 244     for( y = 0; y < 4; y++ )
 245     {
 246         for( x = 0; x < 4; x++ )
 247         {
     /* The nested disabled branch indexes the two tables directly; the #else
      * branch computes the same offset through the f_deadzone pointer. */
 248 #if 0
 249             const int f = b_intra ?
 250                           (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
 251                           :
 252                           (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
 253 #else
 254             const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
 255 #endif
 256
 257             if( dct[y][x] > 0 )
 258             {
 259                 dct[y][x] =( f + dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits;
 260             }
 261             else
 262             {
 263                 dct[y][x] = - ( ( f - dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits );
 264             }
 265         }
 266     }
 267 }
 268
     /* 4x4 DC quantizer: one multiplier (quant_mf[..][0][0]) and one offset,
      * built from the intra table entry at [0][0], for all 16 values. */
 269 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
 270 {
 271     const int i_qbits = 15 + i_qscale / 6;
 272     const int i_qmf = quant_mf[i_qscale%6][0][0];
 273     const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
 274     int x,y;
 275
 276     for( y = 0; y < 4; y++ )
 277     {
 278         for( x = 0; x < 4; x++ )
 279         {
 280
 281             if( dct[y][x] > 0 )
 282             {
 283                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 284             }
 285             else
 286             {
 287                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 288             }
 289         }
 290     }
 291 }
 292
     /* 2x2 DC quantizer: as quant_4x4_dc above, but the offset comes from the
      * [0][0] entry of the intra or the inter table depending on b_intra. */
 293 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
 294 {
 295     int const i_qbits = 15 + i_qscale / 6;
 296     const int i_qmf = quant_mf[i_qscale%6][0][0];
 297     const int f2 = b_intra ?
 298                    (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
 299                    :
 300                    (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
 301     int x,y;
 302     for( y = 0; y < 2; y++ )
 303     {
 304         for( x = 0; x < 2; x++ )
 305         {
 306             if( dct[y][x] > 0 )
 307             {
 308                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 309             }
 310             else
 311             {
 312                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 313             }
 314         }
 315     }
 316 }
 317
 318
 319 #endif
 320
 321 /* (ref: JVT-B118)
 322  * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
 323  * to 0 (low score means set it to null)
 324  * Used in inter macroblock (luma and chroma)
 325  *  luma: for a 8x8 block: if score < 4 -> null
 326  *        for the complete mb: if score < 6 -> null
 327  *  chroma: for the complete mb: if score < 7 -> null
 328  */
 329 static int x264_mb_decimate_score( int *dct, int i_max )
 330 {
 331     static const int i_ds_table4[16] = {
 332         3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
 333     static const int i_ds_table8[64] = {
 334         3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
 335         1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
 336         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 337         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
 338
 339     const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
 340     int i_score = 0;
 341     int idx = i_max - 1;
 342
 343     while( idx >= 0 && dct[idx] == 0 )
 344         idx--;
 345
 346     while( idx >= 0 )
 347     {
 348         int i_run;
 349
 350         if( abs( dct[idx--] ) > 1 )
 351             return 9;
 352
 353         i_run = 0;
 354         while( idx >= 0 && dct[idx] == 0 )
 355         {
 356             idx--;
 357             i_run++;
 358         }
 359         i_score += ds_table[i_run];
 360     }
 361
 362     return i_score;
 363 }
 364
 365 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
 366 {
 367     const int i_stride = h->mb.pic.i_stride[0];
 368     const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
 369     uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
 370     uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
 371     int16_t dct4x4[4][4];
 372
 373     if( h->mb.b_lossless )
 374     {
 375         sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
 376         return;
 377     }
 378
 379     h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 380     quant_4x4( dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
 381     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
 382     x264_mb_dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
 383
 384     /* output samples to fdec */
 385     h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
 386 }
 387
 388 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
 389 {
 390     const int i_stride = h->mb.pic.i_stride[0];
 391     const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
 392     uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
 393     uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
 394     int16_t dct8x8[8][8];
 395
 396     h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
 397     quant_8x8( dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
 398     scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
 399     x264_mb_dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
 400     h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
 401 }
 402
 403 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
 404 {
 405     const int i_stride = h->mb.pic.i_stride[0];
 406     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 407     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 408
 409     int16_t dct4x4[16+1][4][4];
 410
 411     int i;
 412
 413     if( h->mb.b_lossless )
 414     {
 415         for( i = 0; i < 16; i++ )
 416         {
 417             int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
 418             sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
 419             dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
 420             p_dst[o] = p_src[o];
 421         }
 422         scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
 423         return;
 424     }
 425
 426     h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
 427     for( i = 0; i < 16; i++ )
 428     {
 429         /* copy dc coeff */
 430         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
 431
 432         /* quant/scan/dequant */
 433         quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
 434         scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
 435         x264_mb_dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
 436     }
 437
 438     h->dctf.dct4x4dc( dct4x4[0] );
 439     quant_4x4_dc( dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
 440     scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
 441
 442     /* output samples to fdec */
 443     h->dctf.idct4x4dc( dct4x4[0] );
 444     x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale );  /* XXX not inversed */
 445
 446     /* calculate dct coeffs */
 447     for( i = 0; i < 16; i++ )
 448     {
 449         /* copy dc coeff */
 450         dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
 451     }
 452     /* put pixels to fdec */
 453     h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
 454 }
 455
 456 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
 457 {
 458     int i, ch;
 459
 460     for( ch = 0; ch < 2; ch++ )
 461     {
 462         const int i_stride = h->mb.pic.i_stride[1+ch];
 463         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 464         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 465         int i_decimate_score = 0;
 466
 467         int16_t dct2x2[2][2];
 468         int16_t dct4x4[4][4][4];
 469
 470         if( h->mb.b_lossless )
 471         {
 472             for( i = 0; i < 4; i++ )
 473             {
 474                 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
 475                 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
 476                 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
 477                 p_dst[o] = p_src[o];
 478             }
 479             continue;
 480         }
 481
 482         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 483         /* calculate dct coeffs */
 484         for( i = 0; i < 4; i++ )
 485         {
 486             /* copy dc coeff */
 487             dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
 488
 489             quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
 490             scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
 491             x264_mb_dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
 492
 493             if( b_inter )
 494             {
 495                 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
 496             }
 497         }
 498
 499         h->dctf.dct2x2dc( dct2x2 );
 500         quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
 501         scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
 502
 503         /* output samples to fdec */
 504         h->dctf.idct2x2dc( dct2x2 );
 505         x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale );  /* XXX not inversed */
 506
 507         if( b_inter && i_decimate_score < 7 )
 508         {
 509             /* Near null chroma 8x8 block so make it null (bits saving) */
 510             for( i = 0; i < 4; i++ )
 511             {
 512                 int x, y;
 513                 for( x = 0; x < 15; x++ )
 514                 {
 515                     h->dct.block[16+i+ch*4].residual_ac[x] = 0;
 516                 }
 517                 for( x = 0; x < 4; x++ )
 518                 {
 519                     for( y = 0; y < 4; y++ )
 520                     {
 521                         dct4x4[i][x][y] = 0;
 522                     }
 523                 }
 524             }
 525         }
 526
 527         /* calculate dct coeffs */
 528         for( i = 0; i < 4; i++ )
 529         {
 530             /* copy dc coeff */
 531             dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
 532         }
 533         h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
 534     }
 535 }
 536
 537 static void x264_macroblock_encode_skip( x264_t *h )
 538 {
 539     int i;
 540     h->mb.i_cbp_luma = 0x00;
 541     h->mb.i_cbp_chroma = 0x00;
 542
 543     for( i = 0; i < 16+8; i++ )
 544     {
 545         h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
 546     }
 547
 548     /* store cbp */
 549     h->mb.cbp[h->mb.i_mb_xy] = 0;
 550 }
 551
 552 /*****************************************************************************
 553  * x264_macroblock_encode_pskip:
 554  *  Encode an already marked skip block
 555  *****************************************************************************/
 556 void x264_macroblock_encode_pskip( x264_t *h )
 557 {
 558     const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
 559                                 h->mb.mv_min[0], h->mb.mv_max[0] );
 560     const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
 561                                 h->mb.mv_min[1], h->mb.mv_max[1] );
 562
 563     /* Motion compensation XXX probably unneeded */
 564     h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 565                     h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
 566                     mvx, mvy, 16, 16 );
 567
 568     /* Chroma MC */
 569     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
 570                       h->mb.pic.p_fdec[1],       h->mb.pic.i_stride[1],
 571                       mvx, mvy, 8, 8 );
 572
 573     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
 574                       h->mb.pic.p_fdec[2],       h->mb.pic.i_stride[2],
 575                       mvx, mvy, 8, 8 );
 576
 577     x264_macroblock_encode_skip( h );
 578 }
 579
 580 /*****************************************************************************
 581  * x264_macroblock_encode:
 582  *****************************************************************************/
 583 void x264_macroblock_encode( x264_t *h )
 584 {
 585     int i_cbp_dc = 0;
 586     int i_qp = h->mb.i_qp;
 587     int i;
 588
 589     if( h->mb.i_type == P_SKIP )
 590     {
 591         /* A bit special */
 592         x264_macroblock_encode_pskip( h );
 593         return;
 594     }
 595     if( h->mb.i_type == B_SKIP )
 596     {
 597         /* XXX motion compensation is probably unneeded */
 598         x264_mb_mc( h );
 599         x264_macroblock_encode_skip( h );
 600         return;
 601     }
 602
 603     if( h->mb.i_type == I_16x16 )
 604     {
 605         const int i_mode = h->mb.i_intra16x16_pred_mode;
 606         h->mb.b_transform_8x8 = 0;
 607         /* do the right prediction */
 608         h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 609
 610         /* encode the 16x16 macroblock */
 611         x264_mb_encode_i16x16( h, i_qp );
 612
 613         /* fix the pred mode value */
 614         h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
 615     }
 616     else if( h->mb.i_type == I_8x8 )
 617     {
 618         h->mb.b_transform_8x8 = 1;
 619         for( i = 0; i < 4; i++ )
 620         {
 621             const int i_dst = h->mb.pic.i_stride[0];
 622             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
 623             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
 624
 625             h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
 626             x264_mb_encode_i8x8( h, i, i_qp );
 627             h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]] = x264_mb_pred_mode4x4_fix(i_mode);
 628         }
 629     }
 630     else if( h->mb.i_type == I_4x4 )
 631     {
 632         h->mb.b_transform_8x8 = 0;
 633         for( i = 0; i < 16; i++ )
 634         {
 635             const int i_dst = h->mb.pic.i_stride[0];
 636             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
 637             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
 638
 639             h->predict_4x4[i_mode]( p_dst, i_dst );
 640             x264_mb_encode_i4x4( h, i, i_qp );
 641             h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix(i_mode);
 642         }
 643     }
 644     else    /* Inter MB */
 645     {
 646         int i8x8, i4x4, idx;
 647         int i_decimate_mb = 0;
 648
 649         /* Motion compensation */
 650         x264_mb_mc( h );
 651
 652         if( h->mb.b_lossless )
 653         {
 654             for( i4x4 = 0; i4x4 < 16; i4x4++ )
 655             {
 656                 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
 657                 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
 658             }
 659         }
 660         else if( h->mb.b_transform_8x8 )
 661         {
 662             int16_t dct8x8[4][8][8];
 663             h->dctf.sub16x16_dct8( dct8x8,
 664                                    h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 665                                    h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 666
 667             for( idx = 0; idx < 4; idx++ )
 668             {
 669                 int i_decimate_8x8;
 670
 671                 quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
 672                 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
 673                 x264_mb_dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
 674
 675                 i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
 676                 i_decimate_mb += i_decimate_8x8;
 677                 if( i_decimate_8x8 < 4 )
 678                 {
 679                     memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
 680                     memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
 681                 }
 682             }
 683
 684             if( i_decimate_mb < 6 )
 685                 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
 686             else
 687                 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
 688         }
 689         else
 690         {
 691             int16_t dct4x4[16][4][4];
 692             h->dctf.sub16x16_dct( dct4x4,
 693                                   h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 694                                   h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 695
 696             for( i8x8 = 0; i8x8 < 4; i8x8++ )
 697             {
 698                 int i_decimate_8x8;
 699
 700                 /* encode one 4x4 block */
 701                 i_decimate_8x8 = 0;
 702                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
 703                 {
 704                     idx = i8x8 * 4 + i4x4;
 705
 706                     quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
 707                     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
 708                     x264_mb_dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
 709
 710                     i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
 711                 }
 712
 713                 /* decimate this 8x8 block */
 714                 i_decimate_mb += i_decimate_8x8;
 715                 if( i_decimate_8x8 < 4 )
 716                 {
 717                     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 718                     {
 719                         int x, y;
 720                         idx = i8x8 * 4 + i4x4;
 721                         for( i = 0; i < 16; i++ )
 722                             h->dct.block[idx].luma4x4[i] = 0;
 723                         for( x = 0; x < 4; x++ )
 724                             for( y = 0; y < 4; y++ )
 725                                 dct4x4[idx][x][y] = 0;
 726                     }
 727                 }
 728             }
 729
 730             if( i_decimate_mb < 6 )
 731                 for( idx = 0; idx < 16; idx++ )
 732                     for( i = 0; i < 16; i++ )
 733                         h->dct.block[idx].luma4x4[i] = 0;
 734             else
 735                 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
 736         }
 737     }
 738
 739     /* encode chroma */
 740     i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 741     if( IS_INTRA( h->mb.i_type ) )
 742     {
 743         const int i_mode = h->mb.i_chroma_pred_mode;
 744         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
 745         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
 746     }
 747
 748     /* encode the 8x8 blocks */
 749     x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
 750
 751     /* Calculate the Luma/Chroma patern and non_zero_count */
 752     h->mb.i_cbp_luma = 0x00;
 753     if( h->mb.i_type == I_16x16 )
 754     {
 755         for( i = 0; i < 16; i++ )
 756         {
 757             const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
 758             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 759             if( nz > 0 )
 760                 h->mb.i_cbp_luma = 0x0f;
 761         }
 762     }
 763     else if( h->mb.b_transform_8x8 )
 764     {
 765         /* coded_block_flag is enough for CABAC.
 766          * the full non_zero_count is done only in CAVLC. */
 767         for( i = 0; i < 4; i++ )
 768         {
 769             const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
 770             int j;
 771             for( j = 0; j < 4; j++ )
 772                 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
 773             if( nz > 0 )
 774                 h->mb.i_cbp_luma |= 1 << i;
 775         }
 776     }
 777     else
 778     {
 779         for( i = 0; i < 16; i++ )
 780         {
 781             const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
 782             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 783             if( nz > 0 )
 784                 h->mb.i_cbp_luma |= 1 << (i/4);
 785         }
 786     }
 787
 788     /* Calculate the chroma patern */
 789     h->mb.i_cbp_chroma = 0x00;
 790     for( i = 0; i < 8; i++ )
 791     {
 792         const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
 793         h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
 794         if( nz > 0 )
 795         {
 796             h->mb.i_cbp_chroma = 0x02;    /* dc+ac (we can't do only ac) */
 797         }
 798     }
 799     if( h->mb.i_cbp_chroma == 0x00 &&
 800         ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
 801     {
 802         h->mb.i_cbp_chroma = 0x01;    /* dc only */
 803     }
 804
 805     if( h->param.b_cabac )
 806     {
 807         if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
 808             i_cbp_dc = 0x01;
 809         else
 810             i_cbp_dc = 0x00;
 811
 812         if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
 813             i_cbp_dc |= 0x02;
 814         if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
 815             i_cbp_dc |= 0x04;
 816     }
 817
 818     /* store cbp */
 819     h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
 820
 821     if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
 822     {
 823         /* It won't change anything at the decoder side, but it is needed; otherwise the
 824          * decoder will fail to read the next QP */
 825         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
 826     }
 827
 828
 829     /* Check for P_SKIP
 830      * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
 831      *      (if multiple mv give same result)*/
 832     if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
 833         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
 834         h->mb.cache.ref[0][x264_scan8[0]] == 0 )
 835     {
 836         int mvp[2];
 837
 838         x264_mb_predict_mv_pskip( h, mvp );
 839         if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
 840             h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
 841         {
 842             h->mb.i_type = P_SKIP;
 843             h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 844             /* XXX qp reset may have issues when used in RD instead of the real encode */
 845         }
 846     }
 847
 848     /* Check for B_SKIP */
 849     if( h->mb.i_type == B_DIRECT &&
 850         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
 851     {
 852         h->mb.i_type = B_SKIP;
 853         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 854     }
 855
 856     if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
 857         h->mb.b_transform_8x8 = 0;
 858 }
 859
 860 /*****************************************************************************
 861  * x264_macroblock_probe_skip:
 862  *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 863  *  the previous QP
 864  *****************************************************************************/
 865 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
 866 {
 867     DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
 868     DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
 869     DECLARE_ALIGNED( int,     dctscan[16], 16 );
 870
 871     int i_qp = h->mb.i_qp;
 872     int mvp[2];
 873     int ch;
 874
 875     int i8x8, i4x4;
 876     int i_decimate_mb;
 877
 878     if( !b_bidir )
 879     {
 880         /* Get the MV */
 881         x264_mb_predict_mv_pskip( h, mvp );
 882         mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
 883         mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
 884
 885         /* Motion compensation */
 886         h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 887                         h->mb.pic.p_fdec[0],   h->mb.pic.i_stride[0],
 888                         mvp[0], mvp[1], 16, 16 );
 889     }
 890
 891     /* get luma diff */
 892     h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 893                                   h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 894
 895     for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
 896     {
 897         /* encode one 4x4 block */
 898         for( i4x4 = 0; i4x4 < 4; i4x4++ )
 899         {
 900             const int idx = i8x8 * 4 + i4x4;
 901
 902             quant_4x4( dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
 903             scan_zigzag_4x4full( dctscan, dct4x4[idx] );
 904
 905             i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
 906
 907             if( i_decimate_mb >= 6 )
 908             {
 909                 /* not as P_SKIP */
 910                 return 0;
 911             }
 912         }
 913     }
 914
 915     /* encode chroma */
 916     i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 917
 918     for( ch = 0; ch < 2; ch++ )
 919     {
 920         const int i_stride = h->mb.pic.i_stride[1+ch];
 921         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 922         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 923
 924         if( !b_bidir )
 925         {
 926             h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
 927                               h->mb.pic.p_fdec[1+ch],       i_stride,
 928                               mvp[0], mvp[1], 8, 8 );
 929         }
 930
 931         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 932
 933         /* calculate dct DC */
 934         dct2x2[0][0] = dct4x4[0][0][0];
 935         dct2x2[0][1] = dct4x4[1][0][0];
 936         dct2x2[1][0] = dct4x4[2][0][0];
 937         dct2x2[1][1] = dct4x4[3][0][0];
 938         h->dctf.dct2x2dc( dct2x2 );
 939         quant_2x2_dc( dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
 940         if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1]  )
 941         {
 942             /* can't be */
 943             return 0;
 944         }
 945
 946         /* calculate dct coeffs */
 947         for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
 948         {
 949             quant_4x4( dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
 950             scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
 951
 952             i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
 953             if( i_decimate_mb >= 7 )
 954             {
 955                 return 0;
 956             }
 957         }
 958     }
 959
 960     return 1;
 961 }