git.sesse.net Git - x264/blob - encoder/macroblock.c

   1 /*****************************************************************************
   2  * macroblock.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include <string.h>
  27
  28 #include "common/common.h"
  29 #include "macroblock.h"
  30
  31
  32 static const uint8_t block_idx_x[16] =
  33 {
  34     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
  35 };
  36 static const uint8_t block_idx_y[16] =
  37 {
  38     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
  39 };
  40 static const uint8_t block_idx_xy[4][4] =
  41 {
  42     { 0, 2, 8,  10 },
  43     { 1, 3, 9,  11 },
  44     { 4, 6, 12, 14 },
  45     { 5, 7, 13, 15 }
  46 };
  47
  48 static const int quant_mf[6][4][4] =
  49 {
  50     { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
  51       { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
  52     { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
  53       { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
  54     { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
  55       { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
  56     { {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 },
  57       {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 } },
  58     { {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 },
  59       {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 } },
  60     { {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 },
  61       {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 } }
  62 };
  63
  64 const int quant8_mf[6][8][8] =
  65 {
  66   {
  67     { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
  68     { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
  69     { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
  70     { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
  71     { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
  72     { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
  73     { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
  74     { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 }
  75   }, {
  76     { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
  77     { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
  78     { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
  79     { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
  80     { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
  81     { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
  82     { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
  83     { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 }
  84   }, {
  85     { 10082,  9675, 12710,  9675, 10082,  9675, 12710,  9675 },
  86     {  9675,  8943, 11985,  8943,  9675,  8943, 11985,  8943 },
  87     { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
  88     {  9675,  8943, 11985,  8943,  9675,  8943, 11985,  8943 },
  89     { 10082,  9675, 12710,  9675, 10082,  9675, 12710,  9675 },
  90     {  9675,  8943, 11985,  8943,  9675,  8943, 11985,  8943 },
  91     { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
  92     {  9675,  8943, 11985,  8943,  9675,  8943, 11985,  8943 }
  93   }, {
  94     {  9362,  8931, 11984,  8931,  9362,  8931, 11984,  8931 },
  95     {  8931,  8228, 11259,  8228,  8931,  8228, 11259,  8228 },
  96     { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
  97     {  8931,  8228, 11259,  8228,  8931,  8228, 11259,  8228 },
  98     {  9362,  8931, 11984,  8931,  9362,  8931, 11984,  8931 },
  99     {  8931,  8228, 11259,  8228,  8931,  8228, 11259,  8228 },
 100     { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
 101     {  8931,  8228, 11259,  8228,  8931,  8228, 11259,  8228 }
 102   }, {
 103     {  8192,  7740, 10486,  7740,  8192,  7740, 10486,  7740 },
 104     {  7740,  7346,  9777,  7346,  7740,  7346,  9777,  7346 },
 105     { 10486,  9777, 13159,  9777, 10486,  9777, 13159,  9777 },
 106     {  7740,  7346,  9777,  7346,  7740,  7346,  9777,  7346 },
 107     {  8192,  7740, 10486,  7740,  8192,  7740, 10486,  7740 },
 108     {  7740,  7346,  9777,  7346,  7740,  7346,  9777,  7346 },
 109     { 10486,  9777, 13159,  9777, 10486,  9777, 13159,  9777 },
 110     {  7740,  7346,  9777,  7346,  7740,  7346,  9777,  7346 }
 111   }, {
 112     {  7282,  6830,  9118,  6830,  7282,  6830,  9118,  6830 },
 113     {  6830,  6428,  8640,  6428,  6830,  6428,  8640,  6428 },
 114     {  9118,  8640, 11570,  8640,  9118,  8640, 11570,  8640 },
 115     {  6830,  6428,  8640,  6428,  6830,  6428,  8640,  6428 },
 116     {  7282,  6830,  9118,  6830,  7282,  6830,  9118,  6830 },
 117     {  6830,  6428,  8640,  6428,  6830,  6428,  8640,  6428 },
 118     {  9118,  8640, 11570,  8640,  9118,  8640, 11570,  8640 },
 119     {  6830,  6428,  8640,  6428,  6830,  6428,  8640,  6428 }
 120   }
 121 };
 122
 123 static const int i_chroma_qp_table[52] =
 124 {
 125      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
 126     10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
 127     20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
 128     29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
 129     36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
 130     39, 39
 131 };
 132
 133 /****************************************************************************
 134  * Scan and Quant functions
 135  ****************************************************************************/
 136 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
 137 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
 138
 139 #define ZIG(i,y,x) level[i] = dct[y][x];
 140 static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
 141 {
 142     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
 143     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
 144     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
 145     ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
 146     ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
 147     ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
 148     ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
 149     ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
 150     ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
 151     ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
 152     ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
 153     ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
 154     ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
 155     ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
 156     ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
 157     ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
 158 }
 159 static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
 160 {
 161     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
 162     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
 163     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
 164     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 165 }
 166 static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
 167 {
 168                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
 169     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
 170     ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
 171     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 172 }
 173 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
 174 {
 175     ZIG(0,0,0)
 176     ZIG(1,0,1)
 177     ZIG(2,1,0)
 178     ZIG(3,1,1)
 179 }
 180 #undef ZIG
 181
 182 #define ZIG(i,y,x) {\
 183     int o = x+y*i_stride;\
 184     level[i] = p_src[o] - p_dst[o];\
 185     p_dst[o] = p_src[o];\
 186 }
 187 static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
 188 {
 189     ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
 190     ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
 191     ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
 192     ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
 193 }
 194 static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
 195 {
 196                 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
 197     ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
 198     ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
 199     ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
 200 }
 201 #undef ZIG
 202
 203 static void quant_8x8( int16_t dct[8][8], int i_qscale, int b_intra )
 204 {
 205     const int i_qbits = 16 + i_qscale / 6;
 206     const int i_mf = i_qscale % 6;
 207     const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 208
 209     int x,y;
 210     for( y = 0; y < 8; y++ )
 211     {
 212         for( x = 0; x < 8; x++ )
 213         {
 214             if( dct[y][x] > 0 )
 215                 dct[y][x] = ( f + dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits;
 216             else
 217                 dct[y][x] = - ( ( f - dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits );
 218         }
 219     }
 220 }
 221 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
 222 {
 223     const int i_qbits = 15 + i_qscale / 6;
 224     const int i_mf = i_qscale % 6;
 225     const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
 226
 227     int x,y;
 228     for( y = 0; y < 4; y++ )
 229     {
 230         for( x = 0; x < 4; x++ )
 231         {
 232             if( dct[y][x] > 0 )
 233                 dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
 234             else
 235                 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
 236         }
 237     }
 238 }
 239 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
 240 {
 241     const int i_qbits = 15 + i_qscale / 6;
 242     const int f2 = ( 2 << i_qbits ) / 3;
 243     const int i_qmf = quant_mf[i_qscale%6][0][0];
 244     int x,y;
 245
 246     for( y = 0; y < 4; y++ )
 247     {
 248         for( x = 0; x < 4; x++ )
 249         {
 250             if( dct[y][x] > 0 )
 251                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 252             else
 253                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 254         }
 255     }
 256 }
 257 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
 258 {
 259     int const i_qbits = 15 + i_qscale / 6;
 260     const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
 261     const int i_qmf = quant_mf[i_qscale%6][0][0];
 262
 263     int x,y;
 264     for( y = 0; y < 2; y++ )
 265     {
 266         for( x = 0; x < 2; x++ )
 267         {
 268             if( dct[y][x] > 0 )
 269                 dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
 270             else
 271                 dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
 272         }
 273     }
 274 }
#if 0
/* NOTE: everything up to the matching #endif is compiled out.
 * It holds alternative versions of quant_4x4, quant_4x4_dc and quant_2x2_dc
 * in which the rounding offset is a per-position fraction (numerator and
 * denominator taken from the f_deadzone_* tables below) instead of the
 * fixed 1/3 or 1/6 used by the active implementations. */
/* From a JVT doc */
static const int f_deadzone_intra[4][4][2] = /* [num][den] */
{
    { {1,2}, {3,7}, {2,5}, {1,3} },
    { {3,7}, {2,5}, {1,3}, {1,4} },
    { {2,5}, {1,3}, {1,4}, {1,5} },
    { {1,3}, {1,4}, {1,5}, {1,5} }
};
static const int f_deadzone_inter[4][4][2] = /* [num][den] */
{
    { {1,3}, {2,7}, {4,15},{2,9} },
    { {2,7}, {4,15},{2,9}, {1,6} },
    { {4,15},{2,9}, {1,6}, {1,7} },
    { {2,9}, {1,6}, {1,7}, {2,15} }
};


/* 4x4 quantiser with a position-dependent rounding offset:
 * f = num * 2^i_qbits / den, with num/den read from the intra or inter table. */
static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
{
    const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
    const int i_qbits = 15 + i_qscale / 6;
    const int i_mf = i_qscale % 6;

    int x,y;
    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {
            /* both branches compute the same offset; the disabled one spells
             * out the table selection, the enabled one goes through the
             * f_deadzone pointer chosen above */
#if 0
            const int f = b_intra ?
                          (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
                          :
                          (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
#else
            const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
#endif

            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f + dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits;
            }
            else
            {
                dct[y][x] = - ( ( f - dct[y][x]  * quant_mf[i_mf][y][x] ) >> i_qbits );
            }
        }
    }
}

/* Luma DC quantiser: always uses the intra table entry for position [0][0]. */
static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
{
    const int i_qbits = 15 + i_qscale / 6;
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
    int x,y;

    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {

            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
            }
            else
            {
                dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
            }
        }
    }
}

/* Chroma DC quantiser: uses the [0][0] entry of the intra or inter table
 * depending on b_intra. */
static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
{
    int const i_qbits = 15 + i_qscale / 6;
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    const int f2 = b_intra ?
                   (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
                   :
                   (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
    int x,y;
    for( y = 0; y < 2; y++ )
    {
        for( x = 0; x < 2; x++ )
        {
            if( dct[y][x] > 0 )
            {
                dct[y][x] =( f2 + dct[y][x]  * i_qmf) >> ( 1 + i_qbits );
            }
            else
            {
                dct[y][x] = - ( ( f2 - dct[y][x]  * i_qmf ) >> (1 + i_qbits ) );
            }
        }
    }
}


#endif
 376
 377 /* (ref: JVT-B118)
 378  * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
 379  * to 0 (low score means set it to null)
 380  * Used in inter macroblock (luma and chroma)
 381  *  luma: for a 8x8 block: if score < 4 -> null
 382  *        for the complete mb: if score < 6 -> null
 383  *  chroma: for the complete mb: if score < 7 -> null
 384  */
 385 static int x264_mb_decimate_score( int *dct, int i_max )
 386 {
 387     static const int i_ds_table4[16] = {
 388         3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
 389     static const int i_ds_table8[64] = {
 390         3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
 391         1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
 392         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 393         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
 394
 395     const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
 396     int i_score = 0;
 397     int idx = i_max - 1;
 398
 399     while( idx >= 0 && dct[idx] == 0 )
 400         idx--;
 401
 402     while( idx >= 0 )
 403     {
 404         int i_run;
 405
 406         if( abs( dct[idx--] ) > 1 )
 407             return 9;
 408
 409         i_run = 0;
 410         while( idx >= 0 && dct[idx] == 0 )
 411         {
 412             idx--;
 413             i_run++;
 414         }
 415         i_score += ds_table[i_run];
 416     }
 417
 418     return i_score;
 419 }
 420
 421 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
 422 {
 423     const int i_stride = h->mb.pic.i_stride[0];
 424     const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
 425     uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
 426     uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
 427     int16_t dct4x4[4][4];
 428
 429     if( h->mb.b_lossless )
 430     {
 431         sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
 432         return;
 433     }
 434
 435     h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 436     quant_4x4( dct4x4, i_qscale, 1 );
 437     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
 438     x264_mb_dequant_4x4( dct4x4, i_qscale );
 439
 440     /* output samples to fdec */
 441     h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
 442 }
 443
 444 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
 445 {
 446     const int i_stride = h->mb.pic.i_stride[0];
 447     const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
 448     uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
 449     uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
 450     int16_t dct8x8[8][8];
 451
 452     h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
 453     quant_8x8( dct8x8, i_qscale, 1 );
 454     scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
 455     x264_mb_dequant_8x8( dct8x8, i_qscale );
 456     h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
 457 }
 458
 459 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
 460 {
 461     const int i_stride = h->mb.pic.i_stride[0];
 462     uint8_t  *p_src = h->mb.pic.p_fenc[0];
 463     uint8_t  *p_dst = h->mb.pic.p_fdec[0];
 464
 465     int16_t dct4x4[16+1][4][4];
 466
 467     int i;
 468
 469     if( h->mb.b_lossless )
 470     {
 471         for( i = 0; i < 16; i++ )
 472         {
 473             int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
 474             sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
 475             dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
 476             p_dst[o] = p_src[o];
 477         }
 478         scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
 479         return;
 480     }
 481
 482     h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
 483     for( i = 0; i < 16; i++ )
 484     {
 485         /* copy dc coeff */
 486         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
 487
 488         /* quant/scan/dequant */
 489         quant_4x4( dct4x4[1+i], i_qscale, 1 );
 490         scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
 491         x264_mb_dequant_4x4( dct4x4[1+i], i_qscale );
 492     }
 493
 494     h->dctf.dct4x4dc( dct4x4[0] );
 495     quant_4x4_dc( dct4x4[0], i_qscale );
 496     scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
 497
 498     /* output samples to fdec */
 499     h->dctf.idct4x4dc( dct4x4[0] );
 500     x264_mb_dequant_4x4_dc( dct4x4[0], i_qscale );  /* XXX not inversed */
 501
 502     /* calculate dct coeffs */
 503     for( i = 0; i < 16; i++ )
 504     {
 505         /* copy dc coeff */
 506         dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
 507     }
 508     /* put pixels to fdec */
 509     h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
 510 }
 511
 512 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
 513 {
 514     int i, ch;
 515
 516     for( ch = 0; ch < 2; ch++ )
 517     {
 518         const int i_stride = h->mb.pic.i_stride[1+ch];
 519         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 520         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 521         int i_decimate_score = 0;
 522
 523         int16_t dct2x2[2][2];
 524         int16_t dct4x4[4][4][4];
 525
 526         if( h->mb.b_lossless )
 527         {
 528             for( i = 0; i < 4; i++ )
 529             {
 530                 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
 531                 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
 532                 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
 533                 p_dst[o] = p_src[o];
 534             }
 535             continue;
 536         }
 537
 538         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 539         /* calculate dct coeffs */
 540         for( i = 0; i < 4; i++ )
 541         {
 542             /* copy dc coeff */
 543             dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
 544
 545             quant_4x4( dct4x4[i], i_qscale, b_inter ? 0 : 1 );
 546             scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
 547             x264_mb_dequant_4x4( dct4x4[i], i_qscale );
 548
 549             if( b_inter )
 550             {
 551                 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
 552             }
 553         }
 554
 555         h->dctf.dct2x2dc( dct2x2 );
 556         quant_2x2_dc( dct2x2, i_qscale, b_inter ? 0 : 1 );
 557         scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
 558
 559         /* output samples to fdec */
 560         h->dctf.idct2x2dc( dct2x2 );
 561         x264_mb_dequant_2x2_dc( dct2x2, i_qscale );  /* XXX not inversed */
 562
 563         if( b_inter && i_decimate_score < 7 )
 564         {
 565             /* Near null chroma 8x8 block so make it null (bits saving) */
 566             for( i = 0; i < 4; i++ )
 567             {
 568                 int x, y;
 569                 for( x = 0; x < 15; x++ )
 570                 {
 571                     h->dct.block[16+i+ch*4].residual_ac[x] = 0;
 572                 }
 573                 for( x = 0; x < 4; x++ )
 574                 {
 575                     for( y = 0; y < 4; y++ )
 576                     {
 577                         dct4x4[i][x][y] = 0;
 578                     }
 579                 }
 580             }
 581         }
 582
 583         /* calculate dct coeffs */
 584         for( i = 0; i < 4; i++ )
 585         {
 586             /* copy dc coeff */
 587             dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
 588         }
 589         h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
 590     }
 591 }
 592
 593 static void x264_macroblock_encode_skip( x264_t *h )
 594 {
 595     int i;
 596     h->mb.i_cbp_luma = 0x00;
 597     h->mb.i_cbp_chroma = 0x00;
 598
 599     for( i = 0; i < 16+8; i++ )
 600     {
 601         h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
 602     }
 603
 604     /* store cbp */
 605     h->mb.cbp[h->mb.i_mb_xy] = 0;
 606 }
 607
 608 /*****************************************************************************
 609  * x264_macroblock_encode_pskip:
 610  *  Encode an already marked skip block
 611  *****************************************************************************/
 612 void x264_macroblock_encode_pskip( x264_t *h )
 613 {
 614     const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
 615                                 h->mb.mv_min[0], h->mb.mv_max[0] );
 616     const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
 617                                 h->mb.mv_min[1], h->mb.mv_max[1] );
 618
 619     /* Motion compensation XXX probably unneeded */
 620     h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 621                     h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
 622                     mvx, mvy, 16, 16 );
 623
 624     /* Chroma MC */
 625     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
 626                       h->mb.pic.p_fdec[1],       h->mb.pic.i_stride[1],
 627                       mvx, mvy, 8, 8 );
 628
 629     h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
 630                       h->mb.pic.p_fdec[2],       h->mb.pic.i_stride[2],
 631                       mvx, mvy, 8, 8 );
 632
 633     x264_macroblock_encode_skip( h );
 634 }
 635
 636 /*****************************************************************************
 637  * x264_macroblock_encode:
 638  *****************************************************************************/
 639 void x264_macroblock_encode( x264_t *h )
 640 {
 641     int i_cbp_dc = 0;
 642     int i_qp = h->mb.i_qp;
 643     int i;
 644
 645     if( h->mb.i_type == P_SKIP )
 646     {
 647         /* A bit special */
 648         x264_macroblock_encode_pskip( h );
 649         return;
 650     }
 651     if( h->mb.i_type == B_SKIP )
 652     {
 653         /* XXX motion compensation is probably unneeded */
 654         x264_mb_mc( h );
 655         x264_macroblock_encode_skip( h );
 656         return;
 657     }
 658
 659     if( h->mb.i_type == I_16x16 )
 660     {
 661         const int i_mode = h->mb.i_intra16x16_pred_mode;
 662         h->mb.b_transform_8x8 = 0;
 663         /* do the right prediction */
 664         h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 665
 666         /* encode the 16x16 macroblock */
 667         x264_mb_encode_i16x16( h, i_qp );
 668
 669         /* fix the pred mode value */
 670         h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
 671     }
 672     else if( h->mb.i_type == I_8x8 )
 673     {
 674         h->mb.b_transform_8x8 = 1;
 675         for( i = 0; i < 4; i++ )
 676         {
 677             const int i_dst = h->mb.pic.i_stride[0];
 678             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
 679             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
 680
 681             h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
 682             x264_mb_encode_i8x8( h, i, i_qp );
 683             h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]] = x264_mb_pred_mode4x4_fix(i_mode);
 684         }
 685     }
 686     else if( h->mb.i_type == I_4x4 )
 687     {
 688         h->mb.b_transform_8x8 = 0;
 689         for( i = 0; i < 16; i++ )
 690         {
 691             const int i_dst = h->mb.pic.i_stride[0];
 692             uint8_t  *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
 693             int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
 694
 695             h->predict_4x4[i_mode]( p_dst, i_dst );
 696             x264_mb_encode_i4x4( h, i, i_qp );
 697             h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix(i_mode);
 698         }
 699     }
 700     else    /* Inter MB */
 701     {
 702         int i8x8, i4x4, idx;
 703         int i_decimate_mb = 0;
 704
 705         /* Motion compensation */
 706         x264_mb_mc( h );
 707
 708         if( h->mb.b_lossless )
 709         {
 710             for( i4x4 = 0; i4x4 < 16; i4x4++ )
 711             {
 712                 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
 713                 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
 714             }
 715         }
 716         else if( h->mb.b_transform_8x8 )
 717         {
 718             int16_t dct8x8[4][8][8];
 719             h->dctf.sub16x16_dct8( dct8x8,
 720                                    h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 721                                    h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 722
 723             for( idx = 0; idx < 4; idx++ )
 724             {
 725                 int i_decimate_8x8;
 726
 727                 quant_8x8( dct8x8[idx], i_qp, 0 );
 728                 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
 729                 x264_mb_dequant_8x8( dct8x8[idx], i_qp );
 730
 731                 i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
 732                 i_decimate_mb += i_decimate_8x8;
 733                 if( i_decimate_8x8 < 4 )
 734                 {
 735                     memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
 736                     memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
 737                 }
 738             }
 739
 740             if( i_decimate_mb < 6 )
 741                 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
 742             else
 743                 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
 744         }
 745         else
 746         {
 747             int16_t dct4x4[16][4][4];
 748             h->dctf.sub16x16_dct( dct4x4,
 749                                   h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 750                                   h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 751
 752             for( i8x8 = 0; i8x8 < 4; i8x8++ )
 753             {
 754                 int i_decimate_8x8;
 755
 756                 /* encode one 4x4 block */
 757                 i_decimate_8x8 = 0;
 758                 for( i4x4 = 0; i4x4 < 4; i4x4++ )
 759                 {
 760                     idx = i8x8 * 4 + i4x4;
 761
 762                     quant_4x4( dct4x4[idx], i_qp, 0 );
 763                     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
 764                     x264_mb_dequant_4x4( dct4x4[idx], i_qp );
 765
 766                     i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
 767                 }
 768
 769                 /* decimate this 8x8 block */
 770                 i_decimate_mb += i_decimate_8x8;
 771                 if( i_decimate_8x8 < 4 )
 772                 {
 773                     for( i4x4 = 0; i4x4 < 4; i4x4++ )
 774                     {
 775                         int x, y;
 776                         idx = i8x8 * 4 + i4x4;
 777                         for( i = 0; i < 16; i++ )
 778                             h->dct.block[idx].luma4x4[i] = 0;
 779                         for( x = 0; x < 4; x++ )
 780                             for( y = 0; y < 4; y++ )
 781                                 dct4x4[idx][x][y] = 0;
 782                     }
 783                 }
 784             }
 785
 786             if( i_decimate_mb < 6 )
 787                 for( idx = 0; idx < 16; idx++ )
 788                     for( i = 0; i < 16; i++ )
 789                         h->dct.block[idx].luma4x4[i] = 0;
 790             else
 791                 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
 792         }
 793     }
 794
 795     /* encode chroma */
 796     i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 797     if( IS_INTRA( h->mb.i_type ) )
 798     {
 799         const int i_mode = h->mb.i_chroma_pred_mode;
 800         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
 801         h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
 802     }
 803
 804     /* encode the 8x8 blocks */
 805     x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
 806
  807     /* Calculate the Luma/Chroma pattern and non_zero_count */
 808     h->mb.i_cbp_luma = 0x00;
 809     if( h->mb.i_type == I_16x16 )
 810     {
 811         for( i = 0; i < 16; i++ )
 812         {
 813             const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
 814             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 815             if( nz > 0 )
 816                 h->mb.i_cbp_luma = 0x0f;
 817         }
 818     }
 819     else if( h->mb.b_transform_8x8 )
 820     {
 821         /* coded_block_flag is enough for CABAC.
 822          * the full non_zero_count is done only in CAVLC. */
 823         for( i = 0; i < 4; i++ )
 824         {
 825             const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
 826             int j;
 827             for( j = 0; j < 4; j++ )
 828                 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
 829             if( nz > 0 )
 830                 h->mb.i_cbp_luma |= 1 << i;
 831         }
 832     }
 833     else
 834     {
 835         for( i = 0; i < 16; i++ )
 836         {
 837             const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
 838             h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
 839             if( nz > 0 )
 840                 h->mb.i_cbp_luma |= 1 << (i/4);
 841         }
 842     }
 843
 844     /* Calculate the chroma pattern */
 845     h->mb.i_cbp_chroma = 0x00;
 846     for( i = 0; i < 8; i++ )
 847     {
 848         const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
 849         h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
 850         if( nz > 0 )
 851         {
 852             h->mb.i_cbp_chroma = 0x02;    /* dc+ac (we can't do only ac) */
 853         }
 854     }
 855     if( h->mb.i_cbp_chroma == 0x00 &&
 856         ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
 857     {
 858         h->mb.i_cbp_chroma = 0x01;    /* dc only */
 859     }
 860
 861     if( h->param.b_cabac )
 862     {
 863         if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
 864             i_cbp_dc = 0x01;
 865         else
 866             i_cbp_dc = 0x00;
 867
 868         if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
 869             i_cbp_dc |= 0x02;
 870         if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
 871             i_cbp_dc |= 0x04;
 872     }
 873
 874     /* store cbp */
 875     h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
 876
 877     if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
 878     {
 879         /* It won't change anything at the decoder side, but it is needed, else the
 880          * decoder will fail to read the next QP */
 881         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
 882     }
 883
 884
 885     /* Check for P_SKIP
 886      * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
 887      *      (if multiple mv give same result)*/
 888     if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
 889         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
 890         h->mb.cache.ref[0][x264_scan8[0]] == 0 )
 891     {
 892         int mvp[2];
 893
 894         x264_mb_predict_mv_pskip( h, mvp );
 895         if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
 896             h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
 897         {
 898             h->mb.i_type = P_SKIP;
 899             h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 900             /* XXX qp reset may have issues when used in RD instead of the real encode */
 901         }
 902     }
 903
 904     /* Check for B_SKIP */
 905     if( h->mb.i_type == B_DIRECT &&
 906         h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
 907     {
 908         h->mb.i_type = B_SKIP;
 909         h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;  /* Needed */
 910     }
 911
 912     if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
 913         h->mb.b_transform_8x8 = 0;
 914 }
 915
 916 /*****************************************************************************
 917  * x264_macroblock_probe_skip:
 918  *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 919  *  the previous QP
 920  *****************************************************************************/
 921 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
 922 {
 923     DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
 924     DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
 925     DECLARE_ALIGNED( int,     dctscan[16], 16 );
 926
 927     int i_qp = h->mb.i_qp;
 928     int mvp[2];
 929     int ch;
 930
 931     int i8x8, i4x4;
 932     int i_decimate_mb;
 933
 934     if( !b_bidir )
 935     {
 936         /* Get the MV */
 937         x264_mb_predict_mv_pskip( h, mvp );
 938         mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
 939         mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
 940
 941         /* Motion compensation */
 942         h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
 943                         h->mb.pic.p_fdec[0],   h->mb.pic.i_stride[0],
 944                         mvp[0], mvp[1], 16, 16 );
 945     }
 946
 947     /* get luma diff */
 948     h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
 949                                   h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
 950
 951     for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
 952     {
 953         /* encode one 4x4 block */
 954         for( i4x4 = 0; i4x4 < 4; i4x4++ )
 955         {
 956             const int idx = i8x8 * 4 + i4x4;
 957
 958             quant_4x4( dct4x4[idx], i_qp, 0 );
 959             scan_zigzag_4x4full( dctscan, dct4x4[idx] );
 960
 961             i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
 962
 963             if( i_decimate_mb >= 6 )
 964             {
 965                 /* not as P_SKIP */
 966                 return 0;
 967             }
 968         }
 969     }
 970
 971     /* encode chroma */
 972     i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
 973
 974     for( ch = 0; ch < 2; ch++ )
 975     {
 976         const int i_stride = h->mb.pic.i_stride[1+ch];
 977         uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
 978         uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
 979
 980         if( !b_bidir )
 981         {
 982             h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
 983                               h->mb.pic.p_fdec[1+ch],       i_stride,
 984                               mvp[0], mvp[1], 8, 8 );
 985         }
 986
 987         h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
 988
 989         /* calculate dct DC */
 990         dct2x2[0][0] = dct4x4[0][0][0];
 991         dct2x2[0][1] = dct4x4[1][0][0];
 992         dct2x2[1][0] = dct4x4[2][0][0];
 993         dct2x2[1][1] = dct4x4[3][0][0];
 994         h->dctf.dct2x2dc( dct2x2 );
 995         quant_2x2_dc( dct2x2, i_qp, 0 );
 996         if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1]  )
 997         {
 998             /* can't be */
 999             return 0;
1000         }
1001
1002         /* calculate dct coeffs */
1003         for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
1004         {
1005             quant_4x4( dct4x4[i4x4], i_qp, 0 );
1006             scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
1007
1008             i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
1009             if( i_decimate_mb >= 7 )
1010             {
1011                 return 0;
1012             }
1013         }
1014     }
1015
1016     return 1;
1017 }