/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/
28 #include "common/common.h"
29 #include "macroblock.h"
32 /* def_quant4_mf only for probe_skip; actual encoding uses matrices from set.c */
33 /* FIXME this seems to make better decisions with cqm=jvt, but could screw up
34 * with general custom matrices. */
/* Default 4x4 quantisation multipliers, indexed [qp % 6][row][col].
 * The first index is selected with i_qscale % 6 by the quant_* helpers in
 * this file; each 4x4 matrix only contains three distinct values, laid out
 * in a 2x2-periodic pattern. */
static const int def_quant4_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 },
      { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 },
      { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 },
      { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 },
      {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 },
      {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 },
      {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 } }
};
51 /****************************************************************************
52 * Scan and Quant functions
53 ****************************************************************************/
54 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
55 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/* ZIG( i, y, x ): store coefficient dct[y][x] at scan position i.
 * The expansion carries its own ';' so the invocations below can be laid
 * out as a table, four scan positions per line. */
#define ZIG(i,y,x) level[i] = dct[y][x];

/* Reorder a complete 8x8 coefficient block into the 64-entry scan order
 * given by the table below.
 *   level: output, 64 coefficients in scan order
 *   dct:   input, indexed dct[y][x] */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
}

/* Reorder a complete 4x4 block (DC included) into 16 scan-ordered values. */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* Same scan as scan_zigzag_4x4full but skipping dct[0][0]: only the 15
 * remaining coefficients are written, starting at level[0]. */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}

/* Copy a 2x2 DC block out row by row: level[2*y + x] = dct[y][x].
 * NOTE(review): the body of this function was absent from the reviewed
 * text; it is restored to match how chroma_dc / dct2x2 are indexed by the
 * chroma encoder in this file - confirm against the reference tree. */
static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
{
    ZIG( 0,0,0) ZIG( 1,0,1)
    ZIG( 2,1,0) ZIG( 3,1,1)
}
#undef ZIG
/* ZIG( i, y, x ) for the lossless path: scan position i receives the pixel
 * difference p_src - p_dst at (x, y), and the destination pixel is then
 * overwritten with the source pixel.  Expands to a braced block, so the
 * invocations below need no ';'.  The #undef guards against a previous
 * definition of ZIG still being active. */
#undef ZIG
#define ZIG(i,y,x) {\
    int o = (x)+(y)*i_stride;\
    level[i] = p_src[o] - p_dst[o];\
    p_dst[o] = p_src[o];\
}

/* Residual of a whole 4x4 pixel block in scan order.
 *   level:    output, 16 differences p_src - p_dst
 *   p_src:    top-left source pixel
 *   p_dst:    top-left destination pixel; all 16 pixels are replaced by
 *             the corresponding source pixels
 *   i_stride: row stride, shared by both buffers */
static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* Same as sub_zigzag_4x4full but leaves out position (0,0): 15 values are
 * written and the top-left destination pixel is NOT touched, so callers
 * have to handle that pixel themselves. */
static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
#undef ZIG
121 static void quant_8x8( x264_t *h, int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
123 const int i_qbits = 16 + i_qscale / 6;
124 const int i_mf = i_qscale % 6;
125 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
126 h->quantf.quant_8x8_core( dct, quant_mf[i_mf], i_qbits, f );
128 static void quant_4x4( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
130 const int i_qbits = 15 + i_qscale / 6;
131 const int i_mf = i_qscale % 6;
132 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
133 h->quantf.quant_4x4_core( dct, quant_mf[i_mf], i_qbits, f );
135 static void quant_4x4_dc( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
137 const int i_qbits = 16 + i_qscale / 6;
138 const int i_mf = i_qscale % 6;
139 const int f = ( 1 << i_qbits ) / 3;
140 h->quantf.quant_4x4_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
142 static void quant_2x2_dc( x264_t *h, int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
144 const int i_qbits = 16 + i_qscale / 6;
145 const int i_mf = i_qscale % 6;
146 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
147 h->quantf.quant_2x2_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
/****************************************************************************
 * x264_mb_decimate_score: given scan-ordered dct coeffs, return a score
 *      telling whether these coeffs could be forced to 0 (a low score
 *      means "set it to null").
 *  Used in inter macroblock (luma and chroma)
 *      luma: for a 8x8 block: if score < 4 -> null
 *            for the complete mb: if score < 6 -> null
 *      chroma: for the complete mb: if score < 7 -> null
 *
 *  dct:   i_max coefficients in scan order
 *  i_max: number of coefficients; 64 selects the 8x8 cost table, any
 *         other value the 16-entry table (this file passes 15, 16 or 64)
 *
 *  The coefficients are walked from the highest scan position down.  Each
 *  non-zero coefficient costs ds_table[run], run being the number of zero
 *  coefficients directly below it in scan order.  Any coefficient whose
 *  magnitude exceeds 1 ends the walk with a score of 9, which is above
 *  every threshold listed above.
 ****************************************************************************/
static int x264_mb_decimate_score( int *dct, int i_max )
{
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip the trailing zeros: they cost nothing */
    while( idx >= 0 && dct[idx] == 0 )
        idx--;

    while( idx >= 0 )
    {
        int i_run;

        /* a level above +-1 is never worth dropping */
        if( abs( dct[idx--] ) > 1 )
            return 9;

        /* count the zeros separating this coeff from the next lower one */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
194 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
196 const int i_stride = h->mb.pic.i_stride[0];
197 const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
198 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
199 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
200 int16_t dct4x4[4][4];
202 if( h->mb.b_lossless )
204 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
208 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
209 quant_4x4( h, dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
210 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
211 x264_mb_dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
213 /* output samples to fdec */
214 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
217 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
219 const int i_stride = h->mb.pic.i_stride[0];
220 const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
221 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
222 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
223 int16_t dct8x8[8][8];
225 h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
226 quant_8x8( h, dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
227 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
228 x264_mb_dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
229 h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
232 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
234 const int i_stride = h->mb.pic.i_stride[0];
235 uint8_t *p_src = h->mb.pic.p_fenc[0];
236 uint8_t *p_dst = h->mb.pic.p_fdec[0];
238 int16_t dct4x4[16+1][4][4];
242 if( h->mb.b_lossless )
244 for( i = 0; i < 16; i++ )
246 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
247 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
248 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
251 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
255 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
256 for( i = 0; i < 16; i++ )
259 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
261 /* quant/scan/dequant */
262 quant_4x4( h, dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
263 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
264 x264_mb_dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
267 h->dctf.dct4x4dc( dct4x4[0] );
268 quant_4x4_dc( h, dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
269 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
271 /* output samples to fdec */
272 h->dctf.idct4x4dc( dct4x4[0] );
273 x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */
275 /* calculate dct coeffs */
276 for( i = 0; i < 16; i++ )
279 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
281 /* put pixels to fdec */
282 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
285 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
289 for( ch = 0; ch < 2; ch++ )
291 const int i_stride = h->mb.pic.i_stride[1+ch];
292 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
293 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
294 int i_decimate_score = 0;
296 int16_t dct2x2[2][2];
297 int16_t dct4x4[4][4][4];
299 if( h->mb.b_lossless )
301 for( i = 0; i < 4; i++ )
303 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
304 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
305 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
311 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
312 /* calculate dct coeffs */
313 for( i = 0; i < 4; i++ )
316 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
318 quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
319 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
320 x264_mb_dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
324 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
328 h->dctf.dct2x2dc( dct2x2 );
329 quant_2x2_dc( h, dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
330 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
332 /* output samples to fdec */
333 h->dctf.idct2x2dc( dct2x2 );
334 x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */
336 if( b_inter && i_decimate_score < 7 )
338 /* Near null chroma 8x8 block so make it null (bits saving) */
339 for( i = 0; i < 4; i++ )
342 for( x = 0; x < 15; x++ )
344 h->dct.block[16+i+ch*4].residual_ac[x] = 0;
346 for( x = 0; x < 4; x++ )
348 for( y = 0; y < 4; y++ )
356 /* calculate dct coeffs */
357 for( i = 0; i < 4; i++ )
360 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
362 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
366 static void x264_macroblock_encode_skip( x264_t *h )
369 h->mb.i_cbp_luma = 0x00;
370 h->mb.i_cbp_chroma = 0x00;
372 for( i = 0; i < 16+8; i++ )
374 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
378 h->mb.cbp[h->mb.i_mb_xy] = 0;
381 /*****************************************************************************
382 * x264_macroblock_encode_pskip:
383 * Encode an already marked skip block
384 *****************************************************************************/
385 void x264_macroblock_encode_pskip( x264_t *h )
387 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
388 h->mb.mv_min[0], h->mb.mv_max[0] );
389 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
390 h->mb.mv_min[1], h->mb.mv_max[1] );
392 /* Motion compensation XXX probably unneeded */
393 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
394 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
398 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
399 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
402 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
403 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
406 x264_macroblock_encode_skip( h );
409 /*****************************************************************************
410 * x264_macroblock_encode:
411 *****************************************************************************/
412 void x264_macroblock_encode( x264_t *h )
415 int i_qp = h->mb.i_qp;
418 if( h->mb.i_type == P_SKIP )
421 x264_macroblock_encode_pskip( h );
424 if( h->mb.i_type == B_SKIP )
426 /* XXX motion compensation is probably unneeded */
428 x264_macroblock_encode_skip( h );
432 if( h->mb.i_type == I_16x16 )
434 const int i_mode = h->mb.i_intra16x16_pred_mode;
435 h->mb.b_transform_8x8 = 0;
436 /* do the right prediction */
437 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
439 /* encode the 16x16 macroblock */
440 x264_mb_encode_i16x16( h, i_qp );
442 else if( h->mb.i_type == I_8x8 )
444 h->mb.b_transform_8x8 = 1;
445 for( i = 0; i < 4; i++ )
447 const int i_dst = h->mb.pic.i_stride[0];
448 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
449 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
451 h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
452 x264_mb_encode_i8x8( h, i, i_qp );
455 else if( h->mb.i_type == I_4x4 )
457 h->mb.b_transform_8x8 = 0;
458 for( i = 0; i < 16; i++ )
460 const int i_dst = h->mb.pic.i_stride[0];
461 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
462 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
464 if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
465 /* emulate missing topright samples */
466 *(uint32_t*) &p_dst[4 - i_dst] = p_dst[3 - i_dst] * 0x01010101U;
468 h->predict_4x4[i_mode]( p_dst, i_dst );
469 x264_mb_encode_i4x4( h, i, i_qp );
475 int i_decimate_mb = 0;
477 /* Motion compensation */
480 if( h->mb.b_lossless )
482 for( i4x4 = 0; i4x4 < 16; i4x4++ )
484 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
485 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
488 else if( h->mb.b_transform_8x8 )
490 int16_t dct8x8[4][8][8];
491 h->dctf.sub16x16_dct8( dct8x8,
492 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
493 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
495 for( idx = 0; idx < 4; idx++ )
499 quant_8x8( h, dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
500 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
501 x264_mb_dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
503 i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
504 i_decimate_mb += i_decimate_8x8;
505 if( i_decimate_8x8 < 4 )
507 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
508 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
512 if( i_decimate_mb < 6 )
513 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
515 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
519 int16_t dct4x4[16][4][4];
520 h->dctf.sub16x16_dct( dct4x4,
521 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
522 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
524 for( i8x8 = 0; i8x8 < 4; i8x8++ )
528 /* encode one 4x4 block */
530 for( i4x4 = 0; i4x4 < 4; i4x4++ )
532 idx = i8x8 * 4 + i4x4;
534 quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
535 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
536 x264_mb_dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
538 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
541 /* decimate this 8x8 block */
542 i_decimate_mb += i_decimate_8x8;
543 if( i_decimate_8x8 < 4 )
545 for( i4x4 = 0; i4x4 < 4; i4x4++ )
548 idx = i8x8 * 4 + i4x4;
549 for( i = 0; i < 16; i++ )
550 h->dct.block[idx].luma4x4[i] = 0;
551 for( x = 0; x < 4; x++ )
552 for( y = 0; y < 4; y++ )
553 dct4x4[idx][x][y] = 0;
558 if( i_decimate_mb < 6 )
559 for( idx = 0; idx < 16; idx++ )
560 for( i = 0; i < 16; i++ )
561 h->dct.block[idx].luma4x4[i] = 0;
563 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
568 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
569 if( IS_INTRA( h->mb.i_type ) )
571 const int i_mode = h->mb.i_chroma_pred_mode;
572 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
573 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
576 /* encode the 8x8 blocks */
577 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
579 /* Calculate the Luma/Chroma patern and non_zero_count */
580 h->mb.i_cbp_luma = 0x00;
581 if( h->mb.i_type == I_16x16 )
583 for( i = 0; i < 16; i++ )
585 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
586 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
588 h->mb.i_cbp_luma = 0x0f;
591 else if( h->mb.b_transform_8x8 )
593 /* coded_block_flag is enough for CABAC.
594 * the full non_zero_count is done only in CAVLC. */
595 for( i = 0; i < 4; i++ )
597 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
599 for( j = 0; j < 4; j++ )
600 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
602 h->mb.i_cbp_luma |= 1 << i;
607 for( i = 0; i < 16; i++ )
609 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
610 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
612 h->mb.i_cbp_luma |= 1 << (i/4);
616 /* Calculate the chroma patern */
617 h->mb.i_cbp_chroma = 0x00;
618 for( i = 0; i < 8; i++ )
620 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
621 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
624 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
627 if( h->mb.i_cbp_chroma == 0x00 &&
628 ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
630 h->mb.i_cbp_chroma = 0x01; /* dc only */
633 if( h->param.b_cabac )
635 if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
640 if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
642 if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
647 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
650 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
651 * (if multiple mv give same result)*/
652 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
653 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
654 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
658 x264_mb_predict_mv_pskip( h, mvp );
659 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
660 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
662 h->mb.i_type = P_SKIP;
666 /* Check for B_SKIP */
667 if( h->mb.i_type == B_DIRECT &&
668 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
670 h->mb.i_type = B_SKIP;
674 /*****************************************************************************
675 * x264_macroblock_probe_skip:
676 * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
678 *****************************************************************************/
679 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
681 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
682 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
683 DECLARE_ALIGNED( int, dctscan[16], 16 );
685 int i_qp = h->mb.i_qp;
695 x264_mb_predict_mv_pskip( h, mvp );
696 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
697 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
699 /* Motion compensation */
700 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
701 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
702 mvp[0], mvp[1], 16, 16 );
706 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
707 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
709 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
711 /* encode one 4x4 block */
712 for( i4x4 = 0; i4x4 < 4; i4x4++ )
714 const int idx = i8x8 * 4 + i4x4;
716 quant_4x4( h, dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
717 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
719 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
721 if( i_decimate_mb >= 6 )
730 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
732 for( ch = 0; ch < 2; ch++ )
734 const int i_stride = h->mb.pic.i_stride[1+ch];
735 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
736 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
740 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
741 h->mb.pic.p_fdec[1+ch], i_stride,
742 mvp[0], mvp[1], 8, 8 );
745 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
747 /* calculate dct DC */
748 dct2x2[0][0] = dct4x4[0][0][0];
749 dct2x2[0][1] = dct4x4[1][0][0];
750 dct2x2[1][0] = dct4x4[2][0][0];
751 dct2x2[1][1] = dct4x4[3][0][0];
752 h->dctf.dct2x2dc( dct2x2 );
753 quant_2x2_dc( h, dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
754 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
760 /* calculate dct coeffs */
761 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
763 quant_4x4( h, dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
764 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
766 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
767 if( i_decimate_mb >= 7 )