/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "common/common.h"
#include "macroblock.h"
/* def_quant4_mf only for probe_skip; actual encoding uses matrices from set.c */
/* FIXME this seems to make better decisions with cqm=jvt, but could screw up
 *       with general custom matrices. */
/* Default 4x4 quantisation multipliers, indexed [qp % 6][row][column]
 * (quant_4x4() selects the first dimension with i_qscale % 6). */
static const int def_quant4_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
      { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
      { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
      { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 },
      {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 },
      {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 },
      {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 } }
};
/****************************************************************************
 * Scan and Quant functions
 ****************************************************************************/
//static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
//static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/* ZIG(i,y,x): store coefficient dct[y][x] at position i of the scanned
 * array `level`.  Expands to a complete statement (trailing ';' included)
 * so the invocations below can be laid out as a table. */
#define ZIG(i,y,x) level[i] = dct[y][x];

/* Copy a full 8x8 coefficient block into level[0..63] in zigzag order. */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
}

/* Copy all 16 coefficients of a 4x4 block (DC included) into level[0..15]
 * in zigzag order. */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* Copy the 15 coefficients of a 4x4 block other than dct[0][0] into
 * level[0..14], in the same zigzag order with the first entry skipped. */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}

/* Copy a 2x2 DC block into level[0..3] in raster order
 * (top-left, top-right, bottom-left, bottom-right). */
static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
{
    ZIG(0,0,0)
    ZIG(1,0,1)
    ZIG(2,1,0)
    ZIG(3,1,1)
}
#undef ZIG
/* Make sure no earlier ZIG definition is still active before redefining. */
#undef ZIG
/* ZIG(i,y,x): for the pixel at column x, row y (row pitch i_stride), store
 * the residual p_src - p_dst at level[i], then overwrite the p_dst pixel
 * with the p_src pixel. */
#define ZIG(i,y,x) {\
    int o = x+y*i_stride;\
    level[i] = p_src[o] - p_dst[o];\
    p_dst[o] = p_src[o];\
}

/* Untransformed path for a whole 4x4 block: level[0..15] receives the
 * pixel differences in zigzag order and the 16 p_dst pixels are replaced
 * by the corresponding p_src pixels. */
static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* Same as sub_zigzag_4x4full() but skips the top-left pixel: level[0..14]
 * receives the other 15 differences and p_dst[0] is left untouched (the
 * callers in this file handle that pixel themselves). */
static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
#undef ZIG
121 static void quant_8x8( x264_t *h, int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
123 const int i_qbits = 16 + i_qscale / 6;
124 const int i_mf = i_qscale % 6;
125 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
126 h->quantf.quant_8x8_core( dct, quant_mf[i_mf], i_qbits, f );
128 static void quant_4x4( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
130 const int i_qbits = 15 + i_qscale / 6;
131 const int i_mf = i_qscale % 6;
132 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
133 h->quantf.quant_4x4_core( dct, quant_mf[i_mf], i_qbits, f );
135 static void quant_4x4_dc( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
137 const int i_qbits = 16 + i_qscale / 6;
138 const int i_mf = i_qscale % 6;
139 const int f = ( 1 << i_qbits ) / 3;
140 h->quantf.quant_4x4_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
142 static void quant_2x2_dc( x264_t *h, int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
144 const int i_qbits = 16 + i_qscale / 6;
145 const int i_mf = i_qscale % 6;
146 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
147 h->quantf.quant_2x2_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
/* (ref: JVT-B118)
 * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
 * to 0 (low score means set it to null)
 * Used in inter macroblock (luma and chroma)
 *  luma: for a 8x8 block: if score < 4 -> null
 *        for the complete mb: if score < 6 -> null
 *  chroma: for the complete mb: if score < 7 -> null
 *
 *  dct:   coefficients in scan order, i_max entries
 *  i_max: number of entries; 64 selects the 8x8 cost table, any other
 *         value (15 or 16 in this file) selects the 4x4 table
 * Returns 9 as soon as a coefficient with magnitude > 1 is met; otherwise
 * the sum, over the non-zero coefficients, of a cost looked up by the
 * number of zeros directly preceding each one in scan order.
 */
static int x264_mb_decimate_score( const int *dct, int i_max )
{
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip the trailing zeros */
    while( idx >= 0 && dct[idx] == 0 )
        idx--;

    /* walk backwards: dct[idx] is non-zero at the top of every iteration */
    while( idx >= 0 )
    {
        int i_run;

        if( abs( dct[idx--] ) > 1 )
            return 9;

        /* count the zeros in front of the coefficient just consumed */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
194 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
196 const int i_stride = h->mb.pic.i_stride[0];
197 const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
198 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
199 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
200 int16_t dct4x4[4][4];
202 if( h->mb.b_lossless )
204 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
208 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
210 if( h->mb.b_trellis )
211 x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
213 quant_4x4( h, dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
215 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
216 x264_mb_dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
218 /* output samples to fdec */
219 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
222 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
224 const int i_stride = h->mb.pic.i_stride[0];
225 const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
226 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
227 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
228 int16_t dct8x8[8][8];
230 h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
232 if( h->mb.b_trellis )
233 x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
235 quant_8x8( h, dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
237 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
238 x264_mb_dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
239 h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
242 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
244 const int i_stride = h->mb.pic.i_stride[0];
245 uint8_t *p_src = h->mb.pic.p_fenc[0];
246 uint8_t *p_dst = h->mb.pic.p_fdec[0];
248 int16_t dct4x4[16+1][4][4];
252 if( h->mb.b_lossless )
254 for( i = 0; i < 16; i++ )
256 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
257 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
258 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
261 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
265 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
266 for( i = 0; i < 16; i++ )
269 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
271 /* quant/scan/dequant */
272 if( h->mb.b_trellis )
273 x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
275 quant_4x4( h, dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
277 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
278 x264_mb_dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
281 h->dctf.dct4x4dc( dct4x4[0] );
282 quant_4x4_dc( h, dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
283 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
285 /* output samples to fdec */
286 h->dctf.idct4x4dc( dct4x4[0] );
287 x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */
289 /* calculate dct coeffs */
290 for( i = 0; i < 16; i++ )
293 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
295 /* put pixels to fdec */
296 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
299 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
303 for( ch = 0; ch < 2; ch++ )
305 const int i_stride = h->mb.pic.i_stride[1+ch];
306 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
307 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
308 int i_decimate_score = 0;
310 int16_t dct2x2[2][2];
311 int16_t dct4x4[4][4][4];
313 if( h->mb.b_lossless )
315 for( i = 0; i < 4; i++ )
317 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
318 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
319 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
325 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
326 /* calculate dct coeffs */
327 for( i = 0; i < 4; i++ )
330 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
332 /* no trellis; it doesn't seem to help chroma noticeably */
333 quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
334 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
335 x264_mb_dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
339 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
343 h->dctf.dct2x2dc( dct2x2 );
344 quant_2x2_dc( h, dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
345 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
347 /* output samples to fdec */
348 h->dctf.idct2x2dc( dct2x2 );
349 x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */
351 if( b_inter && i_decimate_score < 7 )
353 /* Near null chroma 8x8 block so make it null (bits saving) */
354 memset( dct4x4, 0, sizeof( dct4x4 ) );
355 memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
358 /* calculate dct coeffs */
359 for( i = 0; i < 4; i++ )
362 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
364 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
368 static void x264_macroblock_encode_skip( x264_t *h )
371 h->mb.i_cbp_luma = 0x00;
372 h->mb.i_cbp_chroma = 0x00;
374 for( i = 0; i < 16+8; i++ )
376 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
380 h->mb.cbp[h->mb.i_mb_xy] = 0;
383 /*****************************************************************************
384 * x264_macroblock_encode_pskip:
385 * Encode an already marked skip block
386 *****************************************************************************/
387 void x264_macroblock_encode_pskip( x264_t *h )
389 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
390 h->mb.mv_min[0], h->mb.mv_max[0] );
391 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
392 h->mb.mv_min[1], h->mb.mv_max[1] );
394 /* Motion compensation XXX probably unneeded */
395 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
396 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
400 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
401 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
404 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
405 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
408 x264_macroblock_encode_skip( h );
411 /*****************************************************************************
412 * x264_macroblock_encode:
413 *****************************************************************************/
414 void x264_macroblock_encode( x264_t *h )
417 int i_qp = h->mb.i_qp;
420 if( h->mb.i_type == P_SKIP )
423 x264_macroblock_encode_pskip( h );
426 if( h->mb.i_type == B_SKIP )
428 /* XXX motion compensation is probably unneeded */
430 x264_macroblock_encode_skip( h );
434 if( h->mb.i_type == I_16x16 )
436 const int i_mode = h->mb.i_intra16x16_pred_mode;
437 h->mb.b_transform_8x8 = 0;
438 /* do the right prediction */
439 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
441 /* encode the 16x16 macroblock */
442 x264_mb_encode_i16x16( h, i_qp );
444 else if( h->mb.i_type == I_8x8 )
446 h->mb.b_transform_8x8 = 1;
447 for( i = 0; i < 4; i++ )
449 const int i_dst = h->mb.pic.i_stride[0];
450 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
451 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
453 h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
454 x264_mb_encode_i8x8( h, i, i_qp );
457 else if( h->mb.i_type == I_4x4 )
459 h->mb.b_transform_8x8 = 0;
460 for( i = 0; i < 16; i++ )
462 const int i_dst = h->mb.pic.i_stride[0];
463 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
464 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
466 if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
467 /* emulate missing topright samples */
468 *(uint32_t*) &p_dst[4 - i_dst] = p_dst[3 - i_dst] * 0x01010101U;
470 h->predict_4x4[i_mode]( p_dst, i_dst );
471 x264_mb_encode_i4x4( h, i, i_qp );
477 int i_decimate_mb = 0;
479 /* Motion compensation */
482 if( h->mb.b_lossless )
484 for( i4x4 = 0; i4x4 < 16; i4x4++ )
486 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
487 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
490 else if( h->mb.b_transform_8x8 )
492 int16_t dct8x8[4][8][8];
493 h->dctf.sub16x16_dct8( dct8x8,
494 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
495 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
497 for( idx = 0; idx < 4; idx++ )
499 if( h->mb.b_trellis )
500 x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
502 quant_8x8( h, dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
504 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
505 x264_mb_dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
507 if( !h->mb.b_trellis )
509 int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
510 i_decimate_mb += i_decimate_8x8;
511 if( i_decimate_8x8 < 4 )
513 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
514 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
519 if( i_decimate_mb < 6 && !h->mb.b_trellis )
520 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
522 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
526 int16_t dct4x4[16][4][4];
527 h->dctf.sub16x16_dct( dct4x4,
528 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
529 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
531 for( i8x8 = 0; i8x8 < 4; i8x8++ )
535 /* encode one 4x4 block */
537 for( i4x4 = 0; i4x4 < 4; i4x4++ )
539 idx = i8x8 * 4 + i4x4;
541 if( h->mb.b_trellis )
542 x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
544 quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
546 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
547 x264_mb_dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
549 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
552 /* decimate this 8x8 block */
553 i_decimate_mb += i_decimate_8x8;
554 if( i_decimate_8x8 < 4 )
556 memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
557 memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
561 if( i_decimate_mb < 6 )
562 memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
564 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
569 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
570 if( IS_INTRA( h->mb.i_type ) )
572 const int i_mode = h->mb.i_chroma_pred_mode;
573 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
574 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
577 /* encode the 8x8 blocks */
578 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
580 /* Calculate the Luma/Chroma patern and non_zero_count */
581 h->mb.i_cbp_luma = 0x00;
582 if( h->mb.i_type == I_16x16 )
584 for( i = 0; i < 16; i++ )
586 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
587 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
589 h->mb.i_cbp_luma = 0x0f;
592 else if( h->mb.b_transform_8x8 )
594 /* coded_block_flag is enough for CABAC.
595 * the full non_zero_count is done only in CAVLC. */
596 for( i = 0; i < 4; i++ )
598 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
600 for( j = 0; j < 4; j++ )
601 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
603 h->mb.i_cbp_luma |= 1 << i;
608 for( i = 0; i < 16; i++ )
610 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
611 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
613 h->mb.i_cbp_luma |= 1 << (i/4);
617 /* Calculate the chroma patern */
618 h->mb.i_cbp_chroma = 0x00;
619 for( i = 0; i < 8; i++ )
621 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
622 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
625 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
628 if( h->mb.i_cbp_chroma == 0x00 &&
629 ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
631 h->mb.i_cbp_chroma = 0x01; /* dc only */
634 if( h->param.b_cabac )
636 if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
641 if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
643 if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
648 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
651 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
652 * (if multiple mv give same result)*/
653 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
654 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
655 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
659 x264_mb_predict_mv_pskip( h, mvp );
660 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
661 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
663 h->mb.i_type = P_SKIP;
667 /* Check for B_SKIP */
668 if( h->mb.i_type == B_DIRECT &&
669 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
671 h->mb.i_type = B_SKIP;
/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 *  the previous QP
 *****************************************************************************/
680 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
682 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
683 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
684 DECLARE_ALIGNED( int, dctscan[16], 16 );
686 int i_qp = h->mb.i_qp;
696 x264_mb_predict_mv_pskip( h, mvp );
697 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
698 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
700 /* Motion compensation */
701 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
702 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
703 mvp[0], mvp[1], 16, 16 );
707 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
708 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
710 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
712 /* encode one 4x4 block */
713 for( i4x4 = 0; i4x4 < 4; i4x4++ )
715 const int idx = i8x8 * 4 + i4x4;
717 quant_4x4( h, dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
718 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
720 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
722 if( i_decimate_mb >= 6 )
731 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
733 for( ch = 0; ch < 2; ch++ )
735 const int i_stride = h->mb.pic.i_stride[1+ch];
736 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
737 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
741 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
742 h->mb.pic.p_fdec[1+ch], i_stride,
743 mvp[0], mvp[1], 8, 8 );
746 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
748 /* calculate dct DC */
749 dct2x2[0][0] = dct4x4[0][0][0];
750 dct2x2[0][1] = dct4x4[1][0][0];
751 dct2x2[1][0] = dct4x4[2][0][0];
752 dct2x2[1][1] = dct4x4[3][0][0];
753 h->dctf.dct2x2dc( dct2x2 );
754 quant_2x2_dc( h, dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
755 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
761 /* calculate dct coeffs */
762 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
764 quant_4x4( h, dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
765 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
767 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
768 if( i_decimate_mb >= 7 )