1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
27 #include "common/common.h"
28 #include "macroblock.h"
/* def_quant4_mf is used only by x264_macroblock_probe_skip; actual encoding
 * uses the matrices from set.c.
 * FIXME this seems to make better decisions with cqm=jvt, but could screw up
 * with general custom matrices.
 *
 * Layout: the first index is selected with (qp % 6) by the quant_* helpers,
 * the remaining two address one multiplier of the 4x4 block. */
static const int def_quant4_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
      { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
      { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
      { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 },
      {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 },
      {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 },
      {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 } }
};
/****************************************************************************
 * Scan and Quant functions
 ****************************************************************************/

/* ZIG(i,y,x): store the coefficient of row y, column x into slot i of the
 * zigzag-ordered output.  Note that the dct array is indexed [x][y]. */
#define ZIG(i,y,x) level[i] = dct[x][y];

/* Zigzag-scan all 64 coefficients of an 8x8 block.
 *   level  output, 64 entries in zigzag order
 *   dct    input block, read as dct[x][y] */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
}

/* Zigzag-scan all 16 coefficients (DC included) of a 4x4 block.
 *   level  output, 16 entries in zigzag order
 *   dct    input block, read as dct[x][y] */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* Zigzag-scan the 15 AC coefficients of a 4x4 block: same order as
 * scan_zigzag_4x4full, but position (0,0) is skipped so level[0] holds the
 * first AC coefficient.
 *   level  output, 15 entries
 *   dct    input block, read as dct[x][y] */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
/* Scans a 2x2 DC block into the 4-entry level[] array; the chroma encoder
 * calls it with h->dct.chroma_dc[ch] as the destination.
 * NOTE(review): only the signature is present in this listing; the body and
 * its braces are not visible, so the scan order cannot be documented from
 * here. */
88 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
/* Continuation lines of the pixel-domain ZIG(i,y,x) helper used by the
 * sub_zigzag_* functions: for the pixel at column x, row y it stores
 * (source - destination) into level[i] and then copies the source pixel into
 * the destination.  The source is addressed with FENC_STRIDE, the
 * destination with FDEC_STRIDE.
 * NOTE(review): the "#define ZIG(i,y,x)" line that should introduce these
 * continuation lines (and any #undef of the earlier ZIG) is not visible in
 * this listing - confirm it exists in the complete file.
 *
 * sub_zigzag_4x4full applies the helper to all 16 pixels of a 4x4 block in
 * zigzag order: level[0..15] receives the differences and the 4x4 area at
 * p_dst is overwritten with the pixels from p_src. */
98 int oe = x+y*FENC_STRIDE;\
99 int od = x+y*FDEC_STRIDE;\
100 level[i] = p_src[oe] - p_dst[od];\
101 p_dst[od] = p_src[oe];\
103 static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst )
105 ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
106 ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
107 ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
108 ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
/* Same as sub_zigzag_4x4full but without position (0,0): the 15 remaining
 * pixels are differenced into level[0..14] in zigzag order and copied from
 * p_src to p_dst.  The top-left pixel is left untouched here; the callers in
 * this file handle it themselves (they compute and copy p_src[oe]/p_dst[od]
 * right after the call). */
110 static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst )
112 ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
113 ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
114 ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
115 ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
/* Quantize an 8x8 coefficient block through h->quantf.quant_8x8_core.
 *   dct       coefficient block handed to the core routine (presumably
 *             quantized in place - callers scan it right afterwards)
 *   quant_mf  six multiplier tables; the one at index (i_qscale % 6) is used
 *   i_qscale  quantizer; the shift passed on is 16 + i_qscale / 6
 *   b_intra   added to the shift when computing the rounding offset, so an
 *             intra block (1) gets twice the offset of an inter block (0) */
119 static void quant_8x8( x264_t *h, int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
121 const int i_qbits = 16 + i_qscale / 6;
122 const int i_mf = i_qscale % 6;
/* rounding offset: (1 << qbits) / 6 for inter, twice that for intra */
123 const int f = ( 1 << (i_qbits + b_intra) ) / 6;
124 h->quantf.quant_8x8_core( dct, quant_mf[i_mf], i_qbits, f );
/* Quantize a 4x4 coefficient block through h->quantf.quant_4x4_core.
 *   dct       coefficient block handed to the core routine (presumably
 *             quantized in place - callers scan it right afterwards)
 *   quant_mf  six multiplier tables; the one at index (i_qscale % 6) is used
 *   i_qscale  quantizer; the shift passed on is 15 + i_qscale / 6
 *   b_intra   added to the shift when computing the rounding offset, so an
 *             intra block (1) gets twice the offset of an inter block (0) */
126 static void quant_4x4( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
128 const int i_qbits = 15 + i_qscale / 6;
129 const int i_mf = i_qscale % 6;
/* rounding offset: (1 << qbits) / 6 for inter, twice that for intra */
130 const int f = ( 1 << (i_qbits + b_intra) ) / 6;
131 h->quantf.quant_4x4_core( dct, quant_mf[i_mf], i_qbits, f );
/* Quantize the 4x4 block of luma DC coefficients through
 * h->quantf.quant_4x4_dc_core.
 *   dct       DC coefficient block handed to the core routine
 *   quant_mf  six multiplier tables; only the [0][0] entry of table
 *             (i_qscale % 6) is passed on, i.e. one multiplier for all DCs
 *   i_qscale  quantizer; the shift passed on is 16 + i_qscale / 6
 * Unlike the other quant_* helpers there is no intra flag: the rounding
 * offset is always (1 << qbits) / 3. */
133 static void quant_4x4_dc( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
135 const int i_qbits = 16 + i_qscale / 6;
136 const int i_mf = i_qscale % 6;
137 const int f = ( 1 << i_qbits ) / 3;
138 h->quantf.quant_4x4_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
/* Quantize the 2x2 block of chroma DC coefficients through
 * h->quantf.quant_2x2_dc_core.
 *   dct       DC coefficient block handed to the core routine
 *   quant_mf  six multiplier tables; only the [0][0] entry of table
 *             (i_qscale % 6) is passed on, i.e. one multiplier for all DCs
 *   i_qscale  quantizer; the shift passed on is 16 + i_qscale / 6
 *   b_intra   added to the shift when computing the rounding offset, so an
 *             intra block (1) gets twice the offset of an inter block (0) */
140 static void quant_2x2_dc( x264_t *h, int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
142 const int i_qbits = 16 + i_qscale / 6;
143 const int i_mf = i_qscale % 6;
144 const int f = ( 1 << (i_qbits + b_intra) ) / 6;
145 h->quantf.quant_2x2_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
/*
 * x264_mb_decimate_score: given the zigzag-ordered coefficients of a block,
 * return a score used to decide whether the block can be emptied
 * (a low score means "set it to null").
 * Used for inter macroblocks (luma and chroma):
 *  luma:   for an 8x8 block:    if score < 4 -> null
 *          for the complete mb: if score < 6 -> null
 *  chroma: for the complete mb: if score < 7 -> null
 *
 *  dct:   zigzag-ordered levels; entries [0, i_max) are read
 *  i_max: number of entries; 64 selects the 8x8 cost table, any other value
 *         the 4x4 table (callers in this file pass 15 or 16)
 *  returns the accumulated score, or 9 as soon as a level with magnitude
 *  greater than 1 is found.
 */
static int x264_mb_decimate_score( int *dct, int i_max )
{
    /* Cost of a +-1 coefficient, indexed by the number of zeros that precede
     * it in scan order: the longer the run, the cheaper it is to drop. */
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip the trailing zeros */
    while( idx >= 0 && dct[idx] == 0 )
        idx--;

    /* walk the remaining coefficients from high to low frequency */
    while( idx >= 0 )
    {
        int i_run;

        /* A level with magnitude > 1 is never dropped: bail out with a score
         * above every threshold listed in the header comment.
         * NOTE(review): the constant 9 is not visible in this listing -
         * confirm it against the complete file. */
        if( abs( dct[idx--] ) > 1 )
            return 9;

        /* count the zeros that precede this coefficient in scan order */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
/* Encode one intra 4x4 luma block of the current macroblock.
 *   idx       4x4 block index; its pixel offset inside the macroblock is
 *             (4 * block_idx_x[idx], 4 * block_idx_y[idx])
 *   i_qscale  quantizer handed to the quant / dequant stage
 * The residual between fenc and fdec (plane 0) is transformed, quantized,
 * zigzag-scanned into h->dct.block[idx].luma4x4, then dequantized and added
 * back to fdec.  In lossless mode the pixel difference is scanned directly
 * and fdec receives the source pixels.
 * NOTE(review): brace-only lines and a few short statements (presumably a
 * return after the lossless path and the else in front of the plain
 * quantizer) are not present in this listing. */
192 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
194 int x = 4 * block_idx_x[idx];
195 int y = 4 * block_idx_y[idx];
196 uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
197 uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
198 int16_t dct4x4[4][4];
/* lossless: no transform, the zigzag-ordered pixel difference is the residual */
200 if( h->mb.b_lossless )
202 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst );
206 h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
/* quantize with the intra luma 4x4 matrix (CQM_4IY), intra flag set */
208 if( h->mb.b_trellis )
209 x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
211 quant_4x4( h, dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
213 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
214 h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
216 /* output samples to fdec */
217 h->dctf.add4x4_idct( p_dst, dct4x4 );
/* Encode one intra 8x8 luma block of the current macroblock.
 *   idx       8x8 block index (0..3 as used by the caller in this file);
 *             the row offset is 8 * (idx >> 1)
 *   i_qscale  quantizer handed to the quant / dequant stage
 * The residual between fenc and fdec (plane 0) goes through the 8x8
 * transform, is quantized with the intra luma 8x8 matrix (CQM_8IY),
 * zigzag-scanned into h->dct.luma8x8[idx], then dequantized and added back
 * to fdec.
 * NOTE(review): the declaration of x used below, the braces and presumably
 * the else in front of the plain quantizer are not present in this listing. */
220 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
223 int y = 8 * (idx>>1);
224 uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
225 uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
226 int16_t dct8x8[8][8];
228 h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
230 if( h->mb.b_trellis )
231 x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
233 quant_8x8( h, dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
235 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
236 h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
/* reconstruct into fdec */
237 h->dctf.add8x8_idct8( p_dst, dct8x8 );
/* Encode the luma of an intra 16x16 macroblock.
 *   i_qscale  quantizer handed to the quant / dequant stage
 * dct4x4[0] collects the DC coefficient of each of the 16 4x4 blocks,
 * dct4x4[1..16] hold the blocks themselves.  The AC part of every block is
 * quantized and scanned into h->dct.block[i].residual_ac; the DC block gets
 * its own transform / quantization and is scanned into h->dct.luma16x16_dc.
 * Finally the dequantized DCs are written back into each block and the whole
 * macroblock is reconstructed into fdec.
 * NOTE(review): brace-only lines and short statements (declaration of i,
 * presumably a return after the lossless path and the else in front of the
 * plain quantizer) are not present in this listing. */
240 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
242 uint8_t *p_src = h->mb.pic.p_fenc[0];
243 uint8_t *p_dst = h->mb.pic.p_fdec[0];
245 int16_t dct4x4[16+1][4][4];
/* lossless: scan pixel differences directly; the top-left pixel of each 4x4
 * block plays the role of its DC */
249 if( h->mb.b_lossless )
251 for( i = 0; i < 16; i++ )
253 int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
254 int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
255 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+oe, p_dst+od );
/* note the [x][y] index order here, versus [y][x] in the transform path below */
256 dct4x4[0][block_idx_x[i]][block_idx_y[i]] = p_src[oe] - p_dst[od];
257 p_dst[od] = p_src[oe];
259 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
263 h->dctf.sub16x16_dct( &dct4x4[1], p_src, p_dst );
264 for( i = 0; i < 16; i++ )
/* copy the DC of block i into the DC block */
267 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
269 /* quant/scan/dequant */
270 if( h->mb.b_trellis )
271 x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
273 quant_4x4( h, dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
275 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
276 h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
/* DC block: transform, quantize and scan the 16 DC coefficients */
279 h->dctf.dct4x4dc( dct4x4[0] );
280 quant_4x4_dc( h, dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
281 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
283 /* output samples to fdec */
284 h->dctf.idct4x4dc( dct4x4[0] );
285 x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */
287 /* calculate dct coeffs */
288 for( i = 0; i < 16; i++ )
/* put the reconstructed DC back into position [0][0] of block i */
291 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
293 /* put pixels to fdec */
294 h->dctf.add16x16_idct( p_dst, &dct4x4[1] );
/* Encode both 8x8 chroma planes of the current macroblock.
 *   b_inter   non-zero for inter macroblocks; selects the matrix
 *             CQM_4IC + b_inter and clears the intra rounding flag
 *   i_qscale  chroma quantizer
 * For each plane the four 4x4 blocks are transformed and quantized, their AC
 * coefficients scanned into h->dct.block[16 + i + ch*4].residual_ac, and the
 * four DCs go through the 2x2 DC transform into h->dct.chroma_dc[ch].
 * Decimation is only considered for inter macroblocks, and then only in
 * B slices or when param.analyse.b_dct_decimate is set.
 * NOTE(review): brace-only lines and short statements (declarations of i and
 * ch, else keywords, the guard around the decimate score and whatever
 * follows the all-zero DC test) are not present in this listing. */
297 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
300 int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
302 for( ch = 0; ch < 2; ch++ )
304 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
305 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
306 int i_decimate_score = 0;
308 int16_t dct2x2[2][2];
309 int16_t dct4x4[4][4][4];
/* lossless: pixel differences are the residual; the top-left pixel of each
 * 4x4 block is stored as its DC */
311 if( h->mb.b_lossless )
313 for( i = 0; i < 4; i++ )
315 int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
316 int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
317 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+oe, p_dst+od );
318 h->dct.chroma_dc[ch][i] = p_src[oe] - p_dst[od];
319 p_dst[od] = p_src[oe];
324 h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
325 /* calculate dct coeffs */
326 for( i = 0; i < 4; i++ )
/* collect the DC of block i */
329 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
331 /* no trellis; it doesn't seem to help chroma noticeably */
332 quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
333 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
337 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
/* DC: 2x2 transform, quantize, scan */
341 h->dctf.dct2x2dc( dct2x2 );
342 quant_2x2_dc( h, dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
343 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
345 /* output samples to fdec */
346 h->dctf.idct2x2dc( dct2x2 );
347 x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */
349 if( b_decimate && i_decimate_score < 7 )
351 /* Near null chroma 8x8 block so make it null (bits saving) */
352 memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
/* the four int16_t DCs are tested through an int pointer
 * (sizeof(dct2x2)/sizeof(int) words) */
353 if( !array_non_zero( (int*)dct2x2, sizeof(dct2x2)/sizeof(int) ) )
355 memset( dct4x4, 0, sizeof( dct4x4 ) );
359 for( i = 0; i < 4; i++ )
360 h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
/* NOTE(review): dct2x2[0][i] runs i up to 3 although the inner dimension is
 * 2, i.e. it walks the 2x2 array as four contiguous values. */
363 for( i = 0; i < 4; i++ )
364 dct4x4[i][0][0] = dct2x2[0][i];
365 h->dctf.add8x8_idct( p_dst, dct4x4 );
/* Mark the current macroblock as carrying no residual: both coded block
 * patterns are cleared, the non_zero_count cache entries of the 16 luma and
 * 8 chroma 4x4 blocks are zeroed, and the per-macroblock cbp entry at
 * h->mb.i_mb_xy is reset.
 * NOTE(review): the declaration of i and the braces are not present in this
 * listing. */
369 static void x264_macroblock_encode_skip( x264_t *h )
372 h->mb.i_cbp_luma = 0x00;
373 h->mb.i_cbp_chroma = 0x00;
375 for( i = 0; i < 16+8; i++ )
377 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
381 h->mb.cbp[h->mb.i_mb_xy] = 0;
384 /*****************************************************************************
385 * x264_macroblock_encode_pskip:
386 * Encode an already marked skip block
387 *****************************************************************************/
/* Reconstruct a macroblock already marked as skip: the list-0 motion vector
 * cached at x264_scan8[0] is clipped to [mv_min, mv_max], luma and both
 * chroma planes are motion-compensated from reference 0 into fdec, and the
 * residual state is cleared through x264_macroblock_encode_skip.
 * NOTE(review): the trailing argument lines of the three mc calls and the
 * braces are not present in this listing. */
388 void x264_macroblock_encode_pskip( x264_t *h )
390 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
391 h->mb.mv_min[0], h->mb.mv_max[0] );
392 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
393 h->mb.mv_min[1], h->mb.mv_max[1] );
395 /* Motion compensation XXX probably unneeded */
396 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
397 h->mb.pic.p_fdec[0], FDEC_STRIDE,
/* chroma planes are entries 4 and 5 of the reference plane array */
401 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
402 h->mb.pic.p_fdec[1], FDEC_STRIDE,
405 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
406 h->mb.pic.p_fdec[2], FDEC_STRIDE,
409 x264_macroblock_encode_skip( h );
412 /*****************************************************************************
413 * x264_macroblock_encode:
414 *****************************************************************************/
/* Encode the current macroblock (h->mb) according to h->mb.i_type:
 *  - P_SKIP / B_SKIP are handed to the skip encoders;
 *  - I_16x16 / I_8x8 / I_4x4 run intra prediction into fdec followed by the
 *    matching x264_mb_encode_i* routine;
 *  - the remaining (inter) case transforms and quantizes the luma residual,
 *    optionally zeroing near-empty blocks, and reconstructs into fdec.
 * Afterwards chroma is encoded with the chroma QP, the luma / chroma coded
 * block patterns and the non_zero_count cache are filled in, and the
 * macroblock type is turned into P_SKIP / B_SKIP when nothing is left to
 * code.
 * NOTE(review): brace-only lines and many short statements (local
 * declarations, else, return, several if guards, the motion compensation
 * call) are not present in this listing, so the exact nesting below has to
 * be confirmed against the complete file. */
415 void x264_macroblock_encode( x264_t *h )
418 int i_qp = h->mb.i_qp;
/* decimation is allowed in B slices or when the user enabled b_dct_decimate */
419 int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
422 if( h->mb.i_type == P_SKIP )
425 x264_macroblock_encode_pskip( h );
428 if( h->mb.i_type == B_SKIP )
430 /* XXX motion compensation is probably unneeded */
432 x264_macroblock_encode_skip( h );
/* ---- intra macroblocks: predict into fdec, then encode the residual ---- */
436 if( h->mb.i_type == I_16x16 )
438 const int i_mode = h->mb.i_intra16x16_pred_mode;
439 h->mb.b_transform_8x8 = 0;
440 /* do the right prediction */
441 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
443 /* encode the 16x16 macroblock */
444 x264_mb_encode_i16x16( h, i_qp );
446 else if( h->mb.i_type == I_8x8 )
448 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
449 h->mb.b_transform_8x8 = 1;
450 for( i = 0; i < 4; i++ )
452 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
453 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
/* build the filtered edge buffer, predict from it, then encode the block */
455 x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
456 h->predict_8x8[i_mode]( p_dst, edge );
457 x264_mb_encode_i8x8( h, i, i_qp );
460 else if( h->mb.i_type == I_4x4 )
462 h->mb.b_transform_8x8 = 0;
463 for( i = 0; i < 16; i++ )
465 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
466 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
/* top neighbour present but top-right missing: replicate the last top pixel
 * into the four top-right positions */
468 if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
469 /* emulate missing topright samples */
470 *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
472 h->predict_4x4[i_mode]( p_dst );
473 x264_mb_encode_i4x4( h, i, i_qp );
/* ---- inter macroblocks (presumably the final else branch) ---- */
479 int i_decimate_mb = 0;
481 /* Motion compensation */
/* lossless: scan the pixel differences of all 16 4x4 blocks directly */
484 if( h->mb.b_lossless )
486 for( i4x4 = 0; i4x4 < 16; i4x4++ )
488 int x = 4*block_idx_x[i4x4];
489 int y = 4*block_idx_y[i4x4];
490 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4,
491 h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
492 h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
/* 8x8 transform path: four 8x8 blocks */
495 else if( h->mb.b_transform_8x8 )
497 int16_t dct8x8[4][8][8];
498 int nnz8x8[4] = {1,1,1,1};
499 b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
500 h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
502 for( idx = 0; idx < 4; idx++ )
504 if( h->mb.b_noise_reduction )
505 x264_denoise_dct( h, (int16_t*)dct8x8[idx] );
506 if( h->mb.b_trellis )
507 x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
509 quant_8x8( h, dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
511 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
/* a block scoring below 4 is zeroed, both scanned levels and coefficients */
515 int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
516 i_decimate_mb += i_decimate_8x8;
517 if( i_decimate_8x8 < 4 )
519 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
520 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
525 nnz8x8[idx] = array_non_zero( (int*)dct8x8[idx], sizeof(*dct8x8)/sizeof(int) );
/* whole macroblock scoring below 6: drop all luma coefficients */
528 if( i_decimate_mb < 6 && b_decimate )
529 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
/* reconstruct each 8x8 block into fdec */
532 for( idx = 0; idx < 4; idx++ )
535 h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
536 h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
/* 4x4 transform path: sixteen 4x4 blocks handled in groups of four per 8x8 */
542 int16_t dct4x4[16][4][4];
543 int nnz8x8[4] = {1,1,1,1};
544 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
546 for( i8x8 = 0; i8x8 < 4; i8x8++ )
550 /* encode one 4x4 block */
552 for( i4x4 = 0; i4x4 < 4; i4x4++ )
554 idx = i8x8 * 4 + i4x4;
556 if( h->mb.b_noise_reduction )
557 x264_denoise_dct( h, (int16_t*)dct4x4[idx] );
558 if( h->mb.b_trellis )
559 x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
561 quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
563 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
566 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
569 /* decimate this 8x8 block */
570 i_decimate_mb += i_decimate_8x8;
571 if( i_decimate_8x8 < 4 && b_decimate )
573 memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
574 memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
/* whole macroblock scoring below 6: drop all luma coefficients */
579 if( i_decimate_mb < 6 && b_decimate )
580 memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
/* reconstruct each 8x8 group into fdec */
583 for( i8x8 = 0; i8x8 < 4; i8x8++ )
586 for( i = 0; i < 4; i++ )
587 h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
588 h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
/* ---- chroma: derive the chroma QP (luma QP plus the PPS offset, clipped to
 * 0..51 and mapped through i_chroma_qp_table) ---- */
595 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
596 if( IS_INTRA( h->mb.i_type ) )
598 const int i_mode = h->mb.i_chroma_pred_mode;
599 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
600 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
603 /* encode the 8x8 blocks */
604 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
606 /* Calculate the Luma/Chroma pattern and non_zero_count */
607 h->mb.i_cbp_luma = 0x00;
608 if( h->mb.i_type == I_16x16 )
610 for( i = 0; i < 16; i++ )
612 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
613 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
615 h->mb.i_cbp_luma = 0x0f;
618 else if( h->mb.b_transform_8x8 )
620 /* coded_block_flag is enough for CABAC.
621 * the full non_zero_count is done only in CAVLC. */
622 for( i = 0; i < 4; i++ )
624 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
626 for( j = 0; j < 4; j++ )
627 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
629 h->mb.i_cbp_luma |= 1 << i;
634 for( i = 0; i < 16; i++ )
636 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
637 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
/* one cbp bit per 8x8 group of four 4x4 blocks */
639 h->mb.i_cbp_luma |= 1 << (i/4);
643 /* Calculate the chroma pattern */
644 h->mb.i_cbp_chroma = 0x00;
645 for( i = 0; i < 8; i++ )
647 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
648 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
651 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
/* the 8-entry test starting at chroma_dc[0] spans the DC arrays of both planes */
654 if( h->mb.i_cbp_chroma == 0x00 && array_non_zero( h->dct.chroma_dc[0], 8 ) )
656 h->mb.i_cbp_chroma = 0x01; /* dc only */
/* CABAC additionally records which DC blocks are coded: bit 0 luma 16x16 DC,
 * bit 1 / bit 2 the two chroma DC blocks */
659 if( h->param.b_cabac )
661 i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc, 16 ) )
662 | array_non_zero( h->dct.chroma_dc[0], 4 ) << 1
663 | array_non_zero( h->dct.chroma_dc[1], 4 ) << 2;
/* packed cbp: DC flags from bit 8, chroma pattern from bit 4, luma in the low bits */
667 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
670 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
671 * (if multiple mv give same result)*/
672 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
673 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
674 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
/* a 16x16 list-0 block on reference 0 with no residual becomes P_SKIP when
 * its motion vector equals the predicted skip vector */
678 x264_mb_predict_mv_pskip( h, mvp );
679 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
680 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
682 h->mb.i_type = P_SKIP;
686 /* Check for B_SKIP */
687 if( h->mb.i_type == B_DIRECT &&
688 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
690 h->mb.i_type = B_SKIP;
/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (the decision
 *  uses the QP currently stored in h->mb.i_qp).
 *****************************************************************************/
/* Probe whether the current macroblock could be coded as a skip.
 *   b_bidir  NOTE(review): its use is not visible in this listing
 * Luma: after motion compensation with the clipped predicted skip vector the
 * residual is transformed, quantized with def_quant4_mf and scored with
 * x264_mb_decimate_score; the running score is tested against 6.
 * Chroma: for each plane the quantized 2x2 DC block is tested for any
 * non-zero value and the AC decimate score is tested against 7.
 * NOTE(review): the return statements, several local declarations, the
 * braces and the condition guarding the motion compensation are not present
 * in this listing, so the returned values cannot be documented from here. */
699 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
701 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
702 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
703 DECLARE_ALIGNED( int, dctscan[16], 16 );
705 int i_qp = h->mb.i_qp;
/* predicted skip vector, clipped to the allowed range */
715 x264_mb_predict_mv_pskip( h, mvp );
716 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
717 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
719 /* Motion compensation */
720 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
721 h->mb.pic.p_fdec[0], FDEC_STRIDE,
722 mvp[0], mvp[1], 16, 16 );
726 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0],
727 h->mb.pic.p_fdec[0] );
729 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
731 /* encode one 4x4 block */
732 for( i4x4 = 0; i4x4 < 4; i4x4++ )
734 const int idx = i8x8 * 4 + i4x4;
/* probe with the default matrix, not the configured one */
736 quant_4x4( h, dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
737 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
739 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
741 if( i_decimate_mb >= 6 )
/* chroma QP: luma QP plus the PPS offset, clipped to 0..51 and mapped */
750 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
752 for( ch = 0; ch < 2; ch++ )
754 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
755 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
759 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
760 h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
761 mvp[0], mvp[1], 8, 8 );
764 h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
766 /* calculate dct DC */
767 dct2x2[0][0] = dct4x4[0][0][0];
768 dct2x2[0][1] = dct4x4[1][0][0];
769 dct2x2[1][0] = dct4x4[2][0][0];
770 dct2x2[1][1] = dct4x4[3][0][0];
771 h->dctf.dct2x2dc( dct2x2 );
772 quant_2x2_dc( h, dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
/* any surviving chroma DC coefficient is tested here */
773 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
779 /* calculate dct coeffs */
780 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
782 quant_4x4( h, dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
783 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
785 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
786 if( i_decimate_mb >= 7 )
796 /****************************************************************************
797 * DCT-domain noise reduction / adaptive deadzone
799 ****************************************************************************/
/* Refresh the noise-reduction offsets from the statistics gathered by
 * x264_denoise_dct.  Two categories are handled: cat 0 with 16 coefficients
 * and the x264_dct4_weight2_tab weights, cat 1 with 64 coefficients and the
 * x264_dct8_weight2_tab weights.
 * When the block counter of a category exceeds 1<<18 (cat 0) or 1<<16
 * (cat 1), the counter and all residual sums are halved.
 * Each offset is then recomputed as
 *   (i_noise_reduction * count + sum/2) / (sum * weight / 256 + 1)
 * where the +1 keeps the divisor non-zero.
 * NOTE(review): the declarations of cat and i and the braces are not present
 * in this listing. */
801 void x264_noise_reduction_update( x264_t *h )
804 for( cat = 0; cat < 2; cat++ )
806 int size = cat ? 64 : 16;
807 const int *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
809 if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
811 for( i = 0; i < size; i++ )
812 h->nr_residual_sum[cat][i] >>= 1;
813 h->nr_count[cat] >>= 1;
816 for( i = 0; i < size; i++ )
817 h->nr_offset[cat][i] =
818 ((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
819 + h->nr_residual_sum[cat][i]/2)
820 / ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
/* Apply the noise-reduction offsets to one coefficient block and update the
 * residual statistics.
 *   dct  coefficient block viewed as a flat int16_t array: 64 entries when
 *        h->mb.b_transform_8x8 is set, 16 otherwise
 * The loop runs from the last coefficient down to index 1, so index 0 is not
 * touched.  Two symmetric cases are visible: one adds the level to the
 * residual sum and subtracts the offset, the other subtracts the level from
 * the sum and adds the offset.
 * NOTE(review): the sign tests selecting between the two cases, any clamping
 * and the store back into dct are not present in this listing. */
824 void x264_denoise_dct( x264_t *h, int16_t *dct )
826 const int cat = h->mb.b_transform_8x8;
831 for( i = (cat ? 63 : 15); i >= 1; i-- )
838 h->nr_residual_sum[cat][i] += level;
839 level -= h->nr_offset[cat][i];
845 h->nr_residual_sum[cat][i] -= level;
846 level += h->nr_offset[cat][i];
855 /*****************************************************************************
856 * RD only; 4 calls to this do not make up for one macroblock_encode.
857 * doesn't transform chroma dc.
858 *****************************************************************************/
/* Encode a single 8x8 partition of an inter macroblock.
 *   i8  index of the 8x8 partition; its luma offset is
 *       ((i8 & 1) * 8, (i8 >> 1) * 8), its chroma offset half of that
 * Luma is motion-compensated, transformed (8x8 or four 4x4 blocks depending
 * on h->mb.b_transform_8x8), quantized with the inter luma matrices and
 * scanned into h->dct; nnz8x8 is derived either from the decimate score
 * (compared against 4) or from a plain non-zero test.  For each chroma plane
 * the matching 4x4 block is transformed, quantized with CQM_4PC and its AC
 * coefficients scanned; it is reconstructed only when something is non-zero.
 * Bit i8 of h->mb.i_cbp_luma is set or cleared at the end and
 * h->mb.i_cbp_chroma is set to 0x02.
 * NOTE(review): the declarations of nnz8x8, ch and i4, the braces, the else
 * keywords and the if guards around the decimate / reconstruction steps are
 * not present in this listing. */
859 void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
861 int i_qp = h->mb.i_qp;
862 uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
863 uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
864 int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
868 x264_mb_mc_8x8( h, i8 );
/* luma, 8x8 transform */
870 if( h->mb.b_transform_8x8 )
872 int16_t dct8x8[8][8];
873 h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
874 quant_8x8( h, dct8x8, h->quant8_mf[CQM_8PY], i_qp, 0 );
875 scan_zigzag_8x8full( h->dct.luma8x8[i8], dct8x8 );
878 nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 );
880 nnz8x8 = array_non_zero( (int*)dct8x8, sizeof(dct8x8)/sizeof(int) );
884 h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
885 h->dctf.add8x8_idct8( p_fdec, dct8x8 );
/* luma, four 4x4 transforms */
891 int16_t dct4x4[4][4][4];
892 h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
893 quant_4x4( h, dct4x4[0], h->quant4_mf[CQM_4PY], i_qp, 0 );
894 quant_4x4( h, dct4x4[1], h->quant4_mf[CQM_4PY], i_qp, 0 );
895 quant_4x4( h, dct4x4[2], h->quant4_mf[CQM_4PY], i_qp, 0 );
896 quant_4x4( h, dct4x4[3], h->quant4_mf[CQM_4PY], i_qp, 0 );
897 for( i4 = 0; i4 < 4; i4++ )
898 scan_zigzag_4x4full( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] );
/* the score loop stops early once the threshold of 4 is reached */
902 int i_decimate_8x8 = 0;
903 for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ )
904 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 );
905 nnz8x8 = 4 <= i_decimate_8x8;
908 nnz8x8 = array_non_zero( (int*)dct4x4, sizeof(dct4x4)/sizeof(int) );
912 for( i4 = 0; i4 < 4; i4++ )
913 h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
914 h->dctf.add8x8_idct( p_fdec, dct4x4 );
/* chroma QP: luma QP plus the PPS offset, clipped to 0..51 and mapped */
918 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
920 for( ch = 0; ch < 2; ch++ )
922 int16_t dct4x4[4][4];
923 p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
924 p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
926 h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
927 quant_4x4( h, dct4x4, h->quant4_mf[CQM_4PC], i_qp, 0 );
/* only the 15 AC coefficients are scanned */
928 scan_zigzag_4x4( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 );
929 if( array_non_zero( (int*)dct4x4, sizeof(dct4x4)/sizeof(int) ) )
931 h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
932 h->dctf.add4x4_idct( p_fdec, dct4x4 );
/* record the luma result for this partition in bit i8 of the luma cbp */
937 h->mb.i_cbp_luma |= (1 << i8);
939 h->mb.i_cbp_luma &= ~(1 << i8);
940 h->mb.i_cbp_chroma = 0x02;