1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
28 #include "common/common.h"
29 #include "macroblock.h"
32 /* def_quant4_mf only for probe_skip; actual encoding uses matrices from set.c */
33 /* FIXME this seems to make better decisions with cqm=jvt, but could screw up
34 * with general custom matrices. */
/* Default (flat-CQM) 4x4 quant multipliers, indexed by [qp%6][y][x].
 * Each table holds only 3 distinct values thanks to coefficient symmetry;
 * used exclusively by x264_macroblock_probe_skip (see note above). */
static const int def_quant4_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 },
      { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 },
      { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 },
      { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 },
      {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 },
      {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 },
      {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 } }
};
51 /****************************************************************************
52 * Scan and Quant functions
53 ****************************************************************************/
54 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
55 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/* Zigzag scan: copy dct[y][x] into level[i] in H.264 scan order. */
#define ZIG(i,y,x) level[i] = dct[y][x];

/* Full 8x8 zigzag scan (64 coefficients). */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
}

/* Full 4x4 zigzag scan (16 coefficients, DC included). */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* AC-only 4x4 zigzag scan: skips (0,0), used when DC is coded separately. */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}

/* 2x2 chroma DC scan (raster order is the zigzag order here). */
static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
{
    ZIG(0,0,0)
    ZIG(1,0,1)
    ZIG(2,1,0)
    ZIG(3,1,1)
}
#undef ZIG
/* Lossless path: zigzag-scan the residual (src - dst) and, in the same
 * pass, copy the source pixel into the reconstruction buffer. */
#define ZIG(i,y,x) {\
    int o = x+y*i_stride;\
    level[i] = p_src[o] - p_dst[o];\
    p_dst[o] = p_src[o];\
}

/* Full 4x4 residual scan (DC included). */
static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}

/* AC-only 4x4 residual scan: position (0,0) is left untouched. */
static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
                ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
    ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
    ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
    ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
#undef ZIG
121 static void quant_8x8( x264_t *h, int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
123 const int i_qbits = 16 + i_qscale / 6;
124 const int i_mf = i_qscale % 6;
125 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
126 h->quantf.quant_8x8_core( dct, quant_mf[i_mf], i_qbits, f );
128 static void quant_4x4( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
130 const int i_qbits = 15 + i_qscale / 6;
131 const int i_mf = i_qscale % 6;
132 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
133 h->quantf.quant_4x4_core( dct, quant_mf[i_mf], i_qbits, f );
135 static void quant_4x4_dc( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
137 const int i_qbits = 16 + i_qscale / 6;
138 const int i_mf = i_qscale % 6;
139 const int f = ( 1 << i_qbits ) / 3;
140 h->quantf.quant_4x4_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
142 static void quant_2x2_dc( x264_t *h, int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
144 const int i_qbits = 16 + i_qscale / 6;
145 const int i_mf = i_qscale % 6;
146 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
147 h->quantf.quant_2x2_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f );
 * x264_mb_decimate_score: given the scanned dct coefficients, returns a score
 * estimating how cheap it would be to zero the whole block
 * (a low score means it can safely be set to null)
153 * Used in inter macroblock (luma and chroma)
154 * luma: for a 8x8 block: if score < 4 -> null
155 * for the complete mb: if score < 6 -> null
156 * chroma: for the complete mb: if score < 7 -> null
/* Score a scanned coefficient block for decimation.
 * Scans from the highest frequency down: any |coef| > 1 makes the block
 * "expensive" (returns 9 immediately); otherwise each +/-1 contributes a
 * weight that decreases with the length of the zero-run preceding it. */
static int x264_mb_decimate_score( int *dct, int i_max )
{
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip trailing zeros */
    while( idx >= 0 && dct[idx] == 0 )
        idx--;

    while( idx >= 0 )
    {
        int i_run;

        /* any coefficient larger than +/-1 is too costly to decimate */
        if( abs( dct[idx--] ) > 1 )
            return 9;

        /* count the zero-run before the next nonzero coefficient */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
194 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
196 const int i_stride = h->mb.pic.i_stride[0];
197 const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
198 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
199 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
200 int16_t dct4x4[4][4];
202 if( h->mb.b_lossless )
204 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
208 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
210 if( h->mb.b_noise_reduction )
211 x264_denoise_dct( h, (int16_t*)dct4x4 );
212 if( h->mb.b_trellis )
213 x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
215 quant_4x4( h, dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
217 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
218 h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
220 /* output samples to fdec */
221 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
224 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
226 const int i_stride = h->mb.pic.i_stride[0];
227 const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
228 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
229 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
230 int16_t dct8x8[8][8];
232 h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
234 if( h->mb.b_noise_reduction )
235 x264_denoise_dct( h, (int16_t*)dct8x8 );
236 if( h->mb.b_trellis )
237 x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
239 quant_8x8( h, dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
241 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
242 h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
243 h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
246 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
248 const int i_stride = h->mb.pic.i_stride[0];
249 uint8_t *p_src = h->mb.pic.p_fenc[0];
250 uint8_t *p_dst = h->mb.pic.p_fdec[0];
252 int16_t dct4x4[16+1][4][4];
256 if( h->mb.b_lossless )
258 for( i = 0; i < 16; i++ )
260 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
261 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
262 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
265 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
269 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
270 for( i = 0; i < 16; i++ )
273 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
275 /* quant/scan/dequant */
276 if( h->mb.b_noise_reduction )
277 x264_denoise_dct( h, (int16_t*)dct4x4[i] );
278 if( h->mb.b_trellis )
279 x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
281 quant_4x4( h, dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
283 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
284 h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
287 h->dctf.dct4x4dc( dct4x4[0] );
288 quant_4x4_dc( h, dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
289 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
291 /* output samples to fdec */
292 h->dctf.idct4x4dc( dct4x4[0] );
293 x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */
295 /* calculate dct coeffs */
296 for( i = 0; i < 16; i++ )
299 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
301 /* put pixels to fdec */
302 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
305 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
309 for( ch = 0; ch < 2; ch++ )
311 const int i_stride = h->mb.pic.i_stride[1+ch];
312 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
313 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
314 int i_decimate_score = 0;
316 int16_t dct2x2[2][2];
317 int16_t dct4x4[4][4][4];
319 if( h->mb.b_lossless )
321 for( i = 0; i < 4; i++ )
323 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
324 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
325 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
331 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
332 /* calculate dct coeffs */
333 for( i = 0; i < 4; i++ )
336 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
338 /* no trellis; it doesn't seem to help chroma noticeably */
339 quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
340 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
344 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
348 h->dctf.dct2x2dc( dct2x2 );
349 quant_2x2_dc( h, dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
350 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
352 /* output samples to fdec */
353 h->dctf.idct2x2dc( dct2x2 );
354 x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */
356 if( b_inter && i_decimate_score < 7 )
358 /* Near null chroma 8x8 block so make it null (bits saving) */
359 memset( dct4x4, 0, sizeof( dct4x4 ) );
360 memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
364 for( i = 0; i < 4; i++ )
365 h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
368 /* calculate dct coeffs */
369 for( i = 0; i < 4; i++ )
372 dct4x4[i][0][0] = dct2x2[0][i];
374 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
378 static void x264_macroblock_encode_skip( x264_t *h )
381 h->mb.i_cbp_luma = 0x00;
382 h->mb.i_cbp_chroma = 0x00;
384 for( i = 0; i < 16+8; i++ )
386 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
390 h->mb.cbp[h->mb.i_mb_xy] = 0;
393 /*****************************************************************************
394 * x264_macroblock_encode_pskip:
395 * Encode an already marked skip block
396 *****************************************************************************/
397 void x264_macroblock_encode_pskip( x264_t *h )
399 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
400 h->mb.mv_min[0], h->mb.mv_max[0] );
401 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
402 h->mb.mv_min[1], h->mb.mv_max[1] );
404 /* Motion compensation XXX probably unneeded */
405 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
406 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
410 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
411 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
414 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
415 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
418 x264_macroblock_encode_skip( h );
421 /*****************************************************************************
422 * x264_macroblock_encode:
423 *****************************************************************************/
424 void x264_macroblock_encode( x264_t *h )
427 int i_qp = h->mb.i_qp;
430 if( h->mb.i_type == P_SKIP )
433 x264_macroblock_encode_pskip( h );
436 if( h->mb.i_type == B_SKIP )
438 /* XXX motion compensation is probably unneeded */
440 x264_macroblock_encode_skip( h );
444 if( h->mb.i_type == I_16x16 )
446 const int i_mode = h->mb.i_intra16x16_pred_mode;
447 h->mb.b_transform_8x8 = 0;
448 /* do the right prediction */
449 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
451 /* encode the 16x16 macroblock */
452 x264_mb_encode_i16x16( h, i_qp );
454 else if( h->mb.i_type == I_8x8 )
456 h->mb.b_transform_8x8 = 1;
457 for( i = 0; i < 4; i++ )
459 const int i_dst = h->mb.pic.i_stride[0];
460 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
461 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
463 h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
464 x264_mb_encode_i8x8( h, i, i_qp );
467 else if( h->mb.i_type == I_4x4 )
469 h->mb.b_transform_8x8 = 0;
470 for( i = 0; i < 16; i++ )
472 const int i_dst = h->mb.pic.i_stride[0];
473 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
474 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
476 if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
477 /* emulate missing topright samples */
478 *(uint32_t*) &p_dst[4 - i_dst] = p_dst[3 - i_dst] * 0x01010101U;
480 h->predict_4x4[i_mode]( p_dst, i_dst );
481 x264_mb_encode_i4x4( h, i, i_qp );
487 int i_decimate_mb = 0;
489 /* Motion compensation */
492 if( h->mb.b_lossless )
494 for( i4x4 = 0; i4x4 < 16; i4x4++ )
496 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
497 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
500 else if( h->mb.b_transform_8x8 )
502 int16_t dct8x8[4][8][8];
503 int nnz8x8[4] = {1,1,1,1};
504 h->dctf.sub16x16_dct8( dct8x8,
505 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
506 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
508 for( idx = 0; idx < 4; idx++ )
510 if( h->mb.b_noise_reduction )
511 x264_denoise_dct( h, (int16_t*)dct8x8[idx] );
512 if( h->mb.b_trellis )
513 x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
515 quant_8x8( h, dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
517 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
519 if( !h->mb.b_trellis )
521 int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
522 i_decimate_mb += i_decimate_8x8;
523 if( i_decimate_8x8 < 4 )
525 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
526 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
532 if( i_decimate_mb < 6 && !h->mb.b_trellis )
533 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
536 const int stride = h->mb.pic.i_stride[0];
537 for( idx = 0; idx < 4; idx++ )
540 h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
541 h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*stride], stride, dct8x8[idx] );
547 int16_t dct4x4[16][4][4];
548 int nnz8x8[4] = {1,1,1,1};
549 h->dctf.sub16x16_dct( dct4x4,
550 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
551 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
553 for( i8x8 = 0; i8x8 < 4; i8x8++ )
557 /* encode one 4x4 block */
559 for( i4x4 = 0; i4x4 < 4; i4x4++ )
561 idx = i8x8 * 4 + i4x4;
563 if( h->mb.b_noise_reduction )
564 x264_denoise_dct( h, (int16_t*)dct4x4[idx] );
565 if( h->mb.b_trellis )
566 x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
568 quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
570 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
572 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
575 /* decimate this 8x8 block */
576 i_decimate_mb += i_decimate_8x8;
577 if( i_decimate_8x8 < 4 )
579 memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
580 memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
585 if( i_decimate_mb < 6 )
586 memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
589 const int stride = h->mb.pic.i_stride[0];
590 for( i8x8 = 0; i8x8 < 4; i8x8++ )
593 for( i = 0; i < 4; i++ )
594 h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
595 h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*stride], stride, &dct4x4[i8x8*4] );
602 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
603 if( IS_INTRA( h->mb.i_type ) )
605 const int i_mode = h->mb.i_chroma_pred_mode;
606 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
607 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
610 /* encode the 8x8 blocks */
611 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
613 /* Calculate the Luma/Chroma patern and non_zero_count */
614 h->mb.i_cbp_luma = 0x00;
615 if( h->mb.i_type == I_16x16 )
617 for( i = 0; i < 16; i++ )
619 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
620 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
622 h->mb.i_cbp_luma = 0x0f;
625 else if( h->mb.b_transform_8x8 )
627 /* coded_block_flag is enough for CABAC.
628 * the full non_zero_count is done only in CAVLC. */
629 for( i = 0; i < 4; i++ )
631 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
633 for( j = 0; j < 4; j++ )
634 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
636 h->mb.i_cbp_luma |= 1 << i;
641 for( i = 0; i < 16; i++ )
643 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
644 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
646 h->mb.i_cbp_luma |= 1 << (i/4);
650 /* Calculate the chroma patern */
651 h->mb.i_cbp_chroma = 0x00;
652 for( i = 0; i < 8; i++ )
654 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
655 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
658 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
661 if( h->mb.i_cbp_chroma == 0x00 && array_non_zero( h->dct.chroma_dc[0], 8 ) )
663 h->mb.i_cbp_chroma = 0x01; /* dc only */
666 if( h->param.b_cabac )
668 i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc, 16 ) )
669 | array_non_zero( h->dct.chroma_dc[0], 4 ) << 1
670 | array_non_zero( h->dct.chroma_dc[1], 4 ) << 2;
674 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
677 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
678 * (if multiple mv give same result)*/
679 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
680 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
681 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
685 x264_mb_predict_mv_pskip( h, mvp );
686 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
687 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
689 h->mb.i_type = P_SKIP;
693 /* Check for B_SKIP */
694 if( h->mb.i_type == B_DIRECT &&
695 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
697 h->mb.i_type = B_SKIP;
701 /*****************************************************************************
702 * x264_macroblock_probe_skip:
703 * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
705 *****************************************************************************/
706 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
708 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
709 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
710 DECLARE_ALIGNED( int, dctscan[16], 16 );
712 int i_qp = h->mb.i_qp;
722 x264_mb_predict_mv_pskip( h, mvp );
723 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
724 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
726 /* Motion compensation */
727 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
728 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
729 mvp[0], mvp[1], 16, 16 );
733 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
734 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
736 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
738 /* encode one 4x4 block */
739 for( i4x4 = 0; i4x4 < 4; i4x4++ )
741 const int idx = i8x8 * 4 + i4x4;
743 quant_4x4( h, dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
744 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
746 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
748 if( i_decimate_mb >= 6 )
757 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
759 for( ch = 0; ch < 2; ch++ )
761 const int i_stride = h->mb.pic.i_stride[1+ch];
762 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
763 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
767 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
768 h->mb.pic.p_fdec[1+ch], i_stride,
769 mvp[0], mvp[1], 8, 8 );
772 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
774 /* calculate dct DC */
775 dct2x2[0][0] = dct4x4[0][0][0];
776 dct2x2[0][1] = dct4x4[1][0][0];
777 dct2x2[1][0] = dct4x4[2][0][0];
778 dct2x2[1][1] = dct4x4[3][0][0];
779 h->dctf.dct2x2dc( dct2x2 );
780 quant_2x2_dc( h, dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
781 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
787 /* calculate dct coeffs */
788 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
790 quant_4x4( h, dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
791 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
793 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
794 if( i_decimate_mb >= 7 )
804 /****************************************************************************
805 * DCT-domain noise reduction / adaptive deadzone
807 ****************************************************************************/
809 void x264_noise_reduction_update( x264_t *h )
812 for( cat = 0; cat < 4; cat++ )
814 int b_8x8 = cat >= 2;
815 int size = b_8x8 ? 64 : 16;
816 const int *weight = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
818 if( h->nr_count[cat] > (b_8x8 ? (1<<16) : (1<<18)) )
820 for( i = 0; i < size; i++ )
821 h->nr_residual_sum[cat][i] >>= 1;
822 h->nr_count[cat] >>= 1;
825 for( i = 0; i < size; i++ )
826 h->nr_offset[cat][i] =
827 ((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
828 + h->nr_residual_sum[cat][i]/2)
829 / ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
833 void x264_denoise_dct( x264_t *h, int16_t *dct )
835 const int cat = !IS_INTRA(h->mb.i_type) + 2*h->mb.b_transform_8x8;
840 for( i = (cat >= 2 ? 63 : 15); i >= 1; i-- )
847 h->nr_residual_sum[cat][i] += level;
848 level -= h->nr_offset[cat][i];
854 h->nr_residual_sum[cat][i] -= level;
855 level += h->nr_offset[cat][i];