1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
28 #include "common/common.h"
29 #include "macroblock.h"
/* 4x4 sub-block index -> x coordinate (in 4x4-block units) inside a 16x16 MB,
 * following the H.264 double-Z block order (zigzag of 8x8 quadrants). */
static const uint8_t block_idx_x[16] =
{
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
};
/* 4x4 sub-block index -> y coordinate (in 4x4-block units) inside a 16x16 MB,
 * companion table to block_idx_x. */
static const uint8_t block_idx_y[16] =
{
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
};
40 static const uint8_t block_idx_xy[4][4] =
/* def_quant4_mf is only used by x264_macroblock_probe_skip; actual encoding
 * uses the quant matrices built in set.c.  These are the default (flat CQM)
 * 4x4 quant multipliers for the six QP%6 classes, stored as the periodic
 * 4x4 pattern of the three H.264 quantizer coefficients.
 * FIXME this seems to make better decisions with cqm=jvt, but could screw up
 * with general custom matrices. */
static const int def_quant4_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
      { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
      { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
      { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 },
      {  9362, 5825,  9362, 5825 }, { 5825, 3647, 5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 },
      {  8192, 5243,  8192, 5243 }, { 5243, 3355, 5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 },
      {  7282, 4559,  7282, 4559 }, { 4559, 2893, 4559, 2893 } }
};
/* Luma QP -> chroma QP mapping (H.264 Table 8-15); the caller applies
 * chroma_qp_index_offset and clips to [0,51] before indexing.
 * Identity up to QP 29, then progressively compressed, saturating at 39. */
static const int i_chroma_qp_table[52] =
{
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
    36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
    39, 39
};
77 /****************************************************************************
78 * Scan and Quant functions
79 ****************************************************************************/
80 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
81 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/* ZIG(i,y,x): place dct[y][x] at scan position i */
#define ZIG(i,y,x) level[i] = dct[y][x];
/* Full 8x8 zigzag scan (frame coding), DC included: reorder the 8x8
 * coefficient block into the 64-entry level array in scan order. */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
}
/* Full 4x4 zigzag scan, DC included (written out explicitly so the function
 * is self-contained and does not depend on the ZIG macro). */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    level[ 0] = dct[0][0]; level[ 1] = dct[0][1]; level[ 2] = dct[1][0]; level[ 3] = dct[2][0];
    level[ 4] = dct[1][1]; level[ 5] = dct[0][2]; level[ 6] = dct[0][3]; level[ 7] = dct[1][2];
    level[ 8] = dct[2][1]; level[ 9] = dct[3][0]; level[10] = dct[3][1]; level[11] = dct[2][2];
    level[12] = dct[1][3]; level[13] = dct[2][3]; level[14] = dct[3][2]; level[15] = dct[3][3];
}
/* AC-only 4x4 zigzag scan: same order as scan_zigzag_4x4full but the DC
 * coefficient dct[0][0] is skipped (it is coded separately), so only 15
 * levels are produced. */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
    level[ 0] = dct[0][1]; level[ 1] = dct[1][0]; level[ 2] = dct[2][0];
    level[ 3] = dct[1][1]; level[ 4] = dct[0][2]; level[ 5] = dct[0][3]; level[ 6] = dct[1][2];
    level[ 7] = dct[2][1]; level[ 8] = dct[3][0]; level[ 9] = dct[3][1]; level[10] = dct[2][2];
    level[11] = dct[1][3]; level[12] = dct[2][3]; level[13] = dct[3][2]; level[14] = dct[3][3];
}
117 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
#undef ZIG
/* ZIG(i,y,x): for pixel (x,y), store src-dst residual at scan position i and
 * copy the source pixel into the reconstruction buffer (lossless path). */
#define ZIG(i,y,x) {\
    int o = x+y*i_stride;\
    level[i] = p_src[o] - p_dst[o];\
    p_dst[o] = p_src[o];\
}
/* Lossless 4x4 block: emit the pixel-domain residual in full zigzag order
 * (DC included) and reconstruct p_dst as an exact copy of p_src. */
static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
}
/* Lossless 4x4 block, AC only: emit the pixel-domain residual of the 15
 * non-DC positions in zigzag order and copy those source pixels into the
 * reconstruction buffer.  The DC pixel (0,0) is left untouched here; the
 * caller handles it separately.  Written with coordinate tables instead of
 * the ZIG macro so the function stands alone. */
static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    static const uint8_t zig_x[15] = { 1,0,0,1,2,3,2,1,0,1,2,3,3,2,3 };
    static const uint8_t zig_y[15] = { 0,1,2,1,0,0,1,2,3,3,2,1,2,3,3 };
    int i;
    for( i = 0; i < 15; i++ )
    {
        const int o = zig_x[i] + zig_y[i] * i_stride;
        level[i] = p_src[o] - p_dst[o];
        p_dst[o] = p_src[o];
    }
}
/* Quantize an 8x8 coefficient block in place.
 * quant_mf[qp%6][y][x] is the per-coefficient multiplier; the rounding
 * offset f implements the standard deadzone: 1/3 of a step for intra,
 * 1/6 for inter.  Negative coefficients are handled symmetrically so the
 * result rounds toward zero on both sides. */
static void quant_8x8( int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
{
    const int i_qbits = 16 + i_qscale / 6;
    const int i_mf = i_qscale % 6;
    const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
    int x, y;

    for( y = 0; y < 8; y++ )
    {
        for( x = 0; x < 8; x++ )
        {
            if( dct[y][x] > 0 )
                dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
            else
                dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
        }
    }
}
/* Quantize a 4x4 coefficient block in place; same scheme as quant_8x8 but
 * with the 4x4 shift base (15 instead of 16).  Deadzone: 1/3 step intra,
 * 1/6 step inter. */
static void quant_4x4( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
{
    const int i_qbits = 15 + i_qscale / 6;
    const int i_mf = i_qscale % 6;
    const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
    int x, y;

    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {
            if( dct[y][x] > 0 )
                dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
            else
                dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
        }
    }
}
/* Quantize the 4x4 luma DC block (I_16x16 only) in place.  All positions use
 * the single [0][0] multiplier, the effective shift is one deeper than the
 * AC quant (DC went through an extra Hadamard), and the deadzone is always
 * the intra 1/3 step (this block only exists in intra MBs). */
static void quant_4x4_dc( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
{
    const int i_qbits = 15 + i_qscale / 6;
    const int f2 = ( 2 << i_qbits ) / 3;
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    int x, y;

    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {
            if( dct[y][x] > 0 )
                dct[y][x] = ( f2 + dct[y][x] * i_qmf ) >> ( 1 + i_qbits );
            else
                dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> ( 1 + i_qbits ) );
        }
    }
}
/* Quantize the 2x2 chroma DC block in place.  Like quant_4x4_dc, all
 * positions share the [0][0] multiplier with a one-deeper shift, but the
 * deadzone depends on intra/inter (1/3 vs 1/6 of a step). */
static void quant_2x2_dc( int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
{
    const int i_qbits = 15 + i_qscale / 6;
    const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
    const int i_qmf = quant_mf[i_qscale%6][0][0];
    int x, y;

    for( y = 0; y < 2; y++ )
    {
        for( x = 0; x < 2; x++ )
        {
            if( dct[y][x] > 0 )
                dct[y][x] = ( f2 + dct[y][x] * i_qmf ) >> ( 1 + i_qbits );
            else
                dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> ( 1 + i_qbits ) );
        }
    }
}
/* Per-coefficient deadzone rounding offsets for intra blocks, expressed as a
 * fraction (num/den) of one quantizer step; larger offsets near DC. */
static const int f_deadzone_intra[4][4][2] = /* [num][den] */
{
    { {1,2}, {3,7}, {2,5}, {1,3} },
    { {3,7}, {2,5}, {1,3}, {1,4} },
    { {2,5}, {1,3}, {1,4}, {1,5} },
    { {1,3}, {1,4}, {1,5}, {1,5} }
};
/* Per-coefficient deadzone rounding offsets for inter blocks (smaller than
 * the intra offsets), as a fraction (num/den) of one quantizer step. */
static const int f_deadzone_inter[4][4][2] = /* [num][den] */
{
    { {1,3}, {2,7}, {4,15},{2,9} },
    { {2,7}, {4,15},{2,9}, {1,6} },
    { {4,15},{2,9}, {1,6}, {1,7} },
    { {2,9}, {1,6}, {1,7}, {2,15} }
};
/* Alternate quant_4x4 driven by the per-coefficient deadzone tables instead
 * of a caller-chosen constant offset.
 * NOTE(review): this listing appears to have lost its preprocessor guards --
 * the two back-to-back `const int f` initializations below look like the two
 * arms of an #if/#else, and `quant_mf` is referenced as a global that is not
 * declared anywhere in this chunk.  Confirm against the full file before
 * touching this code. */
237 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
239 const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
240 const int i_qbits = 15 + i_qscale / 6;
241 const int i_mf = i_qscale % 6;
244 for( y = 0; y < 4; y++ )
246 for( x = 0; x < 4; x++ )
/* rounding offset f = (num/den) of one quantizer step, per coefficient */
249 const int f = b_intra ?
250 (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
252 (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
/* presumably the #else arm of the same computation via the f_deadzone pointer */
254 const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
/* positive coefficients round up by f ... */
259 dct[y][x] =( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
/* ... negative ones symmetrically toward zero */
263 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
/* Alternate quant_4x4_dc using the intra deadzone table's [0][0] fraction and
 * a global `quant_mf` (not declared in this chunk -- see note on the
 * deadzone quant_4x4 above; likely from a preprocessor-conditional build). */
269 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
271 const int i_qbits = 15 + i_qscale / 6;
272 const int i_qmf = quant_mf[i_qscale%6][0][0];
/* doubled offset because the DC block is quantized with a one-deeper shift */
273 const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
276 for( y = 0; y < 4; y++ )
278 for( x = 0; x < 4; x++ )
283 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
287 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/* Alternate quant_2x2_dc (chroma DC) selecting the intra or inter deadzone
 * fraction; uses the undeclared global `quant_mf` like the other deadzone
 * variants in this chunk. */
293 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
295 int const i_qbits = 15 + i_qscale / 6;
296 const int i_qmf = quant_mf[i_qscale%6][0][0];
297 const int f2 = b_intra ?
298 (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
300 (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
302 for( y = 0; y < 2; y++ )
304 for( x = 0; x < 2; x++ )
308 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
312 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/****************************************************************************
 * x264_mb_decimate_score: given zigzag-scanned dct coeffs, return a score
 * estimating how cheaply the whole block could be forced to zero
 * (a low score means "nearly null" -> set it to null).
 * Used on inter macroblocks (luma and chroma):
 *   luma:   one 8x8 block   -> null if score < 4
 *           the complete MB -> null if score < 6
 *   chroma: the complete MB -> null if score < 7
 ****************************************************************************/
static int x264_mb_decimate_score( int *dct, int i_max )
{
    /* cost contribution of a +-1 coefficient preceded by i_run zeros:
     * isolated trailing ones are cheap, dense ones are expensive */
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip the trailing run of zeros */
    while( idx >= 0 && dct[idx] == 0 )
        idx--;

    while( idx >= 0 )
    {
        int i_run;

        /* any |level| > 1 is too expensive to throw away */
        if( abs( dct[idx--] ) > 1 )
            return 9;

        /* count the zero run preceding this +-1 coefficient */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
/* Encode one intra 4x4 luma block (idx 0..15): the caller has already written
 * the intra prediction into fdec (see x264_macroblock_encode); here the
 * src-pred residual is transformed, quantized and zigzag-scanned into
 * h->dct, then dequantized and idct-added back onto fdec.
 * NOTE(review): this listing has dropped braces/else lines -- the lossless
 * branch presumably returns or elses around the transform path below. */
365 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
367 const int i_stride = h->mb.pic.i_stride[0];
/* pixel offset of 4x4 block idx inside the MB, via the block-order tables */
368 const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
369 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
370 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
371 int16_t dct4x4[4][4];
/* lossless: store the raw pixel residual, no transform/quant */
373 if( h->mb.b_lossless )
375 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
/* lossy path: 4x4 DCT of the residual, quant, scan, dequant */
379 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
380 quant_4x4( dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
381 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
382 x264_mb_dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
384 /* output samples to fdec */
385 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
/* Encode one intra 8x8 luma block (idx 0..3): 8x8 DCT of the src-pred
 * residual, quant, zigzag into h->dct.luma8x8, then dequant + 8x8 idct-add
 * back onto the already-predicted fdec plane.  No lossless branch here. */
388 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
390 const int i_stride = h->mb.pic.i_stride[0];
/* 8x8 block offset: idx 0..3 laid out 2x2 inside the 16x16 MB */
391 const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
392 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
393 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
394 int16_t dct8x8[8][8];
396 h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
397 quant_8x8( dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
398 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
399 x264_mb_dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
400 h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
/* Encode an I_16x16 macroblock: the 16 4x4 AC blocks are coded DC-less
 * (residual_ac), while the 16 DC coefficients are collected into dct4x4[0]
 * and sent through an extra 4x4 Hadamard.
 * NOTE(review): the listing has dropped braces/else lines around the
 * lossless/lossy branches. */
403 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
405 const int i_stride = h->mb.pic.i_stride[0];
406 uint8_t *p_src = h->mb.pic.p_fenc[0];
407 uint8_t *p_dst = h->mb.pic.p_fdec[0];
/* dct4x4[0] = the 4x4 DC block; dct4x4[1..16] = the sixteen AC blocks */
409 int16_t dct4x4[16+1][4][4];
/* lossless: raw AC residual per block, DC taken from the pixel difference */
413 if( h->mb.b_lossless )
415 for( i = 0; i < 16; i++ )
417 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
418 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
419 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
422 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
/* lossy path: 16x16 DCT, then pull each block's DC into dct4x4[0] */
426 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
427 for( i = 0; i < 16; i++ )
430 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
432 /* quant/scan/dequant */
433 quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
434 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
435 x264_mb_dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
/* DC block: Hadamard, quant with the deeper DC shift, full zigzag */
438 h->dctf.dct4x4dc( dct4x4[0] );
439 quant_4x4_dc( dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
440 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
442 /* output samples to fdec */
443 h->dctf.idct4x4dc( dct4x4[0] );
444 x264_mb_dequant_4x4_dc( dct4x4[0], h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */
446 /* calculate dct coeffs */
/* reinsert reconstructed DCs into the AC blocks before the final idct */
447 for( i = 0; i < 16; i++ )
450 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
452 /* put pixels to fdec */
453 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
/* Encode both 8x8 chroma planes: four 4x4 DCTs each, DCs collected into a
 * 2x2 Hadamard block, ACs zigzag-scanned (15 coeffs).  For inter MBs a
 * decimation score may null the whole AC residual of a plane.
 * NOTE(review): the listing has dropped braces and some inner statements
 * (see the empty-bodied zeroing loops below). */
456 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
460 for( ch = 0; ch < 2; ch++ )
462 const int i_stride = h->mb.pic.i_stride[1+ch];
463 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
464 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
465 int i_decimate_score = 0;
467 int16_t dct2x2[2][2];
468 int16_t dct4x4[4][4][4];
/* lossless: raw AC residual, DC from the pixel-domain difference */
470 if( h->mb.b_lossless )
472 for( i = 0; i < 4; i++ )
474 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
475 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
476 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
482 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
483 /* calculate dct coeffs */
484 for( i = 0; i < 4; i++ )
/* pull this block's DC into the 2x2 DC block */
487 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
489 quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
490 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
491 x264_mb_dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
/* accumulate the plane's decimation score (presumably inter-only -- the
 * guarding condition line appears dropped from this listing) */
495 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
/* 2x2 DC Hadamard + quant + scan */
499 h->dctf.dct2x2dc( dct2x2 );
500 quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
501 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
503 /* output samples to fdec */
504 h->dctf.idct2x2dc( dct2x2 );
505 x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */
507 if( b_inter && i_decimate_score < 7 )
509 /* Near null chroma 8x8 block so make it null (bits saving) */
510 for( i = 0; i < 4; i++ )
513 for( x = 0; x < 15; x++ )
515 h->dct.block[16+i+ch*4].residual_ac[x] = 0;
/* NOTE(review): the body zeroing dct4x4[i][y][x] appears dropped here */
517 for( x = 0; x < 4; x++ )
519 for( y = 0; y < 4; y++ )
527 /* calculate dct coeffs */
/* reinsert reconstructed DCs before the final 8x8 idct */
528 for( i = 0; i < 4; i++ )
531 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
533 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
/* Mark the current MB as fully coefficient-free: clear both cbp fields,
 * zero all 16 luma + 8 chroma non_zero_count cache entries, and store a
 * zero cbp for this MB position. */
537 static void x264_macroblock_encode_skip( x264_t *h )
540 h->mb.i_cbp_luma = 0x00;
541 h->mb.i_cbp_chroma = 0x00;
543 for( i = 0; i < 16+8; i++ )
545 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
/* also record the combined cbp for entropy coding of later MBs */
549 h->mb.cbp[h->mb.i_mb_xy] = 0;
552 /*****************************************************************************
553 * x264_macroblock_encode_pskip:
554 * Encode an already marked skip block: motion-compensate fdec from ref 0
555 * using the (clipped) skip motion vector, then clear all coefficient state.
 * NOTE(review): the mv/size argument lines of the three mc calls were
 * dropped from this listing.
 *****************************************************************************/
556 void x264_macroblock_encode_pskip( x264_t *h )
/* clip the cached skip mv to the legal search range */
558 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
559 h->mb.mv_min[0], h->mb.mv_max[0] );
560 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
561 h->mb.mv_min[1], h->mb.mv_max[1] );
563 /* Motion compensation XXX probably unneeded */
564 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
565 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
/* chroma planes: fref[0][0][4]/[5] are the U/V planes of reference 0 */
569 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
570 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
573 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
574 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
/* no residual at all for a skip MB */
577 x264_macroblock_encode_skip( h );
580 /*****************************************************************************
581 * x264_macroblock_encode:
 *    Encode the current macroblock: run intra prediction (MC for inter is
 *    assumed done / done in dropped lines), transform+quantize the residual
 *    into h->dct, reconstruct fdec, derive cbp and non_zero_count, and
 *    late-convert all-zero P/B macroblocks into skips.
 *    NOTE(review): this listing is missing structural lines (braces, some
 *    declarations, early returns); comments describe the visible flow only.
582 *****************************************************************************/
583 void x264_macroblock_encode( x264_t *h )
586 int i_qp = h->mb.i_qp;
/* skip MBs are fully handled by their dedicated encoders */
589 if( h->mb.i_type == P_SKIP )
592 x264_macroblock_encode_pskip( h );
595 if( h->mb.i_type == B_SKIP )
597 /* XXX motion compensation is probably unneeded */
599 x264_macroblock_encode_skip( h );
/* intra luma: predict into fdec, then encode per partition type */
603 if( h->mb.i_type == I_16x16 )
605 const int i_mode = h->mb.i_intra16x16_pred_mode;
606 h->mb.b_transform_8x8 = 0;
607 /* do the right prediction */
608 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
610 /* encode the 16x16 macroblock */
611 x264_mb_encode_i16x16( h, i_qp );
613 /* fix the pred mode value */
614 h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
616 else if( h->mb.i_type == I_8x8 )
618 h->mb.b_transform_8x8 = 1;
619 for( i = 0; i < 4; i++ )
621 const int i_dst = h->mb.pic.i_stride[0];
622 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
623 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
625 h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
626 x264_mb_encode_i8x8( h, i, i_qp );
627 h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]] = x264_mb_pred_mode4x4_fix(i_mode);
630 else if( h->mb.i_type == I_4x4 )
632 h->mb.b_transform_8x8 = 0;
633 for( i = 0; i < 16; i++ )
635 const int i_dst = h->mb.pic.i_stride[0];
636 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
637 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
639 h->predict_4x4[i_mode]( p_dst, i_dst );
640 x264_mb_encode_i4x4( h, i, i_qp );
641 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix(i_mode);
/* inter path: the actual MC call presumably sits in lines dropped from this
 * listing -- confirm against the full file */
647 int i_decimate_mb = 0;
649 /* Motion compensation */
652 if( h->mb.b_lossless )
654 for( i4x4 = 0; i4x4 < 16; i4x4++ )
656 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
657 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
660 else if( h->mb.b_transform_8x8 )
662 int16_t dct8x8[4][8][8];
663 h->dctf.sub16x16_dct8( dct8x8,
664 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
665 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
667 for( idx = 0; idx < 4; idx++ )
671 quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
672 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
673 x264_mb_dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
/* per-8x8 decimation: score < 4 -> null this block; < 6 over the MB -> null all */
675 i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
676 i_decimate_mb += i_decimate_8x8;
677 if( i_decimate_8x8 < 4 )
679 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
680 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
684 if( i_decimate_mb < 6 )
685 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
687 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
/* 4x4 transform path */
691 int16_t dct4x4[16][4][4];
692 h->dctf.sub16x16_dct( dct4x4,
693 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
694 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
696 for( i8x8 = 0; i8x8 < 4; i8x8++ )
700 /* encode one 4x4 block */
702 for( i4x4 = 0; i4x4 < 4; i4x4++ )
704 idx = i8x8 * 4 + i4x4;
706 quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
707 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
708 x264_mb_dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
710 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
713 /* decimate this 8x8 block */
714 i_decimate_mb += i_decimate_8x8;
715 if( i_decimate_8x8 < 4 )
717 for( i4x4 = 0; i4x4 < 4; i4x4++ )
720 idx = i8x8 * 4 + i4x4;
721 for( i = 0; i < 16; i++ )
722 h->dct.block[idx].luma4x4[i] = 0;
723 for( x = 0; x < 4; x++ )
724 for( y = 0; y < 4; y++ )
725 dct4x4[idx][x][y] = 0;
730 if( i_decimate_mb < 6 )
731 for( idx = 0; idx < 16; idx++ )
732 for( i = 0; i < 16; i++ )
733 h->dct.block[idx].luma4x4[i] = 0;
735 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
/* chroma: map luma QP to chroma QP, predict if intra, encode both planes */
740 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
741 if( IS_INTRA( h->mb.i_type ) )
743 const int i_mode = h->mb.i_chroma_pred_mode;
744 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
745 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
748 /* encode the 8x8 blocks */
749 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
751 /* Calculate the Luma/Chroma pattern and non_zero_count */
752 h->mb.i_cbp_luma = 0x00;
753 if( h->mb.i_type == I_16x16 )
755 for( i = 0; i < 16; i++ )
757 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
758 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
/* I_16x16 always signals all four luma 8x8s (presumably set when any nz --
 * the guarding condition line appears dropped from this listing) */
760 h->mb.i_cbp_luma = 0x0f;
763 else if( h->mb.b_transform_8x8 )
765 /* coded_block_flag is enough for CABAC.
766 * the full non_zero_count is done only in CAVLC. */
767 for( i = 0; i < 4; i++ )
769 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
771 for( j = 0; j < 4; j++ )
772 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
774 h->mb.i_cbp_luma |= 1 << i;
/* 4x4 transform case */
779 for( i = 0; i < 16; i++ )
781 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
782 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
784 h->mb.i_cbp_luma |= 1 << (i/4);
788 /* Calculate the chroma pattern */
789 h->mb.i_cbp_chroma = 0x00;
790 for( i = 0; i < 8; i++ )
792 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
793 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
796 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
/* NOTE(review): the closing paren looks misplaced here -- (a > 0 || b) > 0 --
 * but since || already yields 0/1 the result equals a > 0 || b > 0 */
799 if( h->mb.i_cbp_chroma == 0x00 &&
800 ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
802 h->mb.i_cbp_chroma = 0x01; /* dc only */
/* CABAC: DC coded_block_flags are packed into the high byte of cbp
 * (the i_cbp_dc declaration/assignments appear dropped from this listing) */
805 if( h->param.b_cabac )
807 if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
812 if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
814 if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
819 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
821 if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
823 /* It won't change anything at the decoder side but it is needed else the
824 * decoder will fail to read the next QP */
825 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
/* Check for P_SKIP: an all-zero ref-0 16x16 whose mv equals the skip
 * predictor can be re-typed as P_SKIP */
830 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
831 * (if multiple mv give same result)*/
832 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
833 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
834 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
838 x264_mb_predict_mv_pskip( h, mvp );
839 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
840 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
842 h->mb.i_type = P_SKIP;
843 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
844 /* XXX qp reset may have issues when used in RD instead of the real encode */
848 /* Check for B_SKIP */
849 if( h->mb.i_type == B_DIRECT &&
850 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
852 h->mb.i_type = B_SKIP;
853 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
/* the 8x8-transform flag is only kept when luma coefficients exist (I_8x8
 * keeps it regardless, since the partition type implies it) */
856 if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
857 h->mb.b_transform_8x8 = 0;
860 /*****************************************************************************
861 * x264_macroblock_probe_skip:
862 * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
864 *****************************************************************************/
865 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
867 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
868 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
869 DECLARE_ALIGNED( int, dctscan[16], 16 );
871 int i_qp = h->mb.i_qp;
881 x264_mb_predict_mv_pskip( h, mvp );
882 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
883 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
885 /* Motion compensation */
886 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
887 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
888 mvp[0], mvp[1], 16, 16 );
892 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
893 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
895 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
897 /* encode one 4x4 block */
898 for( i4x4 = 0; i4x4 < 4; i4x4++ )
900 const int idx = i8x8 * 4 + i4x4;
902 quant_4x4( dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
903 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
905 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
907 if( i_decimate_mb >= 6 )
916 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
918 for( ch = 0; ch < 2; ch++ )
920 const int i_stride = h->mb.pic.i_stride[1+ch];
921 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
922 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
926 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
927 h->mb.pic.p_fdec[1+ch], i_stride,
928 mvp[0], mvp[1], 8, 8 );
931 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
933 /* calculate dct DC */
934 dct2x2[0][0] = dct4x4[0][0][0];
935 dct2x2[0][1] = dct4x4[1][0][0];
936 dct2x2[1][0] = dct4x4[2][0][0];
937 dct2x2[1][1] = dct4x4[3][0][0];
938 h->dctf.dct2x2dc( dct2x2 );
939 quant_2x2_dc( dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
940 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
946 /* calculate dct coeffs */
947 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
949 quant_4x4( dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
950 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
952 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
953 if( i_decimate_mb >= 7 )