1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
28 #include "../common/common.h"
29 #include "macroblock.h"
/* Horizontal position, in units of 4 pixels, of each of the 16 luma 4x4
 * blocks of a macroblock, indexed by block number.  Callers in this file
 * multiply the entry by 4 to get the pixel column of the block. */
static const uint8_t block_idx_x[16] =
{
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
};
/* Vertical position, in units of 4 pixels, of each of the 16 luma 4x4
 * blocks of a macroblock, indexed by block number.  Callers in this file
 * multiply the entry by 4 and by the stride to get the row offset. */
static const uint8_t block_idx_y[16] =
{
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
};
/* NOTE(review): only the declaration line of this table is visible here; its
 * initializer is not shown.  Presumably it maps a 4x4 position back to a block
 * number (the inverse of block_idx_x / block_idx_y) -- confirm the values and
 * the index order against the complete file. */
40 static const uint8_t block_idx_xy[4][4] =
/* Forward quantisation multipliers, indexed [i_qscale % 6][y][x] where
 * (y, x) is the coefficient position inside the 4x4 block.  The quant
 * routines in this file multiply a coefficient by its entry and then shift
 * right by 15 + i_qscale / 6. */
static const int quant_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 },
      { 13107, 8066, 13107, 8066 }, {  8066, 5243,  8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 },
      { 11916, 7490, 11916, 7490 }, {  7490, 4660,  7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 },
      { 10082, 6554, 10082, 6554 }, {  6554, 4194,  6554, 4194 } },
    { {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 },
      {  9362, 5825,  9362, 5825 }, {  5825, 3647,  5825, 3647 } },
    { {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 },
      {  8192, 5243,  8192, 5243 }, {  5243, 3355,  5243, 3355 } },
    { {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 },
      {  7282, 4559,  7282, 4559 }, {  4559, 2893,  4559, 2893 } }
};
/* Chroma quantiser as a function of the (offset and clipped) luma quantiser.
 * Callers in this file index it with a value clipped to 0..51, so all 52
 * entries must be initialised.  The last row (indices 50 and 51) follows the
 * H.264 luma-to-chroma QP mapping, which saturates at 39. */
static const int i_chroma_qp_table[52] =
{
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
    36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
    39, 39
};
74 /****************************************************************************
75 * Scan and Quant functions
76 ****************************************************************************/
77 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
78 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/*
 * scan_zigzag_4x4full: copy all 16 coefficients of a 4x4 block (including
 * dct[0][0]) into level[] in zigzag order.
 *   level: receives the 16 scanned coefficients
 *   dct:   source block, read as dct[y][x]
 * The order is the one given by the scan_zigzag_x / scan_zigzag_y tables that
 * this file keeps only as a comment; entry i is dct[scan_zigzag_y[i]][scan_zigzag_x[i]].
 * The table-driven loop is not reproduced because those tables are not compiled.
 */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    level[0]  = dct[0][0];
    level[1]  = dct[0][1];
    level[2]  = dct[1][0];
    level[3]  = dct[2][0];
    level[4]  = dct[1][1];
    level[5]  = dct[0][2];
    level[6]  = dct[0][3];
    level[7]  = dct[1][2];
    level[8]  = dct[2][1];
    level[9]  = dct[3][0];
    level[10] = dct[3][1];
    level[11] = dct[2][2];
    level[12] = dct[1][3];
    level[13] = dct[2][3];
    level[14] = dct[3][2];
    level[15] = dct[3][3];
}
/*
 * scan_zigzag_4x4: copy the 15 coefficients of a 4x4 block other than
 * dct[0][0] into level[] in zigzag order.
 *   level: receives the 15 scanned coefficients
 *   dct:   source block, read as dct[y][x]; dct[0][0] is skipped
 * The order is the full 4x4 zigzag with its first entry removed.  The
 * table-driven loop is not reproduced because the scan_zigzag_x /
 * scan_zigzag_y tables exist in this file only as a comment.
 */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
    level[0]  = dct[0][1];
    level[1]  = dct[1][0];
    level[2]  = dct[2][0];
    level[3]  = dct[1][1];
    level[4]  = dct[0][2];
    level[5]  = dct[0][3];
    level[6]  = dct[1][2];
    level[7]  = dct[2][1];
    level[8]  = dct[3][0];
    level[9]  = dct[3][1];
    level[10] = dct[2][2];
    level[11] = dct[1][3];
    level[12] = dct[2][3];
    level[13] = dct[3][2];
    level[14] = dct[3][3];
}
/*
 * scan_zigzag_2x2_dc: copy a 2x2 block of DC coefficients into level[] in
 * row-major order (dct[0][0], dct[0][1], dct[1][0], dct[1][1]).
 *   level: receives the 4 coefficients
 *   dct:   source 2x2 block
 */
static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
{
    level[0] = dct[0][0];
    level[1] = dct[0][1];
    level[2] = dct[1][0];
    level[3] = dct[1][1];
}
/*
 * quant_4x4: quantise a 4x4 block of DCT coefficients in place.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             quant_mf multiplier set
 *   b_intra:  non-zero uses a rounding offset of (1 << i_qbits) / 3,
 *             zero uses (1 << i_qbits) / 6
 * NOTE(review): the condition choosing between the two assignments below is
 * not visible in this view; presumably it tests the sign of dct[y][x] -- confirm.
 * NOTE(review): a second definition of quant_4x4 appears later in this file;
 * presumably preprocessor lines not visible here select one of them.
 */
141 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
143 const int i_qbits = 15 + i_qscale / 6;
144 const int i_mf = i_qscale % 6;
/* rounding offset added before the right shift */
145 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
148 for( y = 0; y < 4; y++ )
150 for( x = 0; x < 4; x++ )
/* multiply by the per-position factor, add the offset, shift down */
154 dct[y][x] =( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
/* same computation on the negated product, negated back afterwards */
158 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
/*
 * quant_4x4_dc: quantise a 4x4 block of DC coefficients in place.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             multiplier
 * Every position uses the single multiplier quant_mf[i_qscale%6][0][0], and
 * the shift is one bit larger than in quant_4x4 (1 + i_qbits).
 * NOTE(review): the condition choosing between the two assignments below is
 * not visible in this view; presumably it tests the sign of dct[y][x] -- confirm.
 */
163 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
165 const int i_qbits = 15 + i_qscale / 6;
/* rounding offset: one third of 2 << i_qbits, i.e. of the divisor implied by the shift below */
166 const int f2 = ( 2 << i_qbits ) / 3;
167 const int i_qmf = quant_mf[i_qscale%6][0][0];
170 for( y = 0; y < 4; y++ )
172 for( x = 0; x < 4; x++ )
176 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
/* same computation on the negated product, negated back afterwards */
180 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/*
 * quant_2x2_dc: quantise a 2x2 block of DC coefficients in place.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             multiplier
 *   b_intra:  non-zero uses a rounding offset of (2 << i_qbits) / 3,
 *             zero uses (2 << i_qbits) / 6
 * Every position uses the single multiplier quant_mf[i_qscale%6][0][0] and a
 * shift of 1 + i_qbits.
 * NOTE(review): the condition choosing between the two assignments below is
 * not visible in this view; presumably it tests the sign of dct[y][x] -- confirm.
 */
185 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
187 int const i_qbits = 15 + i_qscale / 6;
188 const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
189 const int i_qmf = quant_mf[i_qscale%6][0][0];
192 for( y = 0; y < 2; y++ )
194 for( x = 0; x < 2; x++ )
198 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
/* same computation on the negated product, negated back afterwards */
202 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/* Per-position rounding fractions for intra blocks, indexed [y][x][0 = numerator,
 * 1 = denominator].  The dead-zone quant routines in this file compute their
 * rounding offset as numerator * (1 << i_qbits) / denominator. */
static const int f_deadzone_intra[4][4][2] = /* [num][den] */
{
    { {1,2}, {3,7}, {2,5}, {1,3} },
    { {3,7}, {2,5}, {1,3}, {1,4} },
    { {2,5}, {1,3}, {1,4}, {1,5} },
    { {1,3}, {1,4}, {1,5}, {1,5} }
};
/* Per-position rounding fractions for inter blocks, indexed [y][x][0 = numerator,
 * 1 = denominator].  The dead-zone quant routines in this file compute their
 * rounding offset as numerator * (1 << i_qbits) / denominator. */
static const int f_deadzone_inter[4][4][2] = /* [num][den] */
{
    { {1,3},  {2,7},  {4,15}, {2,9}  },
    { {2,7},  {4,15}, {2,9},  {1,6}  },
    { {4,15}, {2,9},  {1,6},  {1,7}  },
    { {2,9},  {1,6},  {1,7},  {2,15} }
};
/*
 * quant_4x4 (dead-zone variant): quantise a 4x4 block of DCT coefficients in
 * place, with a rounding offset that depends on the coefficient position.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             quant_mf multiplier set
 *   b_intra:  non-zero takes the offsets from f_deadzone_intra, zero from
 *             f_deadzone_inter
 * NOTE(review): this is the second definition of quant_4x4 in the file, and
 * `f` is defined twice below; presumably preprocessor lines not visible in
 * this view select one alternative in each case -- confirm.
 * NOTE(review): the condition choosing between the two final assignments is
 * not visible either; presumably it tests the sign of dct[y][x].
 */
225 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
/* pointer to the table matching b_intra, used by the second form of `f` */
227 const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
228 const int i_qbits = 15 + i_qscale / 6;
229 const int i_mf = i_qscale % 6;
232 for( y = 0; y < 4; y++ )
234 for( x = 0; x < 4; x++ )
/* first form of the offset: num * (1 << i_qbits) / den, table chosen inline */
237 const int f = b_intra ?
238 (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
240 (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
/* second form: the same value through the f_deadzone pointer */
242 const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
247 dct[y][x] =( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
/* same computation on the negated product, negated back afterwards */
251 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
/*
 * quant_4x4_dc (dead-zone variant): quantise a 4x4 block of DC coefficients
 * in place.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             multiplier
 * Every position uses the multiplier quant_mf[i_qscale%6][0][0] and the
 * rounding fraction f_deadzone_intra[0][0], with a shift of 1 + i_qbits.
 * NOTE(review): this is the second definition of quant_4x4_dc in the file;
 * presumably preprocessor lines not visible here select one of them.
 * NOTE(review): the condition choosing between the two assignments below is
 * not visible; presumably it tests the sign of dct[y][x].
 */
257 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
259 const int i_qbits = 15 + i_qscale / 6;
260 const int i_qmf = quant_mf[i_qscale%6][0][0];
261 const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
264 for( y = 0; y < 4; y++ )
266 for( x = 0; x < 4; x++ )
271 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
/* same computation on the negated product, negated back afterwards */
275 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/*
 * quant_2x2_dc (dead-zone variant): quantise a 2x2 block of DC coefficients
 * in place.
 *   dct:      coefficients, overwritten with the quantised values
 *   i_qscale: quantiser; i_qscale / 6 sets the shift, i_qscale % 6 picks the
 *             multiplier
 *   b_intra:  non-zero takes the rounding fraction from f_deadzone_intra[0][0],
 *             zero from f_deadzone_inter[0][0]
 * Every position uses the multiplier quant_mf[i_qscale%6][0][0] and a shift
 * of 1 + i_qbits.
 * NOTE(review): this is the second definition of quant_2x2_dc in the file;
 * presumably preprocessor lines not visible here select one of them.
 * NOTE(review): the condition choosing between the two assignments below is
 * not visible; presumably it tests the sign of dct[y][x].
 */
281 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
283 int const i_qbits = 15 + i_qscale / 6;
284 const int i_qmf = quant_mf[i_qscale%6][0][0];
/* rounding offset: num * (2 << i_qbits) / den for the chosen table */
285 const int f2 = b_intra ?
286 (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
288 (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
290 for( y = 0; y < 2; y++ )
292 for( x = 0; x < 2; x++ )
296 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
/* same computation on the negated product, negated back afterwards */
300 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/*
 * array_non_zero_count: walks the first i_count entries of v with a counter
 * i_nz that starts at 0.
 *   v:       array to inspect
 *   i_count: number of entries to visit
 * NOTE(review): the loop body and the return statement are not visible in this
 * view.  Judging by the name and by callers that compare the result with 0,
 * it presumably returns the number of non-zero entries -- confirm.
 */
309 static inline int array_non_zero_count( int *v, int i_count )
314 for( i = 0, i_nz = 0; i < i_count; i++ )
325 * x264_mb_decimate_score: given the dct coeffs of a block, returns a score telling whether these coeffs
326 * could all be set to 0 (a low score means the block can be nulled)
327 * Used for inter macroblocks (luma and chroma)
328 * luma: for an 8x8 block: if score < 4 -> null
329 * for the complete mb: if score < 6 -> null
330 * chroma: for the complete mb: if score < 7 -> null
/*
 *   dct:   scanned coefficients of one block
 *   i_max: number of coefficients in dct (callers in this file pass 15 or 16)
 * The coefficients are walked from a high index towards index 0.
 * NOTE(review): the initialisation of idx, i_run and i_score, the outer loop
 * and the return statements are not visible in this view; the notes below
 * describe only the visible statements.
 */
332 static int x264_mb_decimate_score( int *dct, int i_max )
/* score contribution of a coefficient, indexed by i_run; zero from index 6 on */
334 static const int i_ds_table[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
/* step over zero coefficients while moving towards index 0 */
339 while( idx >= 0 && dct[idx] == 0 )
/* a coefficient with magnitude above 1 is treated separately; the action taken
 * is not visible here (presumably an early return of a high score -- confirm) */
348 if( abs( dct[idx--] ) > 1 )
/* zero coefficients found below the current one (presumably counted in i_run) */
354 while( idx >= 0 && dct[idx] == 0 )
359 i_score += i_ds_table[i_run];
/*
 * x264_mb_encode_i4x4: transform, quantise and reconstruct one intra 4x4 luma
 * block of the current macroblock.
 *   h:        encoder context
 *   idx:      block number, used to index block_idx_x / block_idx_y (16 entries)
 *   i_qscale: quantiser passed to quant_4x4 and x264_mb_dequant_4x4
 * The scanned coefficients are stored in h->dct.block[idx].luma4x4 and the
 * reconstruction goes to fdec.  x264_macroblock_encode runs the 4x4 predictor
 * on the same fdec location just before calling this function.
 */
365 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
367 const int i_stride = h->mb.pic.i_stride[0];
/* top-left pixel of block idx in the source (fenc) and reconstructed (fdec) luma planes */
368 uint8_t *p_src = &h->mb.pic.p_fenc[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
369 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride];
371 int16_t dct4x4[4][4];
/* presumably the DCT of the difference between p_src and p_dst -- confirm against dctf */
373 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
/* intra rounding (b_intra = 1) */
375 quant_4x4( dct4x4, i_qscale, 1 );
377 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
379 x264_mb_dequant_4x4( dct4x4, i_qscale );
381 /* output samples to fdec */
382 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
/*
 * x264_mb_encode_i16x16: transform, quantise and reconstruct the luma of an
 * intra 16x16 macroblock.
 *   h:        encoder context
 *   i_qscale: quantiser for both the AC blocks and the DC block
 * dct4x4[0] holds the 4x4 block of DC terms; dct4x4[1..16] hold the sixteen
 * 4x4 blocks.  AC coefficients are scanned into h->dct.block[i].residual_ac
 * and the DC block into h->dct.luma16x16_dc.
 */
385 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
387 const int i_stride = h->mb.pic.i_stride[0];
388 uint8_t *p_src = h->mb.pic.p_fenc[0];
389 uint8_t *p_dst = h->mb.pic.p_fdec[0];
391 int16_t dct4x4[16+1][4][4];
/* presumably the DCT of the difference between p_src and p_dst, 16 blocks at once -- confirm */
395 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
396 for( i = 0; i < 16; i++ )
/* gather the [0][0] term of block i into the DC block at the block's 4x4 position */
399 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
401 /* quant/scan/dequant */
402 quant_4x4( dct4x4[1+i], i_qscale, 1 );
/* scan_zigzag_4x4 skips position [0][0], so only the 15 AC terms are stored */
403 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
404 x264_mb_dequant_4x4( dct4x4[1+i], i_qscale );
/* the DC block gets its own transform, quantiser and full 16-entry scan */
407 h->dctf.dct4x4dc( dct4x4[0] );
408 quant_4x4_dc( dct4x4[0], i_qscale );
409 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
411 /* output samples to fdec */
412 h->dctf.idct4x4dc( dct4x4[0] );
413 x264_mb_dequant_4x4_dc( dct4x4[0], i_qscale ); /* XXX not inversed */
415 /* calculate dct coeffs */
416 for( i = 0; i < 16; i++ )
/* put the reconstructed DC term back into position [0][0] of block i */
419 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
421 /* put pixels to fdec */
422 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
/*
 * x264_mb_encode_8x8: transform, quantise and reconstruct the two 8x8 chroma
 * planes (pic planes 1 and 2) of the current macroblock.
 *   h:        encoder context
 *   b_inter:  non-zero for an inter macroblock; selects inter rounding in the
 *             quantisers and enables the "near null" decimation below
 *   i_qscale: quantiser (callers pass the value looked up in i_chroma_qp_table)
 * For plane ch, AC coefficients go to h->dct.block[16+i+ch*4].residual_ac and
 * the 2x2 DC block to h->dct.chroma_dc[ch].
 * NOTE(review): some statements (local declarations, the body of the x/y loop
 * in the decimation branch, any condition around the decimate-score call) are
 * not visible in this view.
 */
425 static void x264_mb_encode_8x8( x264_t *h, int b_inter, int i_qscale )
429 for( ch = 0; ch < 2; ch++ )
431 const int i_stride = h->mb.pic.i_stride[1+ch];
432 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
433 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
434 int i_decimate_score = 0;
436 int16_t dct2x2[2][2];
437 int16_t dct4x4[4][4][4];
/* presumably the DCT of the difference between p_src and p_dst, four 4x4 blocks -- confirm */
439 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
440 /* calculate dct coeffs */
441 for( i = 0; i < 4; i++ )
/* gather the [0][0] term of block i into the 2x2 DC block */
444 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
446 quant_4x4( dct4x4[i], i_qscale, b_inter ? 0 : 1 );
447 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
448 x264_mb_dequant_4x4( dct4x4[i], i_qscale );
/* accumulate the decimation score of the 15 AC coefficients of this block */
452 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
456 h->dctf.dct2x2dc( dct2x2 );
457 quant_2x2_dc( dct2x2, i_qscale, b_inter ? 0 : 1 );
458 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
460 /* output samples to fdec */
461 h->dctf.idct2x2dc( dct2x2 );
462 x264_mb_dequant_2x2_dc( dct2x2, i_qscale ); /* XXX not inversed */
464 if( b_inter && i_decimate_score < 7 )
466 /* Near null chroma 8x8 block so make it null (bits saving) */
467 for( i = 0; i < 4; i++ )
470 for( x = 0; x < 15; x++ )
472 h->dct.block[16+i+ch*4].residual_ac[x] = 0;
/* NOTE(review): body of this nested loop is not visible; presumably it clears dct4x4[i] */
474 for( x = 0; x < 4; x++ )
476 for( y = 0; y < 4; y++ )
484 /* calculate dct coeffs */
485 for( i = 0; i < 4; i++ )
/* put the reconstructed DC term back into position [0][0] of block i */
488 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
490 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
/*
 * x264_macroblock_encode_skip: record that the current macroblock carries no
 * coefficients.  Clears the luma and chroma coded block patterns, zeroes the
 * cached non-zero counts of the 16 luma and 8 chroma blocks, and stores 0 in
 * the per-macroblock cbp array.
 *   h: encoder context
 */
494 static void x264_macroblock_encode_skip( x264_t *h )
497 h->mb.i_cbp_luma = 0x00;
498 h->mb.i_cbp_chroma = 0x00;
/* 16 luma + 8 chroma blocks, addressed through x264_scan8 */
500 for( i = 0; i < 16+8; i++ )
502 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
506 h->mb.cbp[h->mb.i_mb_xy] = 0;
509 /*****************************************************************************
510 * x264_macroblock_encode_pskip:
511 * Encode an already marked skip block
 *
 * Reads the motion vector cached for list 0 at x264_scan8[0], runs motion
 * compensation for the luma plane and both chroma planes into fdec, then
 * clears the coefficient state through x264_macroblock_encode_skip.
 * h: encoder context
512 *****************************************************************************/
513 void x264_macroblock_encode_pskip( x264_t *h )
515 const int mvx = h->mb.cache.mv[0][x264_scan8[0]][0];
516 const int mvy = h->mb.cache.mv[0][x264_scan8[0]][1];
518 /* Motion compensation XXX probably unneeded */
/* NOTE(review): the trailing arguments of the three calls below are not visible
 * in this view; presumably they pass mvx/mvy and the block size -- confirm */
519 h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
520 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
524 h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1], h->mb.pic.i_stride[1],
525 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
528 h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][2], h->mb.pic.i_stride[2],
529 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
532 x264_macroblock_encode_skip( h );
535 /*****************************************************************************
536 * x264_macroblock_encode:
537 *****************************************************************************/
538 void x264_macroblock_encode( x264_t *h )
544 if( h->mb.i_type == P_SKIP )
547 x264_macroblock_encode_pskip( h );
550 if( h->mb.i_type == B_SKIP )
552 /* XXX motion compensation is probably unneeded */
554 x264_macroblock_encode_skip( h );
558 /* quantification scale */
559 i_qscale = h->mb.qp[h->mb.i_mb_xy];
561 if( h->mb.i_type == I_16x16 )
563 const int i_mode = h->mb.i_intra16x16_pred_mode;
564 /* do the right prediction */
565 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
567 /* encode the 16x16 macroblock */
568 x264_mb_encode_i16x16( h, i_qscale );
570 /* fix the pred mode value */
571 h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
573 else if( h->mb.i_type == I_4x4 )
575 for( i = 0; i < 16; i++ )
577 const int i_dst = h->mb.pic.i_stride[0];
578 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
579 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
581 /* Do the right prediction */
582 h->predict_4x4[i_mode]( p_dst, i_dst );
584 /* encode one 4x4 block */
585 x264_mb_encode_i4x4( h, i, i_qscale );
587 /* fix the pred mode value */
588 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix[i_mode];
593 int16_t dct4x4[16][4][4];
596 int i_decimate_mb = 0;
598 /* Motion compensation */
601 h->dctf.sub16x16_dct( dct4x4,
602 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
603 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
605 for( i8x8 = 0; i8x8 < 4; i8x8++ )
609 /* encode one 4x4 block */
611 for( i4x4 = 0; i4x4 < 4; i4x4++ )
613 idx = i8x8 * 4 + i4x4;
615 quant_4x4( dct4x4[idx], i_qscale, 0 );
616 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
617 x264_mb_dequant_4x4( dct4x4[idx], i_qscale );
619 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
622 /* decimate this 8x8 block */
623 i_decimate_mb += i_decimate_8x8;
624 if( i_decimate_8x8 < 4 )
626 for( i4x4 = 0; i4x4 < 4; i4x4++ )
629 idx = i8x8 * 4 + i4x4;
630 for( i = 0; i < 16; i++ )
632 h->dct.block[idx].luma4x4[i] = 0;
634 for( x = 0; x < 4; x++ )
636 for( y = 0; y < 4; y++ )
638 dct4x4[idx][x][y] = 0;
645 if( i_decimate_mb < 6 )
647 for( idx = 0; idx < 16; idx++ )
649 for( i = 0; i < 16; i++ )
651 h->dct.block[idx].luma4x4[i] = 0;
657 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
662 i_qscale = i_chroma_qp_table[x264_clip3( i_qscale + h->pps->i_chroma_qp_index_offset, 0, 51 )];
663 if( IS_INTRA( h->mb.i_type ) )
665 const int i_mode = h->mb.i_chroma_pred_mode;
666 /* do the right prediction */
667 h->predict_8x8[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
668 h->predict_8x8[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
670 /* fix the pred mode value */
671 h->mb.i_chroma_pred_mode = x264_mb_pred_mode8x8_fix[i_mode];
674 /* encode the 8x8 blocks */
675 x264_mb_encode_8x8( h, !IS_INTRA( h->mb.i_type ), i_qscale );
677 /* Calculate the Luma/Chroma patern and non_zero_count */
678 if( h->mb.i_type == I_16x16 )
680 h->mb.i_cbp_luma = 0x00;
681 for( i = 0; i < 16; i++ )
683 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
684 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
687 h->mb.i_cbp_luma = 0x0f;
693 h->mb.i_cbp_luma = 0x00;
694 for( i = 0; i < 16; i++ )
696 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
697 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
700 h->mb.i_cbp_luma |= 1 << (i/4);
705 /* Calculate the chroma patern */
706 h->mb.i_cbp_chroma = 0x00;
707 for( i = 0; i < 8; i++ )
709 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
710 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
713 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
716 if( h->mb.i_cbp_chroma == 0x00 &&
717 ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
719 h->mb.i_cbp_chroma = 0x01; /* dc only */
722 if( h->param.b_cabac )
724 if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
729 if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
731 if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
736 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
738 if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
740 /* It won'y change anything at the decoder side but it is needed else the
741 * decoder will fail to read the next QP */
742 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
747 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
748 * (if multiple mv give same result)*/
749 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
750 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
752 if( h->mb.cache.ref[0][x264_scan8[0]] == 0 )
756 x264_mb_predict_mv_pskip( h, mvp );
757 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
758 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
760 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = P_SKIP;
761 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
766 /* Check for B_SKIP */
767 if( h->mb.i_type == B_DIRECT &&
768 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
770 h->mb.type[h->mb.i_mb_xy] = h->mb.i_type = B_SKIP;
771 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
775 /*****************************************************************************
776 * x264_macroblock_probe_skip:
777 * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 *
 * h:       encoder context
 * b_bidir: NOTE(review): not referenced by any line visible in this view;
 *          presumably selects between the P and B cases -- confirm
 * The visible code motion-compensates luma and chroma with the (clipped)
 * predicted skip vector, transforms and quantises the residual, and tests
 * decimation scores and quantised chroma DC terms.
 * NOTE(review): the return statements and several conditions are not visible;
 * presumably the function returns 0 as soon as a test fails and non-zero
 * otherwise -- confirm.
779 *****************************************************************************/
780 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
/* scratch buffers, declared through the project's alignment macro */
782 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
783 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
784 DECLARE_ALIGNED( int, dctscan[16], 16 );
794 /* quantization scale */
795 i_qp = h->mb.qp[h->mb.i_mb_xy];
800 x264_mb_predict_mv_pskip( h, mvp );
802 /* Special case, need to clip the vector */
/* NOTE(review): the lower-bound conditions guarding the first assignment of each
 * pair below are not visible in this view */
803 n = 16 * h->mb.i_mb_x + mvp[0];
805 mvp[0] = -24 - 16*h->mb.i_mb_x;
806 else if( n > 16 * h->sps->i_mb_width + 24 )
807 mvp[0] = 16 * ( h->sps->i_mb_width - h->mb.i_mb_x ) + 24;
809 n = 16 * h->mb.i_mb_y + mvp[1];
811 mvp[1] = -24 - 16*h->mb.i_mb_y;
812 else if( n > 16 * h->sps->i_mb_height + 8 )
813 mvp[1] = 16 * ( h->sps->i_mb_height - h->mb.i_mb_y ) + 8;
816 /* Motion compensation */
817 h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
818 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
819 mvp[0], mvp[1], 16, 16 );
/* luma residual against the motion-compensated prediction now held in fdec */
823 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
824 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
826 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
828 /* encode one 4x4 block */
829 for( i4x4 = 0; i4x4 < 4; i4x4++ )
831 const int idx = i8x8 * 4 + i4x4;
833 quant_4x4( dct4x4[idx], i_qp, 0 );
834 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
836 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
/* 6 is the same whole-macroblock luma threshold used by x264_macroblock_encode */
838 if( i_decimate_mb >= 6 )
/* chroma quantiser: luma quantiser plus the PPS offset, clipped to 0..51, then mapped */
847 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
849 for( ch = 0; ch < 2; ch++ )
851 const int i_stride = h->mb.pic.i_stride[1+ch];
852 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
853 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
857 h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
858 h->mb.pic.p_fdec[1+ch], i_stride,
859 mvp[0], mvp[1], 8, 8 );
862 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
864 /* calculate dct DC */
865 dct2x2[0][0] = dct4x4[0][0][0];
866 dct2x2[0][1] = dct4x4[1][0][0];
867 dct2x2[1][0] = dct4x4[2][0][0];
868 dct2x2[1][1] = dct4x4[3][0][0];
869 h->dctf.dct2x2dc( dct2x2 );
870 quant_2x2_dc( dct2x2, i_qp, 0 );
/* any surviving chroma DC term is tested here; the action taken is not visible */
871 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
877 /* calculate dct coeffs */
/* the score is restarted from 0 for each chroma plane */
878 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
880 quant_4x4( dct4x4[i4x4], i_qp, 0 );
881 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
883 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
/* 7 is the same chroma threshold used by x264_mb_encode_8x8 */
884 if( i_decimate_mb >= 7 )