1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: edge-detec.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/*
 * x264_median: median of three integers.
 * The result is a + b + c - min - max, i.e. what remains of the sum once the
 * smallest and the largest value have been removed.
 * NOTE(review): most of the min/max selection is missing from this listing;
 * only one assignment and the final return are visible.
 */
34 static inline int x264_median( int a, int b, int c )
43 max = b; /* no need to test 'b > max': the test would cost more than an unconditional assignment */
54 return a + b + c - min - max;
/*
 * 48-entry lookup table for intra 4x4 macroblocks.
 * NOTE(review): presumably indexed by the coded block pattern and giving the
 * Exp-Golomb code number to write -- confirm against the bitstream writer.
 */
57 static const uint8_t intra4x4_cbp_to_golomb[48]=
59 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
60 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1,
61 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0
/*
 * 48-entry lookup table for inter macroblocks, counterpart of
 * intra4x4_cbp_to_golomb.
 * NOTE(review): presumably indexed by the coded block pattern and giving the
 * Exp-Golomb code number to write -- confirm against the bitstream writer.
 */
63 static const uint8_t inter_cbp_to_golomb[48]=
65 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11,
66 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
67 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
/*
 * Horizontal position (0..3) of 4x4 block number idx inside the macroblock.
 * Indices are grouped four by four: 0..3 cover columns 0-1, 4..7 columns 2-3,
 * then the same pattern repeats for indices 8..15.
 */
70 static const uint8_t block_idx_x[16] =
72 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/*
 * Vertical position (0..3) of 4x4 block number idx inside the macroblock.
 * Indices 0..7 lie in rows 0-1 and indices 8..15 in rows 2-3.
 */
74 static const uint8_t block_idx_y[16] =
76 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/*
 * Block index lookup, used in this file as block_idx_xy[x][y].
 * NOTE(review): the initializer rows are missing from this listing; presumably
 * this is the inverse of block_idx_x / block_idx_y -- confirm.
 */
78 static const uint8_t block_idx_xy[4][4] =
/*
 * Quantisation multipliers, one 4x4 matrix per value of (qscale % 6).
 * quant_4x4() reads it as quant_mf[i_mf][y][x]; the DC quantisers only use
 * entry [0][0] of the selected matrix.
 */
86 static const int quant_mf[6][4][4] =
88 { { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243},
89 { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243} },
90 { { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660},
91 { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660} },
92 { { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194},
93 { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194} },
94 { { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647},
95 { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647} },
96 { { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355},
97 { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355} },
98 { { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893},
99 { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893} }
/*
 * Dequantisation multipliers, one 4x4 matrix per value of (qscale % 6).
 * Every matrix is symmetric in its two inner indices, so [x][y] and [y][x]
 * select the same value. The DC dequantisers only use entry [0][0].
 */
102 static const int dequant_mf[6][4][4] =
104 { {10, 13, 10, 13}, {13, 16, 13, 16}, {10, 13, 10, 13}, {13, 16, 13, 16} },
105 { {11, 14, 11, 14}, {14, 18, 14, 18}, {11, 14, 11, 14}, {14, 18, 14, 18} },
106 { {13, 16, 13, 16}, {16, 20, 16, 20}, {13, 16, 13, 16}, {16, 20, 16, 20} },
107 { {14, 18, 14, 18}, {18, 23, 18, 23}, {14, 18, 14, 18}, {18, 23, 18, 23} },
108 { {16, 20, 16, 20}, {20, 25, 20, 25}, {16, 20, 16, 20}, {20, 25, 20, 25} },
109 { {18, 23, 18, 23}, {23, 29, 23, 29}, {18, 23, 18, 23}, {23, 29, 23, 29} }
/*
 * predict_pred_intra4x4_mode: predicted intra 4x4 mode for block idx.
 * Looks at the two neighbouring blocks recorded in the macroblock context
 * (A via mba/bka, B via mbb/bkb). Each neighbour contributes its own 4x4
 * prediction mode when its macroblock is of type I_4x4 and I_PRED_4x4_DC
 * otherwise; the smaller of the two modes is returned.
 * NOTE(review): the condition guarding the early "return I_PRED_4x4_DC" is
 * missing from this listing (presumably a missing-neighbour test).
 */
113 static int predict_pred_intra4x4_mode( x264_t *h, x264_macroblock_t *mb, int idx )
115 x264_macroblock_t *mba = mb->context->block[idx].mba;
116 x264_macroblock_t *mbb = mb->context->block[idx].mbb;
/* default for a neighbour that is not coded as I_4x4 */
118 int i_mode_a = I_PRED_4x4_DC;
119 int i_mode_b = I_PRED_4x4_DC;
123 return I_PRED_4x4_DC;
126 if( mba->i_type == I_4x4 )
128 i_mode_a = mb->context->block[idx].bka->i_intra4x4_pred_mode;
130 if( mbb->i_type == I_4x4 )
132 i_mode_b = mb->context->block[idx].bkb->i_intra4x4_pred_mode;
135 return X264_MIN( i_mode_a, i_mode_b );
/*
 * predict_non_zero_code: predicted number of non-zero coefficients for block
 * idx, derived from the non-zero counts of neighbouring blocks A (bka) and
 * B (bkb) stored in the macroblock context.
 * NOTE(review): the availability tests and the combination of the two counts
 * are missing from this listing; 0x80 is the initial value of both counts and
 * presumably marks "not available" -- confirm.
 */
138 static int predict_non_zero_code( x264_t *h, x264_macroblock_t *mb, int idx )
140 x264_macroblock_t *mba = mb->context->block[idx].mba;
141 x264_macroblock_t *mbb = mb->context->block[idx].mbb;
143 int i_z_a = 0x80, i_z_b = 0x80;
146 /* none available -> 0, one available -> that one, both -> (a+b+1)>>1 */
149 i_z_a = mb->context->block[idx].bka->i_non_zero_count;
153 i_z_b = mb->context->block[idx].bkb->i_non_zero_count;
/* rounded halving: used for the "both available" average */
159 i_ret = ( i_ret + 1 ) >> 1;
/*
 * predict_16x16_mode_available: list the intra 16x16 prediction modes that
 * can be tried for mb, depending on which neighbours are present in
 * mb->i_neighbour. The modes are written to mode[] (at most four entries).
 * NOTE(review): the writes to *pi_count are missing from this listing;
 * presumably it receives the number of modes stored.
 */
169 static void predict_16x16_mode_available( x264_macroblock_t *mb, int *mode, int *pi_count )
171 if( ( mb->i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
173 /* top and left available */
174 *mode++ = I_PRED_16x16_DC;
175 *mode++ = I_PRED_16x16_V;
176 *mode++ = I_PRED_16x16_H;
177 *mode++ = I_PRED_16x16_P;
/* left only */
180 else if( ( mb->i_neighbour & MB_LEFT ) )
183 *mode++ = I_PRED_16x16_DC_LEFT;
184 *mode++ = I_PRED_16x16_H;
/* top only */
187 else if( ( mb->i_neighbour & MB_TOP ) )
190 *mode++ = I_PRED_16x16_DC_TOP;
191 *mode++ = I_PRED_16x16_V;
/* no neighbour: only the constant DC_128 mode remains */
197 *mode = I_PRED_16x16_DC_128;
/*
 * predict_8x8_mode_available: chroma counterpart of
 * predict_16x16_mode_available. Lists the chroma prediction modes that can be
 * tried for mb depending on mb->i_neighbour; at most four entries are written
 * to mode[].
 * NOTE(review): the writes to *pi_count are missing from this listing;
 * presumably it receives the number of modes stored.
 */
203 static void predict_8x8_mode_available( x264_macroblock_t *mb, int *mode, int *pi_count )
205 if( ( mb->i_neighbour & (MB_LEFT|MB_TOP) ) == (MB_LEFT|MB_TOP) )
207 /* top and left available */
208 *mode++ = I_PRED_CHROMA_DC;
209 *mode++ = I_PRED_CHROMA_V;
210 *mode++ = I_PRED_CHROMA_H;
211 *mode++ = I_PRED_CHROMA_P;
/* left only */
214 else if( ( mb->i_neighbour & MB_LEFT ) )
217 *mode++ = I_PRED_CHROMA_DC_LEFT;
218 *mode++ = I_PRED_CHROMA_H;
/* top only */
221 else if( ( mb->i_neighbour & MB_TOP ) )
224 *mode++ = I_PRED_CHROMA_DC_TOP;
225 *mode++ = I_PRED_CHROMA_V;
/* no neighbour: only the constant DC_128 mode remains */
231 *mode = I_PRED_CHROMA_DC_128;
/*
 * predict_4x4_mode_available: list the intra 4x4 prediction modes that can be
 * tried for block idx of mb. needmb[idx] names the neighbouring macroblocks
 * the block depends on; three flags are derived by comparing it with
 * mb->i_neighbour:
 *   b_a - every needed left neighbour is present
 *   b_b - every needed top neighbour is present
 *   b_c - every needed top-right / MB_PRIVATE neighbour is present
 * NOTE(review): most of needmb[], the first condition of the if-chain, the
 * test guarding DDL/VL and the writes to *pi_count are missing from this
 * listing; presumably the full mode set requires b_a && b_b, and DDL/VL
 * additionally require b_c.
 */
237 static void predict_4x4_mode_available( x264_macroblock_t *mb, int idx, int *mode, int *pi_count )
240 static const int needmb[16] =
242 MB_LEFT|MB_TOP, MB_TOP,
244 MB_TOP, MB_TOP|MB_TOPRIGHT,
252 /* FIXME even when b_c == 0 there are some cases where missing pixels
253 * are emulated and thus more modes are available. TODO:
254 * analysis and encode should be fixed too */
255 b_a = (needmb[idx]&mb->i_neighbour&MB_LEFT) == (needmb[idx]&MB_LEFT);
256 b_b = (needmb[idx]&mb->i_neighbour&MB_TOP) == (needmb[idx]&MB_TOP);
257 b_c = (needmb[idx]&mb->i_neighbour&(MB_TOPRIGHT|MB_PRIVATE)) == (needmb[idx]&(MB_TOPRIGHT|MB_PRIVATE));
261 *mode++ = I_PRED_4x4_DC;
262 *mode++ = I_PRED_4x4_H;
263 *mode++ = I_PRED_4x4_V;
264 *mode++ = I_PRED_4x4_DDR;
265 *mode++ = I_PRED_4x4_VR;
266 *mode++ = I_PRED_4x4_HD;
267 *mode++ = I_PRED_4x4_HU;
273 *mode++ = I_PRED_4x4_DDL;
274 *mode++ = I_PRED_4x4_VL;
/* left only */
278 else if( b_a && !b_b )
280 *mode++ = I_PRED_4x4_DC_LEFT;
281 *mode++ = I_PRED_4x4_H;
/* top only */
284 else if( !b_a && b_b )
286 *mode++ = I_PRED_4x4_DC_TOP;
287 *mode++ = I_PRED_4x4_V;
/* nothing usable around the block */
292 *mode++ = I_PRED_4x4_DC_128;
297 /****************************************************************************
298 * Scan and Quant functions
299 ****************************************************************************/
/* Column (x) and row (y) of the i-th coefficient of a 4x4 block in zigzag
 * order; the scan functions read dct[scan_zigzag_y[i]][scan_zigzag_x[i]]. */
300 static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
301 static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/*
 * scan_zigzag_4x4full: copy all 16 coefficients of dct into level[] in
 * zigzag order (level[0] is the coefficient at dct[0][0]).
 */
303 static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
307 for( i = 0; i < 16; i++ )
309 level[i] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
/*
 * scan_zigzag_4x4: copy the 15 coefficients that follow the first one in
 * zigzag order into level[0..14]; dct[0][0] is skipped.
 */
312 static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
/* start at 1: scan position 0 is not part of the output */
316 for( i = 1; i < 16; i++ )
318 level[i - 1] = dct[scan_zigzag_y[i]][scan_zigzag_x[i]];
/*
 * scan_zigzag_2x2_dc: copy the four values of a 2x2 block into level[] in
 * row-major order.
 */
322 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
324 level[0] = dct[0][0];
325 level[1] = dct[0][1];
326 level[2] = dct[1][0];
327 level[3] = dct[1][1];
/*
 * quant_4x4: quantise a 4x4 block of coefficients in place.
 *   i_qscale - quantiser; i_qscale % 6 selects the multiplier matrix and
 *              i_qscale / 6 adds to the shift
 *   b_intra  - non-zero selects the rounding offset (1 << i_qbits) / 3,
 *              zero selects (1 << i_qbits) / 6
 * NOTE(review): the test choosing between the two assignments below is
 * missing from this listing; presumably the first handles positive and the
 * second negative coefficients.
 */
331 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
333 int i_qbits = 15 + i_qscale / 6;
334 int i_mf = i_qscale % 6;
335 int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
338 for( y = 0; y < 4; y++ )
340 for( x = 0; x < 4; x++ )
344 dct[y][x] =( f + (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][y][x] ) >> i_qbits;
/* same formula applied to the negated product, result negated again */
348 dct[y][x] = - ( ( f - (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][y][x] ) >> i_qbits );
/*
 * quant_4x4_dc: quantise a 4x4 block of DC coefficients in place.
 * Differs from quant_4x4 in that every position uses the multiplier
 * quant_mf[i_mf][0][0], the rounding offset is doubled and the shift is one
 * bit larger.
 * NOTE(review): the test choosing between the two assignments below is
 * missing from this listing; presumably it is the sign of the coefficient.
 */
353 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale, int b_intra )
355 int i_qbits = 15 + i_qscale / 6;
356 int i_mf = i_qscale % 6;
357 int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
360 for( y = 0; y < 4; y++ )
362 for( x = 0; x < 4; x++ )
366 dct[y][x] =( 2*f + (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][0][0] ) >> ( 1 + i_qbits );
370 dct[y][x] = - ( ( 2*f - (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][0][0] ) >> (1 + i_qbits ) );
/*
 * quant_2x2_dc: quantise a 2x2 block of DC coefficients in place, using the
 * same formula as quant_4x4_dc (multiplier quant_mf[i_mf][0][0], doubled
 * rounding offset, shift of 1 + i_qbits).
 * NOTE(review): the test choosing between the two assignments below is
 * missing from this listing; presumably it is the sign of the coefficient.
 */
375 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
377 int i_qbits = 15 + i_qscale / 6;
378 int i_mf = i_qscale % 6;
379 int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
382 for( y = 0; y < 2; y++ )
384 for( x = 0; x < 2; x++ )
386 /* XXX: is int64_t really needed? */
389 dct[y][x] =( 2*f + (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][0][0] ) >> ( 1 + i_qbits );
393 dct[y][x] = - ( ( 2*f - (int64_t)dct[y][x] * (int64_t)quant_mf[i_mf][0][0] ) >> (1 + i_qbits ) );
/*
 * dequant_4x4_dc: rescale a 4x4 block of DC coefficients in place with the
 * multiplier dequant_mf[i_qscale % 6][0][0].
 * Two forms are used: a left shift by (i_qbits - 2), or a rounded right shift
 * by (2 - i_qbits) where f is half of the divisor.
 * NOTE(review): the conditions selecting between the two forms are missing
 * from this listing; presumably the left shift is used when i_qbits >= 2.
 * NOTE(review): the product can be negative, and left-shifting a negative
 * signed value is undefined behaviour in C -- worth confirming/fixing.
 */
399 static void dequant_4x4_dc( int16_t dct[4][4], int i_qscale )
401 int i_mf = i_qscale%6;
402 int i_qbits = i_qscale/6;
/* rounding offset for the right-shift form */
408 f = 1 << ( 1 - i_qbits );
415 for( y = 0; y < 4; y++ )
417 for( x = 0; x < 4; x++ )
421 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) << (i_qbits - 2);
425 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] + f ) >> ( 2 -i_qbits );
/*
 * dequant_2x2_dc: rescale a 2x2 block of DC coefficients in place with the
 * multiplier dequant_mf[i_qscale % 6][0][0].
 * Two forms are used: a left shift by (i_qbits - 1), or a right shift by 1.
 * NOTE(review): the condition selecting between the two forms is missing
 * from this listing; presumably the left shift is used when i_qbits >= 1.
 * NOTE(review): left-shifting a negative product is undefined behaviour in C.
 */
431 static void dequant_2x2_dc( int16_t dct[2][2], int i_qscale )
433 int i_mf = i_qscale%6;
434 int i_qbits = i_qscale/6;
437 for( y = 0; y < 2; y++ )
439 for( x = 0; x < 2; x++ )
443 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) << (i_qbits - 1);
447 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][0][0] ) >> 1;
/*
 * dequant_4x4: rescale a 4x4 block of coefficients in place: each value is
 * multiplied by its entry of dequant_mf[i_qscale % 6] and shifted left by
 * i_qscale / 6.
 * NOTE(review): left-shifting a negative product is undefined behaviour in C.
 */
452 static void dequant_4x4( int16_t dct[4][4], int i_qscale )
454 int i_mf = i_qscale%6;
455 int i_qbits = i_qscale/6;
458 for( y = 0; y < 4; y++ )
460 for( x = 0; x < 4; x++ )
/* the table is read as [x][y] while dct is [y][x]; this is harmless only
 * because every dequant_mf matrix is symmetric */
462 dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][x][y] ) << i_qbits;
/*
 * array_non_zero_count: walk the first i_count entries of v with a counter
 * i_nz that starts at zero.
 * NOTE(review): the loop body and the return are missing from this listing;
 * presumably the function returns how many entries are non-zero.
 */
467 static inline int array_non_zero_count( int *v, int i_count )
472 for( i = 0, i_nz = 0; i < i_count; i++ )
482 /* TODO: use a table instead */
/*
 * mb_partition_count: number of partitions for a macroblock partition type.
 * It is used in this file as the upper bound of the loop over i_part.
 * NOTE(review): the case labels and return values are missing from this
 * listing.
 */
483 static int mb_partition_count( int i_partition )
485 switch( i_partition )
495 /* should never occur */
/*
 * mb_sub_partition_count: number of sub-partitions for a sub-partition type.
 * It is used in this file as the upper bound of the loops over i_sub.
 * NOTE(review): the case labels and return values are missing from this
 * listing.
 */
500 static int mb_sub_partition_count( int i_partition )
502 switch( i_partition )
521 /* should never occur */
/*
 * x264_macroblock_partition_getxy: position of partition i_part (and, for
 * D_8x8, sub-partition i_sub) inside the macroblock, written to *x and *y.
 * Callers use the result as indices into mb->partition[x][y] and multiply it
 * by 4 to obtain luma pixel offsets.
 * NOTE(review): the assignments to *x / *y are missing from this listing;
 * only the dispatch on mb->i_partition and mb->i_sub_partition[] is visible.
 */
526 static inline void x264_macroblock_partition_getxy( x264_macroblock_t *mb, int i_part, int i_sub, int *x, int *y )
528 if( mb->i_partition == D_16x16 )
533 else if( mb->i_partition == D_16x8 )
538 else if( mb->i_partition == D_8x16 )
543 else if( mb->i_partition == D_8x8 )
/* within an 8x8 partition the offset also depends on the sub-partition shape */
548 if( IS_SUB4x4( mb->i_sub_partition[i_part] ) )
553 else if( IS_SUB4x8( mb->i_sub_partition[i_part] ) )
557 else if( IS_SUB8x4( mb->i_sub_partition[i_part] ) )
/*
 * x264_macroblock_partition_size: width and height of partition i_part (and,
 * for D_8x8, sub-partition i_sub), written to *w and *h.
 * Callers use the result as a count of mb->partition[][] cells and multiply
 * it by 4 to obtain luma pixel sizes.
 * NOTE(review): the assignments to *w / *h are missing from this listing;
 * only the dispatch on mb->i_partition and mb->i_sub_partition[] is visible.
 */
563 static inline void x264_macroblock_partition_size( x264_macroblock_t *mb, int i_part, int i_sub, int *w, int *h )
565 if( mb->i_partition == D_16x16 )
570 else if( mb->i_partition == D_16x8 )
575 else if( mb->i_partition == D_8x16 )
580 else if( mb->i_partition == D_8x8 )
582 if( IS_SUB4x4( mb->i_sub_partition[i_part] ) )
587 else if( IS_SUB4x8( mb->i_sub_partition[i_part] ) )
592 else if( IS_SUB8x4( mb->i_sub_partition[i_part] ) )
/*
 * x264_macroblock_partition_set: store a reference index and motion vector
 * for one partition.
 *   i_list        - reference list the values belong to
 *   i_part, i_sub - partition / sub-partition being set
 *   i_ref, mx, my - reference index and motion vector to store
 * The values are written to every mb->partition[][] cell covered by the
 * partition, as given by partition_getxy / partition_size.
 */
605 void x264_macroblock_partition_set( x264_macroblock_t *mb, int i_list, int i_part, int i_sub, int i_ref, int mx, int my )
611 x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
612 x264_macroblock_partition_size ( mb, i_part, i_sub, &w, &h );
/* replicate the values over the whole w x h area */
614 for( dx = 0; dx < w; dx++ )
616 for( dy = 0; dy < h; dy++ )
618 mb->partition[x+dx][y+dy].i_ref[i_list] = i_ref;
619 mb->partition[x+dx][y+dy].mv[i_list][0] = mx;
620 mb->partition[x+dx][y+dy].mv[i_list][1] = my;
/*
 * x264_macroblock_partition_get: read back the reference index and motion
 * vector of a partition for list i_list. The values are taken from the first
 * cell of the partition, mb->partition[x][y], where (x, y) comes from
 * partition_getxy.
 * NOTE(review): lines are missing around the three stores in this listing;
 * whether the output pointers may be NULL cannot be told from here.
 */
625 void x264_macroblock_partition_get( x264_macroblock_t *mb, int i_list, int i_part, int i_sub, int *pi_ref, int *pi_mx, int *pi_my )
629 x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
633 *pi_ref = mb->partition[x][y].i_ref[i_list];
637 *pi_mx = mb->partition[x][y].mv[i_list][0];
638 *pi_my = mb->partition[x][y].mv[i_list][1];
642 /* FIXME: ugly, and not optimised for the common case */
/*
 * x264_macroblock_predict_mv: motion vector predictor for one partition.
 *   i_list            - reference list
 *   i_part, i_subpart - partition / sub-partition to predict
 *   mvxp, mvyp        - receive the predicted vector
 * Three neighbours are examined: A (left), B (up) and C (up-right, with the
 * up-left position as a substitute). For each neighbour that is not intra
 * coded, its reference index and motion vector are loaded; the vectors
 * default to 0. The prediction is then chosen from directional special cases
 * for 16x8 / 8x16 partitions, from the single neighbour sharing the current
 * reference, or from the component-wise median of the three vectors.
 * NOTE(review): many lines are missing from this listing (neighbour lookup,
 * initial values of i_refa/b/c, the bodies of most branches); the comments
 * below describe only what is visible.
 */
643 void x264_macroblock_predict_mv( x264_macroblock_t *mb, int i_list, int i_part, int i_subpart, int *mvxp, int *mvyp )
653 int mvxa = 0, mvxb = 0, mvxc = 0;
654 int mvya = 0, mvyb = 0, mvyc = 0;
656 x264_macroblock_t *mbn;
659 x264_macroblock_partition_getxy( mb, i_part, i_subpart, &x, &y );
660 x264_macroblock_partition_size( mb, i_part, i_subpart, &w, &h );
/* reference index of the partition being predicted */
661 i_ref = mb->partition[x][y].i_ref[i_list];
663 /* Left: pixel (-1,0) */
674 if( !IS_INTRA( mbn->i_type ) )
676 i_refa = mbn->partition[xn][y].i_ref[i_list];
677 mvxa = mbn->partition[xn][y].mv[i_list][0];
678 mvya = mbn->partition[xn][y].mv[i_list][1];
682 /* Up: pixel (0,-1) */
693 if( !IS_INTRA( mbn->i_type ) )
695 i_refb = mbn->partition[x][yn].i_ref[i_list];
696 mvxb = mbn->partition[x][yn].mv[i_list][0];
697 mvyb = mbn->partition[x][yn].mv[i_list][1];
701 /* Up right: pixel (width,-1) */
706 if( yn < 0 && xn >= 4 )
724 else if( xn >= 4 || ( xn == 2 && ( yn == 0 || yn == 2 ) ) )
726 mbn = NULL; /* not yet decoded */
731 /* load top left: pixel (-1,-1) */
736 if( yn < 0 && xn < 0 )
738 if( mb->mba && mb->mbb )
764 if( !IS_INTRA( mbn->i_type ) )
766 i_refc = mbn->partition[xn][yn].i_ref[i_list];
767 mvxc = mbn->partition[xn][yn].mv[i_list][0];
768 mvyc = mbn->partition[xn][yn].mv[i_list][1];
/* directional special cases for 16x8 and 8x16 partitions, taken when the
 * named neighbour uses the same reference as the current partition */
772 if( mb->i_partition == D_16x8 && i_part == 0 && i_refb == i_ref )
777 else if( mb->i_partition == D_16x8 && i_part == 1 && i_refa == i_ref )
782 else if( mb->i_partition == D_8x16 && i_part == 0 && i_refa == i_ref )
787 else if( mb->i_partition == D_8x16 && i_part == 1 && i_refc == i_ref )
/* general case: count the neighbours sharing the current reference */
797 if( i_refa == i_ref ) i_count++;
798 if( i_refb == i_ref ) i_count++;
799 if( i_refc == i_ref ) i_count++;
803 *mvxp = x264_median( mvxa, mvxb, mvxc );
804 *mvyp = x264_median( mvya, mvyb, mvyc );
/* exactly one neighbour matches: that neighbour is selected */
806 else if( i_count == 1 )
808 if( i_refa == i_ref )
813 else if( i_refb == i_ref )
/* only A carries a reference (B and C are -1) */
824 else if( i_refb == -1 && i_refc == -1 && i_refa != -1 )
831 *mvxp = x264_median( mvxa, mvxb, mvxc );
832 *mvyp = x264_median( mvya, mvyb, mvyc );
/*
 * x264_macroblock_predict_mv_pskip: motion vector predictor for a skipped
 * P macroblock, written to *mvxp / *mvyp.
 * The left (A) and up (B) neighbours are read from list 0. A special case is
 * taken when either reference index is -1, or when either neighbour uses
 * reference 0 with a (0,0) vector; otherwise the regular predictor for
 * partition 0 of list 0 is used.
 * NOTE(review): the neighbour lookup, the initial values of i_refa/i_refb and
 * the body of the special case are missing from this listing; presumably -1
 * marks an unavailable neighbour and the special case yields a zero vector.
 */
837 void x264_macroblock_predict_mv_pskip( x264_macroblock_t *mb, int *mvxp, int *mvyp )
844 int mvxa = 0, mvxb = 0;
845 int mvya = 0, mvyb = 0;
847 x264_macroblock_t *mbn;
850 x264_macroblock_partition_getxy( mb, 0, 0, &x, &y );
852 /* Left: pixel (-1,0) */
863 if( !IS_INTRA( mbn->i_type ) )
865 i_refa = mbn->partition[xn][y].i_ref[0];
866 mvxa = mbn->partition[xn][y].mv[0][0];
867 mvya = mbn->partition[xn][y].mv[0][1];
871 /* Up: pixel (0,-1) */
882 if( !IS_INTRA( mbn->i_type ) )
884 i_refb = mbn->partition[x][yn].i_ref[0];
885 mvxb = mbn->partition[x][yn].mv[0][0];
886 mvyb = mbn->partition[x][yn].mv[0][1];
890 if( i_refa == -1 || i_refb == -1 ||
891 ( i_refa == 0 && mvxa == 0 && mvya == 0 ) ||
892 ( i_refb == 0 && mvxb == 0 && mvyb == 0 ) )
/* otherwise fall back to the regular predictor */
899 x264_macroblock_predict_mv( mb, 0, 0, 0, mvxp, mvyp );
/*
 * 52-entry table; values are the identity up to index 29 and grow more slowly
 * afterwards.
 * NOTE(review): presumably maps a luma QP to the chroma QP -- no use of the
 * table is visible here. The last row (entries 50 and 51) is missing from
 * this listing.
 */
903 static const int i_chroma_qp_table[52] =
905 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
906 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
907 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
908 29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
909 36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
/*
 * x264_macroblock_mc: motion compensation of mb from list-0 reference frames
 * into the frame being reconstructed (ctx->p_fdec).
 * For P_L0 macroblocks every partition is compensated; for P_8x8 macroblocks
 * every sub-partition of each of the four 8x8 partitions is. Luma goes
 * through h->mc[MC_LUMA] on plane 0, chroma through h->mc[MC_CHROMA] on
 * planes 1 and 2.
 * Partition positions and sizes are scaled by 4 for luma and by 2 for chroma.
 * NOTE(review): the tests that select the luma or the chroma path are missing
 * from this listing; presumably they depend on b_luma.
 */
913 static void x264_macroblock_mc( x264_t *h, x264_macroblock_t *mb, int b_luma )
915 x264_mb_context_t *ctx = mb->context;
921 if( mb->i_type == P_L0 )
925 for( i_part = 0; i_part < mb_partition_count( mb->i_partition ); i_part++ )
927 int i_width, i_height;
/* reference, vector, position and size of this partition */
930 x264_macroblock_partition_get( mb, 0, i_part, 0, &i_ref, &mx, &my );
931 x264_macroblock_partition_getxy( mb, i_part, 0, &x, &y );
932 x264_macroblock_partition_size( mb, i_part, 0, &i_width, &i_height );
/* luma: plane 0 */
936 int i_src = ctx->i_fref0[i_ref][0];
937 uint8_t *p_src= ctx->p_fref0[i_ref][0];
938 int i_dst = ctx->i_fdec[0];
939 uint8_t *p_dst= ctx->p_fdec[0];
941 h->mc[MC_LUMA]( &p_src[4*(x+y*i_src)], i_src,
942 &p_dst[4*(x+y*i_dst)], i_dst,
943 mx, my, 4*i_width, 4*i_height );
/* chroma: planes 1 and 2 */
948 uint8_t *p_src, *p_dst;
950 for( ch = 0; ch < 2; ch++ )
952 i_src = ctx->i_fref0[i_ref][1+ch];
953 p_src = ctx->p_fref0[i_ref][1+ch];
954 i_dst = ctx->i_fdec[1+ch];
955 p_dst = ctx->p_fdec[1+ch];
957 h->mc[MC_CHROMA]( &p_src[2*(x+y*i_src)], i_src,
958 &p_dst[2*(x+y*i_dst)], i_dst,
959 mx, my, 2*i_width, 2*i_height );
964 else if( mb->i_type == P_8x8 )
968 for( i_part = 0; i_part < 4; i_part++ )
972 for( i_sub = 0; i_sub < mb_sub_partition_count( mb->i_sub_partition[i_part] ); i_sub++ )
974 int i_width, i_height;
/* reference, vector, position and size of this sub-partition */
977 x264_macroblock_partition_get( mb, 0, i_part, i_sub, &i_ref, &mx, &my );
978 x264_macroblock_partition_getxy( mb, i_part, i_sub, &x, &y );
979 x264_macroblock_partition_size( mb, i_part, i_sub, &i_width, &i_height );
/* luma: plane 0 */
983 int i_src = ctx->i_fref0[i_ref][0];
984 uint8_t *p_src= ctx->p_fref0[i_ref][0];
985 int i_dst = ctx->i_fdec[0];
986 uint8_t *p_dst= ctx->p_fdec[0];
988 h->mc[MC_LUMA]( &p_src[4*(x+y*i_src)], i_src,
989 &p_dst[4*(x+y*i_dst)], i_dst,
990 mx, my, 4*i_width, 4*i_height );
/* chroma: planes 1 and 2 */
995 uint8_t *p_src, *p_dst;
997 for( ch = 0; ch < 2; ch++ )
999 i_src = ctx->i_fref0[i_ref][1+ch];
1000 p_src = ctx->p_fref0[i_ref][1+ch];
1001 i_dst = ctx->i_fdec[1+ch];
1002 p_dst = ctx->p_fdec[1+ch];
1004 h->mc[MC_CHROMA]( &p_src[2*(x+y*i_src)], i_src,
1005 &p_dst[2*(x+y*i_dst)], i_dst,
1006 mx, my, 2*i_width, 2*i_height );
1014 /*****************************************************************************
1015 * x264_macroblock_context_load: fill `context` for macroblock `mb` and attach it
1016 *****************************************************************************/
/*
 * The context receives, for the source picture, the frame being reconstructed
 * and every list-0 / list-1 reference frame, a pointer to the macroblock's
 * position in each of the three planes together with the plane stride.
 * For each of the 16 luma blocks, and for the chroma blocks, it also records
 * the neighbouring macroblocks (mba, mbb) and neighbouring blocks (bka, bkb).
 * Finally mb->context is pointed at the filled context.
 * NOTE(review): the code that picks mba/mbb, xa and yb for each block is
 * missing from this listing.
 */
1017 void x264_macroblock_context_load( x264_t *h, x264_macroblock_t *mb, x264_mb_context_t *context )
1021 x264_macroblock_t *a = NULL;
1022 x264_macroblock_t *b = NULL;
1024 if( mb->i_neighbour&MB_LEFT )
1028 if( mb->i_neighbour&MB_TOP )
/* the macroblock above is one macroblock row earlier in the array */
1030 b = mb - h->sps.i_mb_width;
/* LOAD_PTR: store in context->p_<dst>[] / context->i_<dst>[] the address of
 * this macroblock in each plane of `src` (16-pixel steps for plane 0,
 * 8-pixel steps for planes 1 and 2) and the plane strides */
1032 #define LOAD_PTR( dst, src ) \
1033 context->p_##dst[0] = (src)->plane[0] + 16 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[0] ); \
1034 context->p_##dst[1] = (src)->plane[1] + 8 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[1] ); \
1035 context->p_##dst[2] = (src)->plane[2] + 8 * ( mb->i_mb_x + mb->i_mb_y * (src)->i_stride[2] ); \
1036 context->i_##dst[0] = (src)->i_stride[0]; \
1037 context->i_##dst[1] = (src)->i_stride[1]; \
1038 context->i_##dst[2] = (src)->i_stride[2]
1040 LOAD_PTR( img, h->picture );
1041 LOAD_PTR( fdec, h->fdec );
1042 for( i = 0; i < h->i_ref0; i++ )
1044 LOAD_PTR( fref0[i], h->fref0[i] );
1046 for( i = 0; i < h->i_ref1; i++ )
1048 LOAD_PTR( fref1[i], h->fref1[i] );
/* per-block neighbour information */
1052 for( y = 0; y < 4; y++ )
1054 for( x = 0; x < 4; x++ )
1058 x264_macroblock_t *mba;
1059 x264_macroblock_t *mbb;
1061 idx = block_idx_xy[x][y];
1079 context->block[idx].mba = mba;
1080 context->block[idx].mbb = mbb;
1081 context->block[idx].bka = mba ? &mba->block[block_idx_xy[xa][y]] : NULL;
1082 context->block[idx].bkb = mbb ? &mbb->block[block_idx_xy[x][yb]] : NULL;
/* the top-left 2x2 positions are reused for the chroma blocks, which are
 * stored from index 16 on, four per chroma plane */
1084 if( x < 2 && y < 2 )
1087 if( xa > 1 ) xa -= 2; /* we have wrapped, but here the step is 2, not 4 */
1088 if( yb > 1 ) yb -= 2; /* idem */
1090 for( ch = 0; ch < 2; ch++ )
1092 context->block[16+4*ch+idx].mba = mba;
1093 context->block[16+4*ch+idx].mbb = mbb;
1094 context->block[16+4*ch+idx].bka = mba ? &mba->block[16+4*ch+block_idx_xy[xa][y]] : NULL;
1095 context->block[16+4*ch+idx].bkb = mbb ? &mbb->block[16+4*ch+block_idx_xy[x][yb]] : NULL;
1101 mb->context = context;
1105 * x264_mb_decimate_score: given dct coeffs it returns a score to see if we could empty this dct coeffs
1106 * to 0 (low score means set it to null)
1107 * Used in inter macroblock (luma and chroma)
1108 * luma: for a 8x8 block: if score < 4 -> null
1109 * for the complete mb: if score < 6 -> null
1110 * chroma: for the complete mb: if score < 7 -> null
/*
 * Parameters: dct is a list of i_max coefficients, scanned from the last
 * entry towards the first. Trailing zeros are skipped; then, for each
 * remaining coefficient, the run of zeros that precedes it is measured and
 * i_ds_table[run] is added to the score.
 * NOTE(review): the handling of coefficients with magnitude above 1, the run
 * counting and the return are missing from this listing.
 */
1112 static int x264_mb_decimate_score( int *dct, int i_max )
/* score contribution by run length: short runs score higher */
1114 static const int i_ds_table[16] = { 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1117 int idx = i_max - 1;
/* skip trailing zeros */
1119 while( idx >= 0 && dct[idx] == 0 )
1128 if( abs( dct[idx--] ) > 1 )
/* zeros preceding the coefficient just consumed */
1134 while( idx >= 0 && dct[idx] == 0 )
1139 i_score += i_ds_table[i_run];
/*
 * x264_mb_encode_4x4: encode and reconstruct luma 4x4 block idx of mb.
 * The residual between the source picture and the current contents of the
 * reconstructed frame (the prediction) is transformed and quantised as intra;
 * the coefficients are stored in zigzag order in mb->block[idx].luma4x4.
 * The block is then dequantised, inverse transformed and added back to the
 * reconstructed frame.
 */
1145 static void x264_mb_encode_4x4( x264_t *h, x264_macroblock_t *mb, int idx, int i_qscale )
1147 x264_mb_context_t *ctx = mb->context;
/* source and reconstruction pointers for this 4x4 block */
1149 uint8_t *p_src = ctx->p_img[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_img[0];
1150 int i_src = ctx->i_img[0];
1151 uint8_t *p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
1152 int i_dst = ctx->i_fdec[0];
1155 int16_t dct4x4[4][4];
1157 /* compute the residual */
1158 h->pixf.sub4x4( luma, p_src, i_src, p_dst, i_dst );
1160 /* calculate dct coeffs */
1161 h->dctf.dct4x4( dct4x4, luma );
1162 quant_4x4( dct4x4, i_qscale, 1 );
1164 scan_zigzag_4x4full( mb->block[idx].luma4x4, dct4x4 );
1166 /* output samples to fdec */
1167 dequant_4x4( dct4x4, i_qscale );
1168 h->dctf.idct4x4( luma, dct4x4 );
1170 /* put pixels to fdec */
1171 h->pixf.add4x4( p_dst, i_dst, luma );
/*
 * x264_mb_encode_i16x16: encode and reconstruct the luma of an intra 16x16
 * macroblock.
 * The residual is split into sixteen 4x4 blocks. Each block is transformed;
 * its [0][0] coefficient is collected into a separate 4x4 DC block
 * (dct4x4[0]) and the block is quantised and stored, without its first
 * coefficient, in mb->block[i].residual_ac. The DC block gets its own
 * transform and quantiser and is stored in mb->luma16x16_dc.
 * Reconstruction reverses the steps and adds the result to the reconstructed
 * frame.
 */
1174 static void x264_mb_encode_i16x16( x264_t *h, x264_macroblock_t *mb, int i_qscale )
1176 x264_mb_context_t *ctx = mb->context;
1178 uint8_t *p_src = ctx->p_img[0];
1179 int i_src = ctx->i_img[0];
1180 uint8_t *p_dst = ctx->p_fdec[0];
1181 int i_dst = ctx->i_fdec[0];
1183 int16_t luma[16][4][4];
/* slot 0 holds the DC block, slots 1..16 the sixteen 4x4 blocks */
1184 int16_t dct4x4[16+1][4][4];
1188 /* calculate the diff */
1189 h->pixf.sub16x16( luma, p_src, i_src, p_dst, i_dst );
1191 /* calculate dct coeffs */
1192 for( i = 0; i < 16; i++ )
1194 h->dctf.dct4x4( dct4x4[i+1], luma[i] );
/* gather the DC coefficient before the block is quantised */
1197 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
1199 quant_4x4( dct4x4[1+i], i_qscale, 1 );
1200 scan_zigzag_4x4( mb->block[i].residual_ac, dct4x4[1+i] );
1203 h->dctf.dct4x4dc( dct4x4[0], dct4x4[0] );
1204 quant_4x4_dc( dct4x4[0], i_qscale, 1 );
1205 scan_zigzag_4x4full( mb->luma16x16_dc, dct4x4[0] );
1207 /* output samples to fdec */
1208 h->dctf.idct4x4dc( dct4x4[0], dct4x4[0] );
1209 dequant_4x4_dc( dct4x4[0], i_qscale ); /* XXX not inverted: inverse transform runs before dequant */
1211 /* rebuild each 4x4 block */
1212 for( i = 0; i < 16; i++ )
1214 dequant_4x4( dct4x4[1+i], i_qscale );
/* replace the first coefficient with the reconstructed DC value */
1217 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
1219 h->dctf.idct4x4( luma[i], dct4x4[i+1] );
1221 /* put pixels to fdec */
1222 h->pixf.add16x16( p_dst, i_dst, luma );
/*
 * x264_mb_encode_8x8: encode and reconstruct both 8x8 chroma planes of mb.
 * For each plane the residual is split into four 4x4 blocks; each is
 * transformed, its [0][0] coefficient is collected into a 2x2 DC block, and
 * the block is quantised and stored without its first coefficient in
 * mb->block[16 + i + ch*4].residual_ac. The 2x2 DC block is transformed,
 * quantised and stored in mb->chroma_dc[ch].
 * When b_inter is set and the decimate score is below 7, the AC coefficients
 * are cleared to save bits.
 * Reconstruction reverses the steps and adds the result to the reconstructed
 * frame.
 * NOTE(review): i_decimate_score is initialised once before the plane loop in
 * the visible lines, so the second plane's test would include the first
 * plane's score unless a reset exists on a line missing from this listing.
 * NOTE(review): quant_4x4 / quant_2x2_dc are called with b_intra = 1 even
 * when b_inter is set -- confirm this is intended.
 */
1225 static void x264_mb_encode_8x8( x264_t *h, x264_macroblock_t *mb, int b_inter, int i_qscale )
1227 x264_mb_context_t *ctx = mb->context;
1229 uint8_t *p_src, *p_dst;
1233 int i_decimate_score = 0;
1235 for( ch = 0; ch < 2; ch++ )
1237 int16_t chroma[4][4][4];
1238 int16_t dct2x2[2][2];
1239 int16_t dct4x4[4][4][4];
/* planes 1 and 2 hold the chroma */
1241 p_src = ctx->p_img[1+ch];
1242 i_src = ctx->i_img[1+ch];
1243 p_dst = ctx->p_fdec[1+ch];
1244 i_dst = ctx->i_fdec[1+ch];
1246 /* calculate the diff */
1247 h->pixf.sub8x8( chroma, p_src, i_src, p_dst, i_dst );
1249 /* calculate dct coeffs */
1250 for( i = 0; i < 4; i++ )
1252 h->dctf.dct4x4( dct4x4[i], chroma[i] );
/* gather the DC coefficient before the block is quantised */
1255 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
1257 quant_4x4( dct4x4[i], i_qscale, 1 );
1258 scan_zigzag_4x4( mb->block[16+i+ch*4].residual_ac, dct4x4[i] );
1260 i_decimate_score += x264_mb_decimate_score( mb->block[16+i+ch*4].residual_ac, 15 );
1263 h->dctf.dct2x2dc( dct2x2, dct2x2 );
1264 quant_2x2_dc( dct2x2, i_qscale, 1 );
1265 scan_zigzag_2x2_dc( mb->chroma_dc[ch], dct2x2 );
1267 if( i_decimate_score < 7 && b_inter )
1269 /* Near-null chroma 8x8 block, so make it null (saves bits) */
1270 for( i = 0; i < 4; i++ )
1273 for( x = 0; x < 15; x++ )
1275 mb->block[16+i+ch*4].residual_ac[x] = 0;
1277 for( x = 0; x < 4; x++ )
1279 for( y = 0; y < 4; y++ )
1281 dct4x4[i][x][y] = 0;
1287 /* output samples to fdec */
1288 h->dctf.idct2x2dc( dct2x2, dct2x2 );
1289 dequant_2x2_dc( dct2x2, i_qscale ); /* XXX not inverted: inverse transform runs before dequant */
1291 /* rebuild each 4x4 block */
1292 for( i = 0; i < 4; i++ )
1294 dequant_4x4( dct4x4[i], i_qscale );
/* replace the first coefficient with the reconstructed DC value */
1297 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
1299 h->dctf.idct4x4( chroma[i], dct4x4[i] );
1301 h->pixf.add8x8( p_dst, i_dst, chroma );
/*
 * x264_mb_pred_mode4x4_fix: fold the three DC variants used when neighbours
 * are missing (DC_LEFT, DC_TOP, DC_128) into I_PRED_4x4_DC.
 * NOTE(review): the fall-through return is missing from this listing;
 * presumably other modes are returned unchanged.
 */
1305 static int x264_mb_pred_mode4x4_fix( int i_mode )
1307 if( i_mode == I_PRED_4x4_DC_LEFT || i_mode == I_PRED_4x4_DC_TOP || i_mode == I_PRED_4x4_DC_128 )
1309 return I_PRED_4x4_DC;
/*
 * x264_mb_pred_mode16x16_fix: fold the three DC variants used when neighbours
 * are missing (DC_LEFT, DC_TOP, DC_128) into I_PRED_16x16_DC.
 * NOTE(review): the fall-through return is missing from this listing;
 * presumably other modes are returned unchanged.
 */
1313 static int x264_mb_pred_mode16x16_fix( int i_mode )
1315 if( i_mode == I_PRED_16x16_DC_LEFT || i_mode == I_PRED_16x16_DC_TOP || i_mode == I_PRED_16x16_DC_128 )
1317 return I_PRED_16x16_DC;
/*
 * x264_mb_pred_mode8x8_fix: fold the three chroma DC variants used when
 * neighbours are missing (DC_LEFT, DC_TOP, DC_128) into I_PRED_CHROMA_DC.
 * NOTE(review): the fall-through return is missing from this listing;
 * presumably other modes are returned unchanged.
 */
1321 static int x264_mb_pred_mode8x8_fix( int i_mode )
1323 if( i_mode == I_PRED_CHROMA_DC_LEFT || i_mode == I_PRED_CHROMA_DC_TOP || i_mode == I_PRED_CHROMA_DC_128 )
1325 return I_PRED_CHROMA_DC;
/* x264_mb_analysis_t: working state and results of the macroblock analysis.
 * NOTE(review): the opening of the struct and several members (among them
 * i_lambda, i_qp and the i_sad_* / i_ref_* / i_predict16x16 / i_predict8x8
 * fields used by the analysis functions) are missing from this listing. */
1332 /* conduct the analysis using this lambda and QP */
1336 /* Edge histogram (only luma) */
/* indexed [y][x][mode]: accumulated edge strength per 4x4 block and mode */
1337 int i_edge_4x4[4][4][9]; /* mode 2 isn't calculated (DC) */
/* indexed [mode]: accumulated edge strength for the whole macroblock */
1338 int i_edge_16x16[4]; /* mode 2 isn't calculated (DC) */
1341 /* Luma part 16x16 and 4x4 modes stats */
/* chosen 4x4 mode per block, indexed [x][y] */
1346 int i_predict4x4[4][4];
1352 /* II: Inter part */
1359 int i_mv_p16x8[2][2];
1363 int i_mv_p8x16[2][2];
/* per 8x8 partition: chosen sub-partition type, and one vector per
 * sub-partition, indexed [partition][sub-partition][0 = x, 1 = y] */
1367 int i_sub_partition_p8x8[4];
1368 int i_mv_p8x8[4][4][2];
1370 } x264_mb_analysis_t;
/*
 * 52-entry cost table with values growing from 1 to 91.
 * NOTE(review): presumably indexed by QP and giving the lambda used to weight
 * bit costs -- no use of the table is visible here. One row (four entries) is
 * missing from this listing.
 */
1373 static const int i_qp0_cost_table[52] =
1375 1, 1, 1, 1, 1, 1, 1, 1,
1377 1, 1, 1, 1, 2, 2, 2, 2,
1378 3, 3, 3, 4, 4, 4, 5, 6,
1379 6, 7, 8, 9,10,11,13,14,
1380 16,18,20,23,25,29,32,36,
1381 40,45,51,57,64,72,81,91
/*
 * x264_macroblock_analyse_edge: build edge-direction statistics for the luma
 * of mb, filling res->i_edge_4x4[y][x][mode] and res->i_edge_16x16[mode].
 * For each examined pixel, two 3x3 weighted differences (dgx, dgy) are
 * computed, their combined magnitude Ag = |dgx| + |dgy| is taken as the edge
 * strength, and the ratio Ryx = dgy / dgx (8-bit fixed point) selects the
 * prediction mode whose bucket receives Ag.
 * NOTE(review): several lines are missing from this listing, among them any
 * guard against dgx == 0 before the division and the modes assigned in most
 * branches of the ratio test.
 * NOTE(review): "dgy << 8" left-shifts a possibly negative value, which is
 * undefined behaviour in C.
 */
1385 static void x264_macroblock_analyse_edge( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1387 uint8_t *p_img = mb->context->p_img[0];;
1388 int i_img = mb->context->i_img[0];
/* convert a floating-point constant to 8-bit fixed point */
1394 #define FIX8( f ) ( (int)((f) * 256))
1395 /* init stats (16x16) */
1396 for( i = 0; i < 4; i++ )
1398 res->i_edge_16x16[i] = 0;
1401 for( y = 0; y < 4; y++ )
1403 for( x = 0; x < 4; x++ )
1405 /* init stats (4x4) */
1406 for( i = 0; i < 9; i++ )
1408 res->i_edge_4x4[y][x][i] = 0;
1411 /* FIXME real interval 0-4 except for border mb */
/* the outermost pixel ring of the macroblock is skipped so that the 3x3
 * window stays inside it */
1412 for( dy = (y==0 ? 1:0); dy < (y==3?3:4); dy++ )
1414 for( dx = (x==0?1:0); dx < (x==3?3:4); dx++ )
/* NOTE(review): the row offset uses y*4+dy but the column offset uses x+dx;
 * this looks like it should be x*4+dx -- confirm */
1416 uint8_t *pix = &p_img[(y*4+dy)*i_img+(x+dx)];
/* row above minus row below, centre weighted by 2 */
1423 dgx = (pix[-1*i_img-1]+2*pix[-1*i_img+0]+pix[-1*i_img+1]) -
1424 (pix[ 1*i_img-1]+2*pix[ 1*i_img+0]+pix[ 1*i_img+1]);
/* right column minus left column, centre weighted by 2 */
1427 dgy = (pix[-1*i_img+1]+2*pix[ 0*i_img+1]+pix[ 1*i_img+1]) -
1428 (pix[-1*i_img-1]+2*pix[ 0*i_img-1]+pix[ 1*i_img-1]);
1430 /* XXX angle to test/verify */
1431 Ag = abs( dgx ) + abs( dgy );
1439 Ryx = ( dgy << 8 )/ dgx;
/* classify the ratio into one of the 4x4 prediction directions */
1442 if( abs(Ryx) >= FIX8(5.027339) )
1446 else if( abs(Ryx) <= FIX8(0.198912) )
1450 else if( Ryx > FIX8(0.198912) && Ryx <= FIX8(0.668179) )
1454 else if( Ryx > FIX8(0.668179) && Ryx <= FIX8(1.496606) )
1456 Dg = I_PRED_4x4_DDR;
1458 else if( Ryx > FIX8(1.496606) && Ryx <= FIX8(5.027339) )
1462 else if( Ryx > FIX8(-5.027339) && Ryx <= FIX8(-1.496606) )
1466 else if( Ryx > FIX8(-1.496606) && Ryx <= FIX8(-0.668179) )
1468 Dg = I_PRED_4x4_DDL;
1470 else if( Ryx > FIX8(-0.668179) && Ryx <= FIX8(-0.198912) )
1476 /* Should never occur */
1477 fprintf( stderr, "mmh bad edge dectection function\n" );
1480 res->i_edge_4x4[y][x][Dg] += Ag;
/* coarser classification for the 16x16 statistics */
1482 if( abs(Ryx) > FIX8(2.414214) )
1484 Dg = I_PRED_16x16_V;
1486 else if( abs(Ryx) < FIX8(0.414214) )
1488 Dg = I_PRED_16x16_H;
1492 Dg = I_PRED_16x16_P;
1494 res->i_edge_16x16[Dg] += Ag;
/*
 * x264_macroblock_analyse_i16x16: choose the best intra 16x16 prediction
 * mode for mb.
 * Every mode allowed by the available neighbours is predicted into the
 * reconstructed frame and scored as SATD against the source plus
 * res->i_lambda times the bit size of the mode. The cheapest mode and its
 * cost are stored in res->i_predict16x16 and res->i_sad_i16x16
 * (-1 means "no candidate yet").
 */
1502 static void x264_macroblock_analyse_i16x16( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1504 uint8_t *p_dst = mb->context->p_fdec[0];
1505 uint8_t *p_src = mb->context->p_img[0];
1506 int i_dst = mb->context->i_fdec[0];
1507 int i_src = mb->context->i_img[0];
1511 int predict_mode[4];
1513 res->i_sad_i16x16 = -1;
1515 /* 16x16 prediction selection */
1516 predict_16x16_mode_available( mb, predict_mode, &i_max );
1517 for( i = 0; i < i_max; i++ )
1522 i_mode = predict_mode[i];
1524 /* we do the prediction */
1525 h->predict_16x16[i_mode]( p_dst, i_dst );
1527 /* cost = SATD of the prediction error + lambda * mode bits */
1528 i_sad = h->pixf.satd[PIXEL_16x16]( p_dst, i_dst, p_src, i_src ) +
1529 res->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix(i_mode) );
1530 /* a lower cost is better */
1531 if( res->i_sad_i16x16 == -1 || res->i_sad_i16x16 > i_sad )
1533 res->i_predict16x16 = i_mode;
1534 res->i_sad_i16x16 = i_sad;
/*
 * x264_macroblock_analyse_i4x4: choose an intra 4x4 prediction mode for each
 * of the 16 luma blocks of mb and accumulate the total cost in
 * res->i_sad_i4x4.
 * For each block the largest edge strength over the directional modes is
 * found and written into the DC slot (mode 2); candidate modes are then
 * compared against that threshold, predicted, and scored as SATD plus
 * res->i_lambda times 1 when the mode equals the predicted mode and 4
 * otherwise. The block is encoded immediately with the chosen mode because
 * later blocks predict from its reconstruction.
 * NOTE(review): the action taken when a mode's edge strength is below the
 * threshold, and the update of i_best, are missing from this listing;
 * presumably such modes are skipped.
 */
1539 static void x264_macroblock_analyse_i4x4( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1544 int predict_mode[9];
1546 uint8_t *p_dst = mb->context->p_fdec[0];
1547 uint8_t *p_src = mb->context->p_img[0];
1548 int i_dst = mb->context->i_fdec[0];
1549 int i_src = mb->context->i_img[0];
1551 res->i_sad_i4x4 = 0;
1553 /* 4x4 prediction selection */
1554 for( idx = 0; idx < 16; idx++ )
1563 i_pred_mode= predict_pred_intra4x4_mode( h, mb, idx );
1564 x = block_idx_x[idx];
1565 y = block_idx_y[idx];
/* i_th = largest edge strength over all modes except 2 (DC) */
1567 i_th = res->i_edge_4x4[y][x][0];
1568 if( i_th < res->i_edge_4x4[y][x][1] ) i_th = res->i_edge_4x4[y][x][1];
1569 if( i_th < res->i_edge_4x4[y][x][3] ) i_th = res->i_edge_4x4[y][x][3];
1570 if( i_th < res->i_edge_4x4[y][x][4] ) i_th = res->i_edge_4x4[y][x][4];
1571 if( i_th < res->i_edge_4x4[y][x][5] ) i_th = res->i_edge_4x4[y][x][5];
1572 if( i_th < res->i_edge_4x4[y][x][6] ) i_th = res->i_edge_4x4[y][x][6];
1573 if( i_th < res->i_edge_4x4[y][x][7] ) i_th = res->i_edge_4x4[y][x][7];
1574 if( i_th < res->i_edge_4x4[y][x][8] ) i_th = res->i_edge_4x4[y][x][8];
/* give DC the threshold value so that it is never below it */
1577 res->i_edge_4x4[y][x][2] = i_th;
1579 p_src_by = p_src + 4 * x + 4 * y * i_src;
1580 p_dst_by = p_dst + 4 * x + 4 * y * i_dst;
1583 predict_4x4_mode_available( mb, idx, predict_mode, &i_max );
1584 for( i = 0; i < i_max; i++ )
1590 i_mode = predict_mode[i];
1591 i_fmode = x264_mb_pred_mode4x4_fix( i_mode );
1593 if( res->i_edge_4x4[y][x][i_fmode] < i_th )
1598 /* we do the prediction */
1599 h->predict_4x4[i_mode]( p_dst_by, i_dst );
1601 /* SATD of the prediction error */
1602 i_sad = h->pixf.satd[PIXEL_4x4]( p_dst_by, i_dst, p_src_by, i_src );
/* mode signalling cost: cheaper when the mode matches the predicted one */
1604 i_sad += res->i_lambda * (i_pred_mode == i_fmode ? 1 : 4);
1606 /* a lower cost is better */
1607 if( i_best == -1 || i_best > i_sad )
1609 res->i_predict4x4[x][y] = i_mode;
1613 res->i_sad_i4x4 += i_best;
1615 /* we need to encode this mb now (for next ones) */
1616 mb->block[idx].i_intra4x4_pred_mode = res->i_predict4x4[x][y];
1617 h->predict_4x4[res->i_predict4x4[x][y]]( p_dst_by, i_dst );
1618 x264_mb_encode_4x4( h, mb, idx, res->i_qp );
1620 res->i_sad_i4x4 += res->i_lambda * 24; /* from JVT (SATD0) */
/*
 * x264_macroblock_analyse_intra_chroma: choose the best intra chroma
 * prediction mode for mb.
 * Every mode allowed by the available neighbours is predicted into both
 * chroma planes of the reconstructed frame and scored as the sum of the two
 * 8x8 SATDs plus res->i_lambda times the bit size of the mode. The cheapest
 * mode and its cost are stored in res->i_predict8x8 and res->i_sad_i8x8
 * (-1 means "no candidate yet").
 */
1623 static void x264_macroblock_analyse_intra_chroma( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1628 int predict_mode[9];
1630 uint8_t *p_dstc[2], *p_srcc[2];
1631 int i_dstc[2], i_srcc[2];
1633 /* 8x8 prediction selection for chroma */
1634 p_dstc[0] = mb->context->p_fdec[1]; i_dstc[0] = mb->context->i_fdec[1];
1635 p_dstc[1] = mb->context->p_fdec[2]; i_dstc[1] = mb->context->i_fdec[2];
1636 p_srcc[0] = mb->context->p_img[1]; i_srcc[0] = mb->context->i_img[1];
1637 p_srcc[1] = mb->context->p_img[2]; i_srcc[1] = mb->context->i_img[2];
1639 predict_8x8_mode_available( mb, predict_mode, &i_max );
1640 res->i_sad_i8x8 = -1;
1641 for( i = 0; i < i_max; i++ )
1646 i_mode = predict_mode[i];
1648 /* we do the prediction (same mode on both planes) */
1649 h->predict_8x8[i_mode]( p_dstc[0], i_dstc[0] );
1650 h->predict_8x8[i_mode]( p_dstc[1], i_dstc[1] );
1652 /* we calculate the cost */
1653 i_sad = h->pixf.satd[PIXEL_8x8]( p_dstc[0], i_dstc[0], p_srcc[0], i_srcc[0] ) +
1654 h->pixf.satd[PIXEL_8x8]( p_dstc[1], i_dstc[1], p_srcc[1], i_srcc[1] ) +
1655 res->i_lambda * bs_size_ue( x264_mb_pred_mode8x8_fix(i_mode) );
1657 /* a lower cost is better */
1658 if( res->i_sad_i8x8 == -1 || res->i_sad_i8x8 > i_sad )
1660 res->i_predict8x8 = i_mode;
1661 res->i_sad_i8x8 = i_sad;
/*
 * x264_macroblock_analyse_inter_p8x8: analyse mb as a P 8x8 macroblock using
 * the reference already chosen for 16x16 (res->i_ref_p16x16).
 * For each of the four 8x8 partitions, candidate sub-partition types are
 * evaluated: each sub-partition gets a predicted vector, a motion search
 * (x264_me_p_umhexagons) starting from it, and a cost made of the search
 * result plus res->i_lambda times the bits of the vector difference and of
 * the sub-partition type. The best type and vectors are stored in
 * res->i_sub_partition_p8x8[] / res->i_mv_p8x8[][][] and written back to mb
 * so that the next partition can predict from them. The total goes to
 * res->i_sad_p8x8, to which a reference-index cost is finally added.
 * NOTE(review): several lines are missing from this listing, among them the
 * computation of x / y, the initialisation of i_satd, i_b_satd and i_test,
 * and part of the argument lists.
 */
1666 static void x264_macroblock_analyse_inter_p8x8( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1668 x264_mb_context_t *ctx = mb->context;
1669 int i_ref = res->i_ref_p16x16;
1671 uint8_t *p_fref = ctx->p_fref0[i_ref][0];
1672 int i_fref = ctx->i_fref0[i_ref][0];
1673 uint8_t *p_img = ctx->p_img[0];
1674 int i_img = ctx->i_img[0];
1678 res->i_ref_p8x8 = i_ref;
1679 res->i_sad_p8x8 = 0;
1680 mb->i_partition = D_8x8;
1682 for( i = 0; i < 4; i++ )
/* candidate sub-partition types, the matching pixel-compare size, and the
 * pixel offset of each sub-partition inside the 8x8 partition */
1684 static const int test8x8_mode[4] = { D_L0_8x8, D_L0_8x4, D_L0_4x8, D_L0_4x4 };
1685 static const int test8x8_pix[4] = { PIXEL_8x8, PIXEL_8x4, PIXEL_4x8, PIXEL_4x4 };
1686 static const int test8x8_pos_x[4][4] = { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 0, 0 }, { 0, 4, 0, 4 } };
1687 static const int test8x8_pos_y[4][4] = { { 0, 0, 0, 0 }, { 0, 4, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 4, 4 } };
1701 /* FIXME: testing every type is far too slow, so only 8x8 is tested */
1702 //for( i_test = 0; i_test < 4; i_test++ )
/* the type must be set on mb before predicting: the predictor reads it */
1708 mb->i_sub_partition[i] = test8x8_mode[i_test];
1710 for( i_sub = 0; i_sub < mb_sub_partition_count( test8x8_mode[i_test] ); i_sub++ )
/* start the search from the predicted vector */
1712 x264_macroblock_predict_mv( mb, 0, i, i_sub, &mvp[i_sub][0], &mvp[i_sub][1] );
1713 mv[i_sub][0] = mvp[i_sub][0];
1714 mv[i_sub][1] = mvp[i_sub][1];
1716 i_satd += x264_me_p_umhexagons( h,
1717 &p_fref[(y+test8x8_pos_y[i_test][i_sub])*i_fref +x+test8x8_pos_x[i_test][i_sub]], i_fref,
1718 &p_img[(y+test8x8_pos_y[i_test][i_sub])*i_img +x+test8x8_pos_x[i_test][i_sub]], i_img,
1719 test8x8_pix[i_test],
1721 &mv[i_sub][0], &mv[i_sub][1] );
/* bits needed to code the vector difference */
1722 i_satd += res->i_lambda * ( bs_size_se( mv[i_sub][0] - mvp[i_sub][0] ) +
1723 bs_size_se( mv[i_sub][1] - mvp[i_sub][1] ) );
/* bits needed to code the sub-partition type */
1726 switch( test8x8_mode[i_test] )
1729 i_satd += res->i_lambda * bs_size_ue( 0 );
1732 i_satd += res->i_lambda * bs_size_ue( 1 );
1735 i_satd += res->i_lambda * bs_size_ue( 2 );
1738 i_satd += res->i_lambda * bs_size_ue( 3 );
1741 fprintf( stderr, "internal error (invalid sub type)\n" );
/* keep the cheapest candidate */
1745 if( i_b_satd == -1 || i_b_satd > i_satd )
1748 res->i_sub_partition_p8x8[i] = test8x8_mode[i_test];;
1749 for( i_sub = 0; i_sub < mb_sub_partition_count( test8x8_mode[i_test] ); i_sub++ )
1751 res->i_mv_p8x8[i][i_sub][0] = mv[i_sub][0];
1752 res->i_mv_p8x8[i][i_sub][1] = mv[i_sub][1];
1757 res->i_sad_p8x8 += i_b_satd;
1758 /* needed for the next block */
1759 mb->i_sub_partition[i] = res->i_sub_partition_p8x8[i];
1760 for( i_sub = 0; i_sub < mb_sub_partition_count( res->i_sub_partition_p8x8[i] ); i_sub++ )
1762 x264_macroblock_partition_set( mb, 0, i, i_sub,
1764 res->i_mv_p8x8[i][i_sub][0],
1765 res->i_mv_p8x8[i][i_sub][1] );
/* cost of signalling the reference index, counted four times */
1769 res->i_sad_p8x8 += 4*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/*
 * x264_macroblock_analyse_inter:
 *  Inter analysis of one macroblock.
 *  A 16x16 motion search is run for every reference index below h->i_ref0
 *  and the cheapest result is kept in res (i_sad_p16x16, i_ref_p16x16,
 *  i_mv_p16x16). With that reference, the 16x8 and 8x16 splits are then
 *  costed, each one gated on res->i_predict16x16.
 *  The call to x264_macroblock_analyse_inter_p8x8 at the end is commented
 *  out.
 *  Side effects on mb: i_type is set to P_L0, i_partition is overwritten and
 *  x264_macroblock_partition_set is called on it.
 */
1772 static void x264_macroblock_analyse_inter( x264_t *h, x264_macroblock_t *mb, x264_mb_analysis_t *res )
1774 x264_mb_context_t *ctx = mb->context;
/* -1 is the "no cost yet" marker tested further down. */
1779 res->i_sad_p16x16 = -1;
1780 res->i_sad_p16x8 = -1;
1781 res->i_sad_p8x16 = -1;
1782 res->i_sad_p8x8 = -1;
1784 /* 16x16 search on all reference frames */
1785 mb->i_type = P_L0; /* ugly, fix that */
1786 mb->i_partition = D_16x16;
1787 for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
1793 /* Get the predicted MV */
1794 x264_macroblock_partition_set( mb, 0, 0, 0, i_ref, 0, 0 );
1795 x264_macroblock_predict_mv( mb, 0, 0, 0, &mvxp, &mvyp );
/* mvx/mvy are seeded with the predictor and handed to the search by address. */
1797 mvx = mvxp; mvy = mvyp;
1798 i_sad = x264_me_p_umhexagons( h, ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1799 ctx->p_img[0], ctx->i_img[0],
1800 PIXEL_16x16, res->i_lambda, &mvx, &mvy );
/* A result equal to the predictor gets a fixed 16*lambda reduction. */
1801 if( mvx == mvxp && mvy == mvyp )
1803 i_sad -= 16 * res->i_lambda;
/* Lambda-weighted bs_size_se() of the difference between mv and predictor. */
1807 i_sad += res->i_lambda * (bs_size_se(mvx - mvxp) + bs_size_se(mvy - mvyp));
/* Lambda-weighted bs_size_te() of the reference index. */
1809 i_sad += res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/* Keep the first result, then any strictly cheaper one. */
1811 if( res->i_sad_p16x16 == -1 || i_sad < res->i_sad_p16x16 )
1813 res->i_sad_p16x16 = i_sad;
1814 res->i_ref_p16x16 = i_ref;
1815 res->i_mv_p16x16[0] = mvx;
1816 res->i_mv_p16x16[1] = mvy;
1820 /* Now do the refinement (using the ref found in 16x16 mode) */
1821 i_ref = res->i_ref_p16x16;
1822 x264_macroblock_partition_set( mb, 0, 0, 0, i_ref, 0, 0 );
1825 /* XXX we test i_predict16x16 to try shape with the same direction than edge
1826 * We should do a better algo of course (the one with edge dectection to be used
1827 * for intra mode too)
1830 if( res->i_predict16x16 != I_PRED_16x16_V )
// 16x8 candidate: both halves start from their predicted mv; the second
// search is offset by 8 rows in the reference and source planes.
1834 mb->i_partition = D_16x8;
1836 res->i_ref_p16x8 = i_ref;
1837 x264_macroblock_predict_mv( mb, 0, 0, 0, &mvp[0][0], &mvp[0][1] );
1838 x264_macroblock_predict_mv( mb, 0, 1, 0, &mvp[1][0], &mvp[1][1] );
1840 res->i_mv_p16x8[0][0] = mvp[0][0]; res->i_mv_p16x8[0][1] = mvp[0][1];
1841 res->i_mv_p16x8[1][0] = mvp[1][0]; res->i_mv_p16x8[1][1] = mvp[1][1];
1843 res->i_sad_p16x8 = x264_me_p_umhexagons( h,
1844 ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1845 ctx->p_img[0], ctx->i_img[0],
1848 &res->i_mv_p16x8[0][0], &res->i_mv_p16x8[0][1] ) +
1849 x264_me_p_umhexagons( h,
1850 &ctx->p_fref0[i_ref][0][8*ctx->i_fref0[i_ref][0]], ctx->i_fref0[i_ref][0],
1851 &ctx->p_img[0][8*ctx->i_img[0]], ctx->i_img[0],
1854 &res->i_mv_p16x8[1][0], &res->i_mv_p16x8[1][1] );
// mv difference terms for both halves, then the reference index term weighted by 2.
1856 res->i_sad_p16x8 += res->i_lambda * ( bs_size_se(res->i_mv_p16x8[0][0] - mvp[0][0] ) +
1857 bs_size_se(res->i_mv_p16x8[0][1] - mvp[0][1] ) +
1858 bs_size_se(res->i_mv_p16x8[1][0] - mvp[1][0] ) +
1859 bs_size_se(res->i_mv_p16x8[1][1] - mvp[1][1] ) );
1861 res->i_sad_p16x8 += 2*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1865 if( res->i_predict16x16 != I_PRED_16x16_H )
// 8x16 candidate: same scheme as 16x8; the second search is offset by
// 8 columns in the reference and source planes.
1869 mb->i_partition = D_8x16;
1871 res->i_ref_p8x16 = i_ref;
1872 x264_macroblock_predict_mv( mb, 0, 0, 0, &mvp[0][0], &mvp[0][1] );
1873 x264_macroblock_predict_mv( mb, 0, 1, 0, &mvp[1][0], &mvp[1][1] );
1875 res->i_mv_p8x16[0][0] = mvp[0][0]; res->i_mv_p8x16[0][1] = mvp[0][1];
1876 res->i_mv_p8x16[1][0] = mvp[1][0]; res->i_mv_p8x16[1][1] = mvp[1][1];
1878 res->i_sad_p8x16 = x264_me_p_umhexagons( h,
1879 ctx->p_fref0[i_ref][0], ctx->i_fref0[i_ref][0],
1880 ctx->p_img[0], ctx->i_img[0],
1883 &res->i_mv_p8x16[0][0], &res->i_mv_p8x16[0][1] ) +
1884 x264_me_p_umhexagons( h,
1885 &ctx->p_fref0[i_ref][0][8], ctx->i_fref0[i_ref][0],
1886 &ctx->p_img[0][8], ctx->i_img[0],
1889 &res->i_mv_p8x16[1][0], &res->i_mv_p8x16[1][1] );
1891 res->i_sad_p8x16 += res->i_lambda * ( bs_size_se(res->i_mv_p8x16[0][0] - mvp[0][0] ) +
1892 bs_size_se(res->i_mv_p8x16[0][1] - mvp[0][1] ) +
1893 bs_size_se(res->i_mv_p8x16[1][0] - mvp[1][0] ) +
1894 bs_size_se(res->i_mv_p8x16[1][1] - mvp[1][1] ) );
1895 res->i_sad_p8x16 += 2*res->i_lambda * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
1900 // x264_macroblock_analyse_inter_p8x8( h,mb, res );
1904 /*****************************************************************************
1905 * x264_macroblock_analyse:
1906 *****************************************************************************/
/*
 * Chooses the coding mode of one macroblock and stores the decision in mb:
 *  - derives the QP (clipped to 0..51) and looks up the matching lambda,
 *  - runs the edge, intra 16x16 and intra 4x4 analyses, plus the inter
 *    analysis when i_slice_type is SLICE_TYPE_P,
 *  - selects mb->i_type / mb->i_partition from the analysis costs,
 *  - runs the chroma intra analysis when the selected type is intra,
 *  - copies the selected prediction modes, or reference indices and motion
 *    vectors, from the analysis into mb.
 */
1907 void x264_macroblock_analyse( x264_t *h, x264_macroblock_t *mb, int i_slice_type )
1909 x264_mb_analysis_t analysis;
1916 analysis.i_qp = x264_clip3( h->pps.i_pic_init_qp + h->sh.i_qp_delta + mb->i_qp_delta, 0, 51 );
1917 analysis.i_lambda = i_qp0_cost_table[analysis.i_qp];
1919 x264_macroblock_analyse_edge( h, mb, &analysis );
1921 /*--------------------------- Do the analysis ---------------------------*/
1922 x264_macroblock_analyse_i16x16( h, mb, &analysis );
1923 x264_macroblock_analyse_i4x4 ( h, mb, &analysis );
1924 if( i_slice_type == SLICE_TYPE_P )
1926 x264_macroblock_analyse_inter( h, mb, &analysis );
1929 /*-------------------- Choose the macroblock mode -----------------------*/
1930 /* Do the MB decision */
1931 if( i_slice_type == SLICE_TYPE_I )
/* I slices: the cheaper intra mode wins, I_16x16 on a tie. */
1933 mb->i_type = analysis.i_sad_i4x4 < analysis.i_sad_i16x16 ? I_4x4 : I_16x16;
/* BEST_TYPE: selects (type, partition) when its cost is valid (!= -1) and
 * lower than the running best i_satd. */
1938 #define BEST_TYPE( type, partition, satd ) \
1939 if( satd != -1 && satd < i_satd ) \
1942 mb->i_type = type; \
1943 mb->i_partition = partition; \
/* The running best cost starts at the intra 4x4 cost. */
1946 i_satd = analysis.i_sad_i4x4;
1949 BEST_TYPE( I_16x16, -1, analysis.i_sad_i16x16 );
1950 BEST_TYPE( P_L0, D_16x16, analysis.i_sad_p16x16 );
1951 BEST_TYPE( P_L0, D_16x8 , analysis.i_sad_p16x8 );
1952 BEST_TYPE( P_L0, D_8x16 , analysis.i_sad_p8x16 );
1953 BEST_TYPE( P_8x8, D_8x8 , analysis.i_sad_p8x8 );
/* Chroma prediction is only analysed once an intra type has been selected. */
1958 if( IS_INTRA( mb->i_type ) )
1960 x264_macroblock_analyse_intra_chroma( h, mb, &analysis );
1963 /*-------------------- Update MB from the analysis ----------------------*/
1964 switch( mb->i_type )
1967 for( i = 0; i < 16; i++ )
/* analysis.i_predict4x4 is indexed [x][y]; block_idx_x/y give the position of block i. */
1969 mb->block[i].i_intra4x4_pred_mode = analysis.i_predict4x4[block_idx_x[i]][block_idx_y[i]];
1971 mb->i_chroma_pred_mode = analysis.i_predict8x8;
1974 mb->i_intra16x16_pred_mode = analysis.i_predict16x16;
1975 mb->i_chroma_pred_mode = analysis.i_predict8x8;
/* Inter types: store the reference and mv of each partition in mb. */
1978 switch( mb->i_partition )
1981 x264_macroblock_partition_set( mb, 0, 0, 0,
1982 analysis.i_ref_p16x16, analysis.i_mv_p16x16[0], analysis.i_mv_p16x16[1] );
1985 x264_macroblock_partition_set( mb, 0, 0, 0,
1986 analysis.i_ref_p16x8, analysis.i_mv_p16x8[0][0], analysis.i_mv_p16x8[0][1] );
1987 x264_macroblock_partition_set( mb, 0, 1, 0,
1988 analysis.i_ref_p16x8, analysis.i_mv_p16x8[1][0], analysis.i_mv_p16x8[1][1] );
1991 x264_macroblock_partition_set( mb, 0, 0, 0,
1992 analysis.i_ref_p8x16, analysis.i_mv_p8x16[0][0], analysis.i_mv_p8x16[0][1] );
1993 x264_macroblock_partition_set( mb, 0, 1, 0,
1994 analysis.i_ref_p8x16, analysis.i_mv_p8x16[1][0], analysis.i_mv_p8x16[1][1] );
1997 fprintf( stderr, "internal error\n" );
/* 8x8 partitioning: every 8x8 block gets its sub-partition type and one
 * (ref, mv) per sub-block; all of them share analysis.i_ref_p8x8. */
2003 for( i = 0; i < 4; i++ )
2007 mb->i_sub_partition[i] = analysis.i_sub_partition_p8x8[i];
2008 for( i_sub = 0; i_sub < mb_sub_partition_count( mb->i_sub_partition[i] ); i_sub++ )
2010 x264_macroblock_partition_set( mb, 0, i, i_sub,
2011 analysis.i_ref_p8x8,
2012 analysis.i_mv_p8x8[i][i_sub][0],
2013 analysis.i_mv_p8x8[i][i_sub][1] );
2019 fprintf( stderr, "internal error\n" );
2026 /*****************************************************************************
2027 * x264_macroblock_encode:
2028 *****************************************************************************/
/*
 * Encodes one macroblock according to the mb->i_type already chosen.
 *  Luma: I_16x16 runs the 16x16 prediction and x264_mb_encode_i16x16;
 *        I_4x4 predicts and encodes the sixteen 4x4 blocks one by one;
 *        the motion-compensated path subtracts the prediction, applies the
 *        4x4 DCT and quantization, decimates low-scoring blocks and writes
 *        the reconstruction back into fdec.
 *  Chroma: intra prediction or motion compensation, then x264_mb_encode_8x8.
 *  Afterwards i_cbp_luma, i_cbp_chroma and every block's i_non_zero_count
 *  are computed, and a P_L0 16x16 macroblock with empty CBPs may be turned
 *  into P_SKIP.
 *  The intra prediction modes stored in mb are replaced by their *_fix()
 *  values once they have been used for prediction.
 */
2029 void x264_macroblock_encode( x264_t *h, x264_macroblock_t *mb )
2035 /* quantization scale */
2036 i_qscale = x264_clip3( h->pps.i_pic_init_qp + h->sh.i_qp_delta + mb->i_qp_delta, 0, 51 );
2038 if( mb->i_type == I_16x16 )
2040 /* do the right prediction */
2041 h->predict_16x16[mb->i_intra16x16_pred_mode]( mb->context->p_fdec[0], mb->context->i_fdec[0] );
2043 /* encode the 16x16 macroblock */
2044 x264_mb_encode_i16x16( h, mb, i_qscale );
2046 /* fix the pred mode value */
2047 mb->i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix( mb->i_intra16x16_pred_mode );
2049 else if( mb->i_type == I_4x4 )
2051 for( i = 0; i < 16; i++ )
2055 /* Do the right prediction */
2056 p_dst_by = mb->context->p_fdec[0] + 4 * block_idx_x[i] + 4 * block_idx_y[i] * mb->context->i_fdec[0];
2057 h->predict_4x4[mb->block[i].i_intra4x4_pred_mode]( p_dst_by, mb->context->i_fdec[0] );
2059 /* encode one 4x4 block */
2060 x264_mb_encode_4x4( h, mb, i, i_qscale );
2062 /* fix the pred mode value */
2063 mb->block[i].i_intra4x4_pred_mode = x264_mb_pred_mode4x4_fix( mb->block[i].i_intra4x4_pred_mode );
2068 x264_mb_context_t *ctx = mb->context;
/* One set of 4x4 coefficients per luma block, kept for the reconstruction pass below. */
2069 int16_t dct4x4[16][4][4];
2071 int i8x8, i4x4, idx;
2072 int i_decimate_mb = 0;
2074 /* Motion compensation */
2075 x264_macroblock_mc( h, mb, 1 );
2077 for( i8x8 = 0; i8x8 < 4; i8x8++ )
2082 /* encode one 4x4 block */
2084 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2086 uint8_t *p_src, *p_dst;
2088 idx = i8x8 * 4 + i4x4;
2090 p_src = ctx->p_img[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_img[0];
2091 p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
2093 /* we calculate diff */
2094 h->pixf.sub4x4( luma, p_src, ctx->i_img[0],p_dst, ctx->i_fdec[0] );
2096 /* calculate dct coeffs */
2097 h->dctf.dct4x4( dct4x4[idx], luma );
2098 quant_4x4( dct4x4[idx], i_qscale, 1 );
2100 scan_zigzag_4x4full( mb->block[idx].luma4x4, dct4x4[idx] );
2101 i_decimate_8x8 += x264_mb_decimate_score( mb->block[idx].luma4x4, 16 );
2104 /* decimate this 8x8 block */
2105 i_decimate_mb += i_decimate_8x8;
/* An 8x8 block scoring below 4 has its luma4x4 and dct4x4 entries cleared. */
2106 if( i_decimate_8x8 < 4 )
2108 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2111 idx = i8x8 * 4 + i4x4;
2112 for( i = 0; i < 16; i++ )
2114 mb->block[idx].luma4x4[i] = 0;
2116 for( x = 0; x < 4; x++ )
2118 for( y = 0; y < 4; y++ )
2120 dct4x4[idx][x][y] = 0;
/* A macroblock whose summed score is below 6 has every luma4x4 array cleared. */
2127 if( i_decimate_mb < 6 )
2129 for( i8x8 = 0; i8x8 < 4; i8x8++ )
2131 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2133 for( i = 0; i < 16; i++ )
2135 mb->block[i8x8 * 4 + i4x4].luma4x4[i] = 0;
2142 for( i8x8 = 0; i8x8 < 4; i8x8++ )
2145 /* TODO we could avoid it if we had decimate this 8x8 block */
2146 /* output samples to fdec */
2147 for( i4x4 = 0; i4x4 < 4; i4x4++ )
2151 idx = i8x8 * 4 + i4x4;
2153 dequant_4x4( dct4x4[idx], i_qscale );
2154 h->dctf.idct4x4( luma, dct4x4[idx] );
2156 /* put pixel to fdec */
2157 p_dst = ctx->p_fdec[0] + 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * ctx->i_fdec[0];
2158 h->pixf.add4x4( p_dst, ctx->i_fdec[0], luma );
/* From here on i_qscale is the chroma QP: the luma QP plus the PPS chroma
 * offset, clipped to 0..51 and mapped through i_chroma_qp_table. */
2165 i_qscale = i_chroma_qp_table[x264_clip3( i_qscale + h->pps.i_chroma_qp_index_offset, 0, 51 )];
2166 if( IS_INTRA( mb->i_type ) )
2168 /* do the right prediction */
2169 h->predict_8x8[mb->i_chroma_pred_mode]( mb->context->p_fdec[1], mb->context->i_fdec[1] );
2170 h->predict_8x8[mb->i_chroma_pred_mode]( mb->context->p_fdec[2], mb->context->i_fdec[2] );
2174 /* Motion compensation */
2175 x264_macroblock_mc( h, mb, 0 );
2177 /* encode the 8x8 blocks */
2178 x264_mb_encode_8x8( h, mb, !IS_INTRA( mb->i_type ), i_qscale );
2180 /* fix the pred mode value */
2181 if( IS_INTRA( mb->i_type ) )
2183 mb->i_chroma_pred_mode = x264_mb_pred_mode8x8_fix( mb->i_chroma_pred_mode );
2186 /* Calculate the Luma/Chroma pattern and non_zero_count */
2187 if( mb->i_type == I_16x16 )
2189 mb->i_cbp_luma = 0x00;
2190 for( i = 0; i < 16; i++ )
2192 mb->block[i].i_non_zero_count = array_non_zero_count( mb->block[i].residual_ac, 15 );
/* I_16x16: a single non-zero AC block sets all four luma CBP bits. */
2193 if( mb->block[i].i_non_zero_count > 0 )
2195 mb->i_cbp_luma = 0x0f;
2201 mb->i_cbp_luma = 0x00;
2202 for( i = 0; i < 16; i++ )
2204 mb->block[i].i_non_zero_count = array_non_zero_count( mb->block[i].luma4x4, 16 );
/* One CBP bit per group of four consecutive 4x4 blocks. */
2205 if( mb->block[i].i_non_zero_count > 0 )
2207 mb->i_cbp_luma |= 1 << (i/4);
2212 /* Calculate the chroma pattern */
2213 mb->i_cbp_chroma = 0x00;
2214 for( i = 0; i < 8; i++ )
2216 mb->block[16+i].i_non_zero_count = array_non_zero_count( mb->block[16+i].residual_ac, 15 );
2217 if( mb->block[16+i].i_non_zero_count > 0 )
2219 mb->i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
/* NOTE(review): in the condition below the closing parenthesis comes before
 * the last "> 0", so the second count is tested for non-zero inside the ||
 * and the boolean result is then compared with 0. The outcome is the same
 * as long as the count is never negative, but the parenthesis looks
 * misplaced -- confirm and tidy up. */
2222 if( mb->i_cbp_chroma == 0x00 &&
2223 ( array_non_zero_count( mb->chroma_dc[0], 4 ) > 0 || array_non_zero_count( mb->chroma_dc[1], 4 ) ) > 0 )
2225 mb->i_cbp_chroma = 0x01; /* dc only */
2229 * XXX: in the me perhaps we should take x264_macroblock_predict_mv_pskip into account
2230 * (if multiple mv give same result)*/
/* P_SKIP candidate: P_L0 16x16 with no coded luma and no coded chroma. */
2231 if( mb->i_type == P_L0 && mb->i_partition == D_16x16 &&
2232 mb->i_cbp_luma == 0x00 && mb->i_cbp_chroma == 0x00 )
2237 x264_macroblock_partition_get( mb, 0, 0, 0, &i_ref, &mvx, &mvy );
/* The type is switched under the test that the stored mv equals the P_SKIP predictor. */
2243 x264_macroblock_predict_mv_pskip( mb, &mvxp, &mvyp );
2244 if( mvxp == mvx && mvyp == mvy )
2246 mb->i_type = P_SKIP;
/* Negative pseudo block indices passed as i_idx to block_residual_write for
 * the chroma DC and luma DC coefficient arrays. */
2253 #define BLOCK_INDEX_CHROMA_DC (-1)
2254 #define BLOCK_INDEX_LUMA_DC (-2)
2256 /****************************************************************************
2257 * block_residual_write:
2258 ****************************************************************************/
/*
 * Writes one block of coefficients to the bitstream s.
 *  i_idx   : block index, or BLOCK_INDEX_CHROMA_DC / BLOCK_INDEX_LUMA_DC
 *  l       : coefficient array
 *  i_count : number of entries of l to consider
 * l is scanned from its last entry backwards to build the level[] and run[]
 * lists. The function then writes, in order: the coeff token (total count
 * and trailing ones), the signs of the trailing ones, the remaining levels
 * with an adaptive suffix length, the total-zeros code when
 * i_total < i_count, and the run-before codes.
 * The table names (coeff_token, level_prefix, total_zeros, run_before) match
 * the H.264 CAVLC residual syntax elements.
 */
2259 static void block_residual_write( x264_t *h, bs_t *s, x264_macroblock_t *mb, int i_idx, int *l, int i_count )
2261 int level[16], run[16];
2262 int i_total, i_trailing;
2265 unsigned int i_sign;
2269 int i_suffix_length;
2271 /* first find i_last */
2272 i_last = i_count - 1;
2273 while( i_last >= 0 && l[i_last] == 0 )
2288 /* level and run and total */
2289 while( i_last >= 0 )
2291 level[idx] = l[i_last--];
/* Skip over the zeros that precede this level in the array. */
2294 while( i_last >= 0 && l[i_last] == 0 )
2301 i_total_zero += run[idx];
/* At most three levels of magnitude 1 are counted as trailing ones, and only while b_trailing is set. */
2303 if( b_trailing && abs( level[idx] ) == 1 && i_trailing < 3 )
2306 if( level[idx] < 0 )
2322 /* total/trailing */
2323 if( i_idx == BLOCK_INDEX_CHROMA_DC )
/* Chroma DC always uses coeff token table 4. */
2325 bs_write_vlc( s, x264_coeff_token[4][i_total][i_trailing] );
2329 /* predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */
/* Maps the predicted non-zero count (0..16) to one of four coeff token tables. */
2330 static const int ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3 };
2333 if( i_idx == BLOCK_INDEX_LUMA_DC )
/* Luma DC uses the prediction computed for block 0. */
2335 nC = predict_non_zero_code( h, mb, 0 );
2339 nC = predict_non_zero_code( h, mb, i_idx );
2342 bs_write_vlc( s, x264_coeff_token[ct_index[nC]][i_total][i_trailing] );
/* The suffix length starts at 1 only with more than 10 coefficients and fewer than 3 trailing ones. */
2350 i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0;
2351 if( i_trailing > 0 )
/* One bit per trailing one, taken from i_sign. */
2353 bs_write( s, i_trailing, i_sign );
2355 for( i = i_trailing; i < i_total; i++ )
2359 /* calculate level code */
/* Negative levels map to odd codes, positive levels to even codes. */
2362 i_level_code = -2*level[i] - 1;
2364 else /* if( level[i] > 0 ) */
2366 i_level_code = 2 * level[i] - 2;
2368 if( i == i_trailing && i_trailing < 3 )
2370 i_level_code -=2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */
/* The prefix and escape form is chosen from the size of the level code. */
2373 if( ( i_level_code >> i_suffix_length ) < 14 )
2375 bs_write_vlc( s, x264_level_prefix[i_level_code >> i_suffix_length] );
2376 if( i_suffix_length > 0 )
2378 bs_write( s, i_suffix_length, i_level_code );
2381 else if( i_suffix_length == 0 && i_level_code < 30 )
2383 bs_write_vlc( s, x264_level_prefix[14] );
2384 bs_write( s, 4, i_level_code - 14 );
2386 else if( i_suffix_length > 0 && ( i_level_code >> i_suffix_length ) == 14 )
2388 bs_write_vlc( s, x264_level_prefix[14] );
2389 bs_write( s, i_suffix_length, i_level_code );
/* Escape: prefix 15 followed by a 12-bit value. */
2393 bs_write_vlc( s, x264_level_prefix[15] );
2394 i_level_code -= 15 << i_suffix_length;
2395 if( i_suffix_length == 0 )
2400 if( i_level_code >= ( 1 << 12 ) || i_level_code < 0 )
2402 fprintf( stderr, "OVERFLOW levelcode=%d\n", i_level_code );
2405 bs_write( s, 12, i_level_code ); /* check overflow ?? */
2408 if( i_suffix_length == 0 )
/* Threshold test on the level magnitude; the i_suffix_length < 6 term caps the suffix length. */
2412 if( abs( level[i] ) > ( 3 << ( i_suffix_length - 1 ) ) && i_suffix_length < 6 )
/* total_zeros is only written when the block is not completely filled. */
2418 if( i_total < i_count )
2420 if( i_idx == BLOCK_INDEX_CHROMA_DC )
2422 bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] );
2426 bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );
/* Run-before codes for every coefficient but the last one in the list. */
2430 for( i = 0, i_zero_left = i_total_zero; i < i_total - 1; i++ )
2434 if( i_zero_left <= 0 )
/* The table row is chosen by the zeros still left, capped at index 6. */
2439 i_zl = X264_MIN( i_zero_left - 1, 6 );
2441 bs_write_vlc( s, x264_run_before[i_zl][run[i]] );
2443 i_zero_left -= run[i];
2450 /*****************************************************************************
2451 * x264_macroblock_write:
2452 *****************************************************************************/
/*
 * Writes one macroblock to the bitstream s, in this order:
 *  - the macroblock type (intra types are offset by i_mb_i_offset, which is
 *    set from i_slice_type),
 *  - the prediction data: intra 4x4 modes and chroma mode for intra types,
 *    sub-partition types, reference indices and mv differences for P_8x8,
 *    reference indices and mv differences for the other inter types,
 *  - the coded block pattern (not written for I_16x16),
 *  - the QP delta and the residual blocks when something is coded.
 * I_PCM macroblocks are written as raw 8-bit samples read from h->picture.
 */
2453 void x264_macroblock_write( x264_t *h, bs_t *s, int i_slice_type, x264_macroblock_t *mb )
2458 /* int b_sub_ref1 = 1; */
/* i_mb_i_offset is chosen per slice type and added to the intra type codes below. */
2460 switch( i_slice_type )
2469 i_mb_i_offset = 23 + 5;
2472 fprintf( stderr, "internal error or slice unsupported\n" );
2476 /* PCM special block type UNTESTED */
2477 if( mb->i_type == I_PCM )
2479 bs_write_ue( s, i_mb_i_offset + 25 ); /* I_PCM */
/* NOTE(review): in the three loops below i is added directly to the sample
 * offset, so 256 (luma) or 64 (chroma) consecutive bytes of one row are
 * read instead of 16 or 8 rows stepped by the stride -- confirm and fix. */
2482 for( i = 0; i < 16*16; i++ )
2484 bs_write( s, 8, h->picture->plane[0][mb->i_mb_y * 16 * h->picture->i_stride[0] + mb->i_mb_x * 16+i] );
2487 for( i = 0; i < 8*8; i++ )
2489 bs_write( s, 8, h->picture->plane[1][mb->i_mb_y * 8 * h->picture->i_stride[1] + mb->i_mb_x * 8+i] );
2492 for( i = 0; i < 8*8; i++ )
2494 bs_write( s, 8, h->picture->plane[2][mb->i_mb_y * 8 * h->picture->i_stride[2] + mb->i_mb_x * 8+i] );
/* All 16 luma and 8 chroma blocks are marked with a non-zero count of 16. */
2497 for( i = 0; i < 16 + 8; i++ )
2500 mb->block[i].i_non_zero_count = 16;
2505 if( mb->i_type == I_4x4 )
2507 bs_write_ue( s, i_mb_i_offset + 0 ); /* I_4x4 */
2509 else if( mb->i_type == I_16x16 )
/* The I_16x16 type code packs the prediction mode, the chroma CBP and
 * whether any luma AC is coded. */
2511 int i_type = 1 + mb->i_intra16x16_pred_mode + mb->i_cbp_chroma * 4 + ( mb->i_cbp_luma == 0 ? 0 : 12 );
2513 bs_write_ue( s, i_mb_i_offset + i_type );
2515 else if( mb->i_type == P_L0 )
2517 if( mb->i_partition == D_16x16 )
2519 bs_write_ue( s, 0 );
2521 else if( mb->i_partition == D_16x8 )
2523 bs_write_ue( s, 1 );
2525 else if( mb->i_partition == D_8x16 )
2527 bs_write_ue( s, 2 );
2530 else if( mb->i_type == P_8x8 )
/* Type 4 is written when all four 8x8 partitions use reference 0. */
2532 if( mb->partition[0][0].i_ref[0] == 0 &&
2533 mb->partition[0][2].i_ref[0] == 0 &&
2534 mb->partition[2][0].i_ref[0] == 0 &&
2535 mb->partition[2][2].i_ref[0] == 0 )
2538 bs_write_ue( s, 4 ); /* P_8x8ref0 */
2543 bs_write_ue( s, 3 );
2551 if( IS_INTRA( mb->i_type ) )
2554 if( mb->i_type == I_4x4 )
2556 for( i = 0; i < 16; i++ )
2558 int i_predicted_mode = predict_pred_intra4x4_mode( h, mb, i );
2559 int i_mode = mb->block[i].i_intra4x4_pred_mode;
/* A single 1 bit when the mode equals its prediction; otherwise a 0 bit
 * followed by a 3-bit mode value. */
2561 if( i_predicted_mode == i_mode)
2563 bs_write( s, 1, 1 ); /* b_prev_intra4x4_pred_mode */
2567 bs_write( s, 1, 0 ); /* b_prev_intra4x4_pred_mode */
2568 if( i_mode < i_predicted_mode )
2570 bs_write( s, 3, i_mode );
2574 bs_write( s, 3, i_mode - 1 );
2579 bs_write_ue( s, mb->i_chroma_pred_mode );
2581 else if( mb->i_type == P_8x8 )
/* Sub-partition type of each 8x8 block. */
2584 for( i = 0; i < 4; i++ )
2586 switch( mb->i_sub_partition[i] )
2589 bs_write_ue( s, 0 );
2592 bs_write_ue( s, 1 );
2595 bs_write_ue( s, 2 );
2598 bs_write_ue( s, 3 );
/* Reference indices are only written with more than one active reference and b_sub_ref0 set. */
2603 if( h->sh.i_num_ref_idx_l0_active > 1 && b_sub_ref0 )
2605 for( i = 0; i < 4; i++ )
2608 x264_macroblock_partition_get( mb, 0, i, 0, &i_ref, NULL, NULL );
2610 bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, i_ref );
/* Motion vectors are written as differences against their prediction. */
2613 for( i = 0; i < 4; i++ )
2616 for( i_part = 0; i_part < mb_sub_partition_count( mb->i_sub_partition[i] ); i_part++ )
2621 x264_macroblock_partition_get( mb, 0, i, i_part, NULL, &mvx, &mvy );
2622 x264_macroblock_predict_mv( mb, 0, i, i_part, &mvxp, &mvyp );
2624 bs_write_se( s, mvx - mvxp );
2625 bs_write_se( s, mvy - mvyp);
2629 else if( mb->i_type == B_8x8 )
2631 /* TODO for B-frame (merge it with P_8x8 ?)*/
2633 else if( mb->i_type != B_DIRECT )
2635 /* FIXME -> invalid for B frame */
/* One partition for D_16x16, two for any other partitioning. */
2638 int i_part = 1 + ( mb->i_partition != D_16x16 ? 1 : 0 );
2640 if( h->sh.i_num_ref_idx_l0_active > 1 )
2642 for( i = 0; i < i_part; i++ )
2644 if( mb->i_type == P_L0 ) /* fixme B-frame */
2647 x264_macroblock_partition_get( mb, 0, i, 0, &i_ref, NULL, NULL );
2648 bs_write_te( s, h->sh.i_num_ref_idx_l0_active - 1, i_ref ); /* -1 is correct ? */
2652 if( h->sh.i_num_ref_idx_l1_active > 1 )
2654 for( i = 0; i < i_part; i++ )
2656 /* ref idx part L1 TODO when needed */
2660 for( i = 0; i < i_part; i++ )
2662 if( mb->i_type == P_L0 )
2667 x264_macroblock_partition_get( mb, 0, i, 0, NULL, &mvx, &mvy );
2668 x264_macroblock_predict_mv( mb, 0, i, 0, &mvxp, &mvyp );
2670 bs_write_se( s, mvx - mvxp );
2671 bs_write_se( s, mvy - mvyp);
2674 /* Same for L1 for B frame */
/* The coded block pattern is not written for I_16x16. I_4x4 and the other
 * types use different mapping tables, both indexed by (chroma << 4) | luma. */
2677 if( mb->i_type != I_16x16 )
2679 if( mb->i_type == I_4x4 )
2681 bs_write_ue( s, intra4x4_cbp_to_golomb[( mb->i_cbp_chroma << 4 )|mb->i_cbp_luma] );
2685 bs_write_ue( s, inter_cbp_to_golomb[( mb->i_cbp_chroma << 4 )|mb->i_cbp_luma] );
/* QP delta and residual are written when any CBP is set or the type is I_16x16. */
2689 if( mb->i_cbp_luma > 0 || mb->i_cbp_chroma > 0 || mb->i_type == I_16x16 )
2691 bs_write_se( s, mb->i_qp_delta );
2693 /* write residual */
2694 if( mb->i_type == I_16x16 )
/* I_16x16: the luma DC block comes first, then the 15-coefficient AC blocks if any luma is coded. */
2697 block_residual_write( h, s, mb, BLOCK_INDEX_LUMA_DC , mb->luma16x16_dc, 16 );
2699 if( mb->i_cbp_luma != 0 )
2702 for( i = 0; i < 16; i++ )
2704 block_residual_write( h, s, mb, i, mb->block[i].residual_ac, 15 );
2710 for( i = 0; i < 16; i++ )
/* A 4x4 block is written only when the CBP bit of its group of four is set. */
2712 if( mb->i_cbp_luma & ( 1 << ( i / 4 ) ) )
2714 block_residual_write( h, s, mb, i, mb->block[i].luma4x4, 16 );
2719 if( mb->i_cbp_chroma &0x03 ) /* Chroma DC residual present */
2721 block_residual_write( h, s, mb, BLOCK_INDEX_CHROMA_DC, mb->chroma_dc[0], 4 );
2722 block_residual_write( h, s, mb, BLOCK_INDEX_CHROMA_DC, mb->chroma_dc[1], 4 );
2724 if( mb->i_cbp_chroma&0x02 ) /* Chroma AC residual present */
2726 for( i = 0; i < 8; i++ )
2728 block_residual_write( h, s, mb, 16 + i, mb->block[16+i].residual_ac, 15 );