1 /*****************************************************************************
2 * cabac.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #include "common/common.h"
26 #include "macroblock.h"
/* Encode the intra part of a macroblock type with the caller-supplied
 * contexts ctx0..ctx5.
 *  - I_4x4 / I_8x8: a single 0 bin on ctx0.
 *  - I_PCM: a 1 bin on ctx0, then x264_cabac_encode_flush().
 *  - Remaining case: a 1 bin on ctx0, a terminal bin, then the luma CBP flag,
 *    the chroma CBP and the remapped 16x16 prediction mode.
 * NOTE(review): the remaining case is presumably the I_16x16 "else" branch;
 * the line introducing it is not visible in this listing - confirm. */
32 static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
33 int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
35 if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
37 x264_cabac_encode_decision_noup( cb, ctx0, 0 );
40 else if( i_mb_type == I_PCM )
42 x264_cabac_encode_decision_noup( cb, ctx0, 1 );
43 x264_cabac_encode_flush( h, cb );
/* Remap the stored 16x16 prediction mode through x264_mb_pred_mode16x16_fix. */
48 int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
50 x264_cabac_encode_decision_noup( cb, ctx0, 1 );
51 x264_cabac_encode_terminal( cb );
/* ctx1: whether any luma CBP bit is set. */
53 x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
/* ctx2: chroma CBP zero / non-zero; when non-zero, ctx3 carries i_cbp_chroma>>1. */
54 if( h->mb.i_cbp_chroma == 0 )
55 x264_cabac_encode_decision_noup( cb, ctx2, 0 );
58 x264_cabac_encode_decision( cb, ctx2, 1 );
59 x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
/* Prediction mode as two bins: high bit on ctx4, low bit on ctx5. */
61 x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
62 x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
/* Encode the type of the current macroblock (h->mb.i_type). The contexts and
 * bin strings depend on the slice type: I, P, or anything else (treated as B,
 * per the trailing comment on the final else). */
66 static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
68 const int i_mb_type = h->mb.i_type;
/* Interlace flag: context 70 + i_neighbour_interlaced, value h->mb.b_interlaced.
 * NOTE(review): only the tail of the guarding condition is visible below. */
71 (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
73 x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
76 if( h->sh.i_type == SLICE_TYPE_I )
/* ctx is derived from the left/top neighbours that exist (type >= 0) and are
 * not I_4x4. NOTE(review): the declaration of ctx and the statements that
 * update it are not visible in this listing. */
79 if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
81 if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
84 x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
86 else if( h->sh.i_type == SLICE_TYPE_P )
88 /* prefix: 14, suffix: 17 */
/* P_L0: 0 on context 14, then the partition shape on contexts 15 and 16/17. */
89 if( i_mb_type == P_L0 )
91 x264_cabac_encode_decision_noup( cb, 14, 0 );
92 x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
93 x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
/* P_8x8: fixed bin string 0,0,1 on contexts 14,15,16. */
95 else if( i_mb_type == P_8x8 )
97 x264_cabac_encode_decision_noup( cb, 14, 0 );
98 x264_cabac_encode_decision_noup( cb, 15, 0 );
99 x264_cabac_encode_decision_noup( cb, 16, 1 );
/* Remaining P-slice case (presumably intra - the else line is not visible):
 * a 1 bin on context 14, then the intra type with contexts based at 17. */
104 x264_cabac_encode_decision_noup( cb, 14, 1 );
107 x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
110 else //if( h->sh.i_type == SLICE_TYPE_B )
/* ctx is derived from the left/top neighbours that exist and are neither
 * B_SKIP nor B_DIRECT (the updates to ctx are not visible here). */
113 if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
115 if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
/* B_DIRECT is a single 0 bin on 27+ctx; the other visible call codes a 1 bin
 * on the same context before the type-specific bins. */
118 if( i_mb_type == B_DIRECT )
120 x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
123 x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
/* B_8x8: fixed bins 1,1 on 27+3 and 27+4, then 1,1,1 on 27+5. */
125 if( i_mb_type == B_8x8 )
127 x264_cabac_encode_decision_noup( cb, 27+3, 1 );
128 x264_cabac_encode_decision_noup( cb, 27+4, 1 );
129 x264_cabac_encode_decision( cb, 27+5, 1 );
130 x264_cabac_encode_decision( cb, 27+5, 1 );
131 x264_cabac_encode_decision_noup( cb, 27+5, 1 );
/* Intra in this slice type: prefix bins 1,1 then 1,0,1 on 27+5, followed by
 * the intra type with contexts based at 32. */
133 else if( IS_INTRA( i_mb_type ) )
136 x264_cabac_encode_decision_noup( cb, 27+3, 1 );
137 x264_cabac_encode_decision_noup( cb, 27+4, 1 );
138 x264_cabac_encode_decision( cb, 27+5, 1 );
139 x264_cabac_encode_decision( cb, 27+5, 0 );
140 x264_cabac_encode_decision( cb, 27+5, 1 );
143 x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
/* Packed bin strings for the remaining B types: one row per type (the row
 * labels name the two list choices), one column per partition. The index is
 * computed below from i_mb_type and h->mb.i_partition; the bits are consumed
 * starting from the least significant one. */
147 static const uint8_t i_mb_bits[9*3] =
149 0x31, 0x29, 0x4, /* L0 L0 */
150 0x35, 0x2d, 0, /* L0 L1 */
151 0x43, 0x63, 0, /* L0 BI */
152 0x3d, 0x2f, 0, /* L1 L0 */
153 0x39, 0x25, 0x6, /* L1 L1 */
154 0x53, 0x73, 0, /* L1 BI */
155 0x4b, 0x6b, 0, /* BI L0 */
156 0x5b, 0x7b, 0, /* BI L1 */
157 0x47, 0x67, 0x21 /* BI BI */
160 const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
161 int bits = i_mb_bits[idx];
/* First bin on 27+3; the context of the second bin depends on the first.
 * NOTE(review): any conditions guarding the later bins are not visible here. */
163 x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
164 x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
167 x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
168 x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
169 x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
171 x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
/* Encode an intra 4x4 prediction mode relative to its predicted value.
 *   i_pred - predicted mode
 *   i_mode - actual mode
 * A 1 bin on context 68 signals i_mode == i_pred. Otherwise a 0 bin on
 * context 68 is followed by three bins on context 69 carrying i_mode, least
 * significant bit first. */
177 static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
179 if( i_pred == i_mode )
180 x264_cabac_encode_decision( cb, 68, 1 );
183 x264_cabac_encode_decision( cb, 68, 0 );
/* NOTE(review): the statement controlled by this test is not visible in this
 * listing; presumably it adjusts i_mode so that it fits in three bits - confirm. */
184 if( i_mode > i_pred )
186 x264_cabac_encode_decision( cb, 69, (i_mode )&0x01 );
187 x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 );
188 x264_cabac_encode_decision( cb, 69, (i_mode >> 2) );
/* Encode the chroma intra prediction mode, remapped through
 * x264_mb_pred_mode8x8c_fix. The first bin (i_mode > 0) uses context 64 + ctx;
 * the following bins (i_mode > 1, i_mode > 2) use context 64 + 3.
 * NOTE(review): the declaration of ctx, its updates and the conditions around
 * the later bins are not visible in this listing. */
192 static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
194 const int i_mode = x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ];
197 /* No need to test for I4x4 or I_16x16 as cache_save handles that */
/* The context looks at the left and top neighbours: each must be available
 * (MB_LEFT / MB_TOP set in i_neighbour) and have a non-zero stored chroma
 * prediction mode. */
198 if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 )
200 if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
203 x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 );
206 x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 );
208 x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 );
/* Encode the four luma coded-block-pattern bits of the current macroblock, one
 * bin per bit (bit 0 first). Each bin's context is 76 minus 1 if the
 * neighbouring bit used as "left" is set and minus 2 if the one used as "top"
 * is set. Those neighbour bits come from the cached left/top CBP (cbp_l,
 * cbp_t) for bits on the macroblock edge and from cbp itself otherwise. */
212 static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
214 int cbp = h->mb.i_cbp_luma;
215 int cbp_l = h->mb.cache.i_cbp_left;
216 int cbp_t = h->mb.cache.i_cbp_top;
/* bit 0: left = cbp_l bit 1, top = cbp_t bit 2 */
217 x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 );
/* bit 1: left = cbp bit 0, top = cbp_t bit 3 */
218 x264_cabac_encode_decision ( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 );
/* bit 2: left = cbp_l bit 3, top = cbp bit 0 */
219 x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (cbp >> 2) & 1 );
/* bit 3: left = cbp bit 2, top = cbp bit 1 */
220 x264_cabac_encode_decision_noup( cb, 76 - ((cbp >> 2) & 1) - ((cbp >> 0) & 2), (cbp >> 3) & 1 );
/* Encode the chroma coded block pattern (h->mb.i_cbp_chroma).
 * First bin: zero vs. non-zero, on context 77 + ctx. When non-zero, a second
 * bin on context 77 + ctx codes whether i_cbp_chroma > 1.
 * NOTE(review): the declaration of ctx and its re-initialisation between the
 * two bins are not visible in this listing. */
223 static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
/* Keep only bits 0x30 of the cached left/top CBP values. */
225 int cbp_a = h->mb.cache.i_cbp_left & 0x30;
226 int cbp_b = h->mb.cache.i_cbp_top & 0x30;
/* First bin: +1 if the left neighbour has any of those bits set, +2 for the
 * top one; a cached CBP of -1 never contributes. */
229 if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
230 if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2;
231 if( h->mb.i_cbp_chroma == 0 )
232 x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
235 x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );
/* Second bin: the increments now require the neighbour field to equal 0x20. */
238 if( cbp_a == 0x20 ) ctx++;
239 if( cbp_b == 0x20 ) ctx += 2;
240 x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 );
/* Encode the quantiser delta of the current macroblock, i.e.
 * h->mb.i_qp - h->mb.i_last_qp, using contexts based at 60.
 * NOTE(review): several short lines (the declaration of ctx, the loop around
 * the 1 bins and the context update inside it) are not visible here. */
244 static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
246 int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
249 /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
250 if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
/* In that case the macroblock QP is reset to the previous QP. */
253 h->mb.i_qp = h->mb.i_last_qp;
258 /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
259 * we don't have to check for them. */
/* Initial context: 1 only if the previous delta was non-zero and the previous
 * macroblock (i_mb_prev_xy) has a non-zero cbp. */
260 ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
/* Map the signed delta to a non-negative code: 0, +1, -1, +2, -2 ... become
 * 0, 1, 2, 3, 4 ... */
264 int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
265 /* dqp is interpreted modulo 52 */
266 if( val >= 51 && val != 52 )
270 x264_cabac_encode_decision( cb, 60 + ctx, 1 );
/* Terminating 0 bin. */
274 x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
/* Encode the skip flag b_skip into h->cabac.
 * The context is a base (11 for P slices, 24 otherwise) plus one for each of
 * the left and top neighbours that exists (type >= 0) and is not a skip type. */
278 void x264_cabac_mb_skip( x264_t *h, int b_skip )
280 int ctx = (h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ))
281 + (h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top ))
282 + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
283 x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
/* Encode a P sub-partition type i_sub on contexts 21..23:
 *   D_L0_8x8 -> 1 on 21
 *   D_L0_8x4 -> 0 on 21, 0 on 22
 *   others   -> 0 on 21, 1 on 22, then (i_sub == D_L0_4x8) on 23
 * NOTE(review): the else lines separating these paths are not visible in this
 * listing; the mapping above is read from the order of the calls. */
287 static inline void x264_cabac_mb_sub_p_partition( x264_cabac_t *cb, int i_sub )
289 if( i_sub == D_L0_8x8 )
291 x264_cabac_encode_decision( cb, 21, 1 );
294 x264_cabac_encode_decision( cb, 21, 0 );
295 if( i_sub == D_L0_8x4 )
296 x264_cabac_encode_decision( cb, 22, 0 );
299 x264_cabac_encode_decision( cb, 22, 1 );
300 x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 );
/* Encode a B sub-partition type i_sub on contexts 36..39:
 *   D_DIRECT_8x8 -> 0 on 36
 *   D_BI_8x8     -> 1 on 36, 1 on 37, 0 on 38, 0 on 39, 0 on 39
 *   others       -> 1 on 36, 0 on 37, then (i_sub == D_L1_8x8) on 39
 * NOTE(review): the else lines separating these paths are not visible in this
 * listing; the mapping above is read from the order of the calls. */
304 static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub )
306 if( i_sub == D_DIRECT_8x8 )
308 x264_cabac_encode_decision( cb, 36, 0 );
311 x264_cabac_encode_decision( cb, 36, 1 );
312 if( i_sub == D_BI_8x8 )
314 x264_cabac_encode_decision( cb, 37, 1 );
315 x264_cabac_encode_decision( cb, 38, 0 );
316 x264_cabac_encode_decision( cb, 39, 0 );
317 x264_cabac_encode_decision( cb, 39, 0 );
320 x264_cabac_encode_decision( cb, 37, 0 );
321 x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
/* Encode the transform size flag h->mb.b_transform_8x8 with
 * x264_cabac_encode_decision_noup, on context
 * 399 + h->mb.cache.i_neighbour_transform_size. */
324 static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
326 int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
327 x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
/* Encode the reference index of block idx for list i_list.
 * The value and its left (i8 - 1) and top (i8 - 8) neighbours are read from
 * h->mb.cache.ref; bins are coded on context 54 + ctx.
 * NOTE(review): the declaration and updates of ctx and the loop around the
 * 1 bin are not visible in this listing; the two visible calls suggest a run
 * of 1 bins closed by a 0 bin - confirm. */
330 static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
332 const int i8 = x264_scan8[idx];
333 const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
334 const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
335 int i_ref = h->mb.cache.ref[i_list][i8];
/* A neighbour influences the context only if its ref is > 0 and it is not
 * marked in h->mb.cache.skip. */
338 if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] )
340 if( i_refb > 0 && !h->mb.cache.skip[i8 - 8] )
345 x264_cabac_encode_decision( cb, 54 + ctx, 1 );
349 x264_cabac_encode_decision( cb, 54 + ctx, 0 );
/* Encode one component of a motion vector difference.
 *   l   - selects the context base: 47 when non-zero, 40 when zero
 *   mvd - signed difference to encode
 *   ctx - offset added to the context base for the first bin
 * Returns min(|mvd|, 33).
 * NOTE(review): two alternative codings of the magnitude are visible below.
 * The first adds table-based size estimates to cb->f8_bits_encoded, the second
 * emits every bin individually. The preprocessor lines that presumably select
 * one of them, and the conditions on i_abs, are not visible in this listing. */
352 static inline int x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
354 const int i_abs = abs( mvd );
355 const int ctxbase = l ? 47 : 40;
/* --- First variant --- */
/* First bin: 0 for a zero difference, 1 otherwise. */
359 x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
362 x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
/* Short path: bins coded one by one on contexts ctxbase+3.., then the sign
 * as a bypass bin. */
365 for( i = 1; i < i_abs; i++ )
366 x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
367 x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
368 x264_cabac_encode_bypass( cb, mvd < 0 );
/* Long path: three explicit 1 bins, then the remaining bins on context
 * ctxbase+6 are accounted for through the size / transition tables. */
372 x264_cabac_encode_decision( cb, ctxbase + 3, 1 );
373 x264_cabac_encode_decision( cb, ctxbase + 4, 1 );
374 x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
377 cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
378 cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
382 cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
383 cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]];
384 x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
/* --- Second variant --- */
/* Context offsets for the successive bins after the first; they saturate at 6. */
389 static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 };
392 x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
395 x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
/* Short path: (i_abs - 1) 1 bins and a closing 0 bin. */
398 for( i = 1; i < i_abs; i++ )
399 x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
400 x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 );
/* Long path: eight 1 bins, then i_abs - 9 through x264_cabac_encode_ue_bypass. */
404 for( i = 1; i < 9; i++ )
405 x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
406 x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
/* Sign as a bypass bin. */
408 x264_cabac_encode_bypass( cb, mvd < 0 );
411 /* Since we don't need to keep track of MVDs larger than 33, just cap the value.
412 * This lets us store MVDs as 8-bit values instead of 16-bit. */
413 return X264_MIN( i_abs, 33 );
/* Encode the motion vector difference of block idx in list i_list.
 * The prediction comes from x264_mb_predict_mv(); both components of
 * (cached mv - prediction) are coded with x264_cabac_mb_mvd_cpn().
 * Returns the two values returned by those calls, packed with pack8to16().
 * NOTE(review): the declarations of mdx and mdy are not visible in this listing. */
416 static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
418 ALIGNED_4( int16_t mvp[2] );
422 x264_mb_predict_mv( h, i_list, idx, width, mvp );
423 mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
424 mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
/* Combine the cached mvd entries of the left (-1) and top (-8) neighbours;
 * the low byte of the result is used for x below, the high byte for y. */
425 uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
426 h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
/* mdx / mdy are overwritten with the values returned by the component coder. */
429 mdx = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
430 mdy = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
432 return pack8to16(mdx,mdy);
/* Wrapper macro with the same name as the function: it calls the function
 * (which takes no height argument) and stores the returned packed value with
 * x264_macroblock_cache_mvd() for the width x height area at
 * block_idx_x[idx], block_idx_y[idx]. */
435 #define x264_cabac_mb_mvd(h,cb,i_list,idx,width,height)\
438 uint16_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\
439 x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
/* Encode the list-0 motion vector differences of 8x8 block i according to its
 * sub-partition type h->mb.i_sub_partition[i]. The last two arguments of each
 * call are the width and height parameters of x264_cabac_mb_mvd.
 * NOTE(review): the case labels and break statements are not visible in this
 * listing; the groups below are read from the call arguments only. */
442 static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
444 switch( h->mb.i_sub_partition[i] )
/* one 2x2 block */
447 x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
/* two blocks of width 2, height 1 */
450 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 2, 1 );
451 x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 2, 1 );
/* two blocks of width 1, height 2 */
454 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 2 );
455 x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 2 );
/* four 1x1 blocks */
458 x264_cabac_mb_mvd( h, cb, 0, 4*i+0, 1, 1 );
459 x264_cabac_mb_mvd( h, cb, 0, 4*i+1, 1, 1 );
460 x264_cabac_mb_mvd( h, cb, 0, 4*i+2, 1, 1 );
461 x264_cabac_mb_mvd( h, cb, 0, 4*i+3, 1, 1 );
468 /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
469 * 1-> AC 16x16 i_idx = luma4x4idx
470 * 2-> Luma4x4 i_idx = luma4x4idx
471 * 3-> DC Chroma i_idx = iCbCr
472 * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx
473 * 5-> Luma8x8 i_idx = luma8x8idx
// Return the context index for the coded-block flag of block i_idx in
// category i_cat: 85 + 4*i_cat plus an increment built from a left-neighbour
// term (i_nza) and a top-neighbour term (i_nzb, weighted by 2).
// NOTE(review): the switch/case lines that select between the three groups of
// statements below are not visible in this listing.
476 static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra )
486 /* no need to test for skip/pcm */
/* Group 1: neighbour terms come from the non_zero_count cache (left = -1,
 * top = -8 relative to the scan8 position). */
487 i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
488 i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
/* Shortcut when b_intra is a compile-time constant 0: the raw counts are
 * combined and masked with 0x7f. */
489 if( x264_constant_p(b_intra) && !b_intra )
490 return 85 + 4*i_cat + ((2*i_nzb + i_nza)&0x7f);
/* Otherwise bit 7 of the counts is kept only when b_intra is set, and each
 * count is reduced to 0/1. */
493 i_nza &= 0x7f + (b_intra << 7);
494 i_nzb &= 0x7f + (b_intra << 7);
495 return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza;
/* Group 2: neighbour terms are bit 8 of the cached left/top CBP. */
498 i_nza = (h->mb.cache.i_cbp_left >> 8) & 1;
499 i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1;
500 return 85 + 4*i_cat + 2*i_nzb + i_nza;
502 /* no need to test skip/pcm */
/* Group 3: neighbour terms are bit (9 + i_idx) of the cached left/top CBP, or
 * b_intra when that cached CBP is -1. */
504 i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra;
505 i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra;
506 return 85 + 4*i_cat + 2*i_nzb + i_nza;
/* Base context of the significant-coefficient flag, indexed by
 * [h->mb.b_interlaced][block category] in the residual writers. */
513 static const uint16_t significant_coeff_flag_offset[2][6] = {
514 { 105, 120, 134, 149, 152, 402 },
515 { 277, 292, 306, 321, 324, 436 }
/* Base context of the last-coefficient flag, indexed the same way. */
517 static const uint16_t last_coeff_flag_offset[2][6] = {
518 { 166, 181, 195, 210, 213, 417 },
519 { 338, 353, 367, 382, 385, 451 }
/* Base context of the coefficient level bins, indexed by block category. */
521 static const uint16_t coeff_abs_level_m1_offset[6] =
522 { 227, 237, 247, 257, 266, 426 };
/* Per-position context offset of the significant-coefficient flag for 8x8
 * blocks, indexed by [h->mb.b_interlaced][coefficient index]. */
523 static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
525 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
526 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
527 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
528 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
530 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
531 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
532 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
533 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
/* Per-position context offset of the last-coefficient flag for 8x8 blocks,
 * indexed by coefficient index. */
535 static const uint8_t last_coeff_flag_offset_8x8[63] = {
536 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
537 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
538 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
539 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
542 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
543 // 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
544 /* map node ctx => cabac ctx for level=1 */
545 static const int coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
546 /* map node ctx => cabac ctx for level>1 */
547 static const int coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
/* Next node ctx, indexed by [0: level was 1, 1: level was >1][node ctx]. */
548 static const uint8_t coeff_abs_level_transition[2][8] = {
549 /* update node ctx after coding a level=1 */
550 { 1, 2, 3, 3, 4, 5, 6, 7 },
551 /* update node ctx after coding a level>1 */
552 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Index of the final coefficient position per block category; the residual
 * writers use it for every category except DCT_LUMA_8x8, where 63 is used. */
554 static const int count_cat_m1[5] = {15, 14, 15, 3, 14};
/* Write the residual coefficients l of one block of category i_ctxBlockCat.
 * Two passes are visible: WRITE_SIGMAP codes the significant / last flags
 * while collecting |coeff| - 1 and the sign of each non-zero coefficient into
 * local arrays; the do/while loop then codes those levels and signs.
 * NOTE(review): several short lines (other local declarations, loop headers,
 * else lines, the macro invocations) are not visible in this listing. */
557 static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
559 const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
560 const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
561 const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
/* From here on this local pointer shadows the file-scope table of the same
 * name; it points at the 8x8 per-position offsets for the current field mode. */
562 const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
563 int i_coeff_abs_m1[64];
564 int i_coeff_sign[64];
/* Index of the last non-zero coefficient, from the per-category function table. */
570 i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
/* Significance map. With l8x8 set, the context offsets come from the 8x8
 * per-position tables; otherwise the coefficient index itself is the offset.
 * A non-zero coefficient gets a 1 significant flag and a last flag; a zero
 * coefficient gets a 0 significant flag. The coefficient at i_count_m1 is
 * recorded without coding any flag. */
572 #define WRITE_SIGMAP( l8x8 )\
577 i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
578 i_coeff_sign[i_coeff] = l[i] < 0;\
580 x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\
583 x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 1 );\
587 x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );\
590 x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 0 );\
592 if( i == i_count_m1 )\
594 i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
595 i_coeff_sign[i_coeff] = l[i] < 0;\
601 if( i_ctxBlockCat == DCT_LUMA_8x8 )
603 const int i_count_m1 = 63;
608 const int i_count_m1 = count_cat_m1[i_ctxBlockCat];
617 /* write coeff_abs - 1 */
/* The context-coded part is capped at 14; larger values get a bypass-coded
 * remainder through x264_cabac_encode_ue_bypass. */
618 i_prefix = X264_MIN( i_coeff_abs_m1[i_coeff], 14 );
619 ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
/* Level > 1: a 1 bin on the level=1 context, then 1 bins on the level>1
 * context closed by a 0 bin. */
623 x264_cabac_encode_decision( cb, ctx, 1 );
624 ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
625 for( i = 0; i < i_prefix - 1; i++ )
626 x264_cabac_encode_decision( cb, ctx, 1 );
628 x264_cabac_encode_decision( cb, ctx, 0 );
630 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i_coeff] - 14 );
632 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* Level == 1: a single 0 bin. */
636 x264_cabac_encode_decision( cb, ctx, 0 );
637 node_ctx = coeff_abs_level_transition[0][node_ctx];
/* Sign as a bypass bin. */
640 x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] );
641 } while( i_coeff > 0 );
/* 8x8 luma blocks go through the same writer with category DCT_LUMA_8x8. */
643 #define block_residual_write_cabac_8x8( h, cb, l ) block_residual_write_cabac( h, cb, DCT_LUMA_8x8, l )
647 /* Faster RDO by merging sigmap and level coding. Note that for 8x8dct
648 * this is slightly incorrect because the sigmap is not reversible
649 * (contexts are repeated). However, there is nearly no quality penalty
650 * for this (~0.001db) and the speed boost (~30%) is worth it. */
/* Merged significance-map / level coder for one block.
 *   i_ctxBlockCat - block category, selects the context bases
 *   l             - coefficients
 *   b_8x8         - use the 8x8 per-position context offsets and 63 as the
 *                   final coefficient index
 * Coefficients are visited from the last non-zero one down to index 0. Multi-bin
 * level prefixes are accounted for through cabac_size_unary /
 * cabac_transition_unary on cb->f8_bits_encoded and cb->state instead of being
 * coded bin by bin, and every sign is coded as a bypass 0.
 * NOTE(review): else lines and the test for a non-zero coefficient inside the
 * loop are not visible in this listing. */
651 static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l, int b_8x8 )
653 const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
654 const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
655 const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
/* From here on this local pointer shadows the file-scope table of the same name. */
656 const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
657 int i_last, i_coeff_abs, ctx, i, node_ctx;
659 i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
/* --- Last non-zero coefficient, coded with node ctx 0 --- */
661 i_coeff_abs = abs(l[i_last]);
662 ctx = coeff_abs_level1_ctx[0] + i_ctx_level;
/* Its significant / last flags are only coded when it is not at the final
 * position of the block. */
664 if( i_last != (b_8x8 ? 63 : count_cat_m1[i_ctxBlockCat]) )
666 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i_last]:i_last), 1 );
667 x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i_last]:i_last), 1 );
670 if( i_coeff_abs > 1 )
672 x264_cabac_encode_decision( cb, ctx, 1 );
673 ctx = coeff_abs_levelgt1_ctx[0] + i_ctx_level;
/* Below 15 the whole prefix is looked up by value; from 15 on the table row 14
 * is used and the remainder goes through x264_cabac_encode_ue_bypass. */
674 if( i_coeff_abs < 15 )
676 cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
677 cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
681 cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
682 cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
683 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
685 node_ctx = coeff_abs_level_transition[1][0];
/* Level == 1: a single 0 bin, then the sign. */
689 x264_cabac_encode_decision( cb, ctx, 0 );
690 node_ctx = coeff_abs_level_transition[0][0];
691 x264_cabac_encode_bypass( cb, 0 ); // sign
/* --- Remaining coefficients, highest index first --- */
694 for( i = i_last-1 ; i >= 0; i-- )
698 i_coeff_abs = abs(l[i]);
/* Non-zero coefficient: significant flag 1, last flag 0, then the level. */
699 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 1 );
700 x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i]:i), 0 );
701 ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
703 if( i_coeff_abs > 1 )
705 x264_cabac_encode_decision( cb, ctx, 1 );
706 ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
707 if( i_coeff_abs < 15 )
709 cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
710 cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
714 cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
715 cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
716 x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
718 node_ctx = coeff_abs_level_transition[1][node_ctx];
722 x264_cabac_encode_decision( cb, ctx, 0 );
723 node_ctx = coeff_abs_level_transition[0][node_ctx];
724 x264_cabac_encode_bypass( cb, 0 );
/* Zero coefficient: significant flag 0 only. */
728 x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?significant_coeff_flag_offset[i]:i), 0 );
/* 8x8 luma entry point: category DCT_LUMA_8x8 with b_8x8 = 1. */
732 static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int16_t *l )
734 block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 );
/* Entry point for every other category: forwards i_ctxBlockCat with b_8x8 = 0. */
736 static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
738 block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, l, 0 );
/* Code the coded-block flag of block i_idx (category i_ctxBlockCat) on the
 * context returned by x264_cabac_mb_cbf_ctxidxinc(). When the cached
 * non_zero_count of the block is non-zero the flag is 1 and the residual l is
 * written; the other visible call codes a 0 flag. */
742 #define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \
744 int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \
745 if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
747 x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
748 block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \
751 x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
/* Write the current macroblock to cb: type, prediction information
 * (intra modes, or reference indices and motion vector differences), coded
 * block pattern, QP delta and residual coefficients. Bit counts are added to
 * h->stat.frame.i_mv_bits and h->stat.frame.i_tex_bits.
 * NOTE(review): short lines (local declarations, braces, else lines,
 * preprocessor directives, and some short statements in the I_PCM path) are
 * not visible in this listing. */
754 void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
756 const int i_mb_type = h->mb.i_type;
761 const int i_mb_pos_start = x264_cabac_pos( cb );
765 /* Write the MB type */
766 x264_cabac_mb_type( h, cb );
/* I_PCM: the source samples are copied directly into the output buffer. */
769 if( i_mb_type == I_PCM )
771 i_mb_pos_tex = x264_cabac_pos( cb );
772 h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
/* 256 bytes from plane 0, then 8 rows of 8 bytes from planes 1 and 2. */
774 memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
776 for( i = 0; i < 8; i++ )
777 memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
779 for( i = 0; i < 8; i++ )
780 memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
/* Coder state fields are reset after the raw samples. */
784 cb->i_range = 0x01FE;
786 cb->i_bytes_outstanding = 0;
788 /* if PCM is chosen, we need to store reconstructed frame data */
789 h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
790 h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
791 h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
793 h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
/* Intra: optional transform size flag, luma prediction modes (except for
 * I_16x16), then the chroma prediction mode. */
798 if( IS_INTRA( i_mb_type ) )
800 if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
801 x264_cabac_mb_transform_size( h, cb );
803 if( i_mb_type != I_16x16 )
/* With the 8x8 transform one mode is coded per group of four block indices. */
805 int di = h->mb.b_transform_8x8 ? 4 : 1;
806 for( i = 0; i < 16; i += di )
808 const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
809 const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
810 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
814 x264_cabac_mb_intra_chroma_pred_mode( h, cb );
/* P_L0: per partition, reference indices (only when list 0 has more than one
 * reference) followed by the motion vector differences. */
816 else if( i_mb_type == P_L0 )
818 if( h->mb.i_partition == D_16x16 )
820 if( h->mb.pic.i_fref[0] > 1 )
822 x264_cabac_mb_ref( h, cb, 0, 0 );
824 x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
826 else if( h->mb.i_partition == D_16x8 )
828 if( h->mb.pic.i_fref[0] > 1 )
830 x264_cabac_mb_ref( h, cb, 0, 0 );
831 x264_cabac_mb_ref( h, cb, 0, 8 );
833 x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
834 x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
836 else //if( h->mb.i_partition == D_8x16 )
838 if( h->mb.pic.i_fref[0] > 1 )
840 x264_cabac_mb_ref( h, cb, 0, 0 );
841 x264_cabac_mb_ref( h, cb, 0, 4 );
843 x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
844 x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
/* P_8x8: the four sub-partition types, then the references, then the mvds. */
847 else if( i_mb_type == P_8x8 )
850 for( i = 0; i < 4; i++ )
851 x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
854 if( h->mb.pic.i_fref[0] > 1 )
856 x264_cabac_mb_ref( h, cb, 0, 0 );
857 x264_cabac_mb_ref( h, cb, 0, 4 );
858 x264_cabac_mb_ref( h, cb, 0, 8 );
859 x264_cabac_mb_ref( h, cb, 0, 12 );
862 for( i = 0; i < 4; i++ )
863 x264_cabac_mb8x8_mvd( h, cb, i );
/* B_8x8: sub-partition types, then for each list the references and mvds of
 * the sub-partitions that x264_mb_partition_listX_table marks for that list. */
865 else if( i_mb_type == B_8x8 )
868 for( i = 0; i < 4; i++ )
869 x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
872 if( h->mb.pic.i_fref[0] > 1 )
873 for( i = 0; i < 4; i++ )
874 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
875 x264_cabac_mb_ref( h, cb, 0, 4*i );
877 if( h->mb.pic.i_fref[1] > 1 )
878 for( i = 0; i < 4; i++ )
879 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
880 x264_cabac_mb_ref( h, cb, 1, 4*i );
882 for( i = 0; i < 4; i++ )
883 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
884 x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
886 for( i = 0; i < 4; i++ )
887 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
888 x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
/* Any other type except B_DIRECT: b_list[list][part] tells whether a list is
 * used by a partition. */
890 else if( i_mb_type != B_DIRECT )
893 const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
894 if( h->mb.pic.i_fref[0] > 1 )
897 x264_cabac_mb_ref( h, cb, 0, 0 );
/* The second partition is block 8, or block 4 when the partition is D_8x16. */
898 if( b_list[0][1] && h->mb.i_partition != D_16x16 )
899 x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
901 if( h->mb.pic.i_fref[1] > 1 )
904 x264_cabac_mb_ref( h, cb, 1, 0 );
905 if( b_list[1][1] && h->mb.i_partition != D_16x16 )
906 x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
908 for( i_list = 0; i_list < 2; i_list++ )
910 if( h->mb.i_partition == D_16x16 )
912 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
914 else if( h->mb.i_partition == D_16x8 )
916 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
917 if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
919 else //if( h->mb.i_partition == D_8x16 )
921 if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
922 if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
/* Everything written so far is accounted as i_mv_bits; the rest as i_tex_bits. */
928 i_mb_pos_tex = x264_cabac_pos( cb );
929 h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
/* The coded block pattern is written here for every type except I_16x16. */
932 if( i_mb_type != I_16x16 )
934 x264_cabac_mb_cbp_luma( h, cb );
935 x264_cabac_mb_cbp_chroma( h, cb );
938 if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
940 x264_cabac_mb_transform_size( h, cb );
/* QP delta and residual only when something is coded, or always for I_16x16. */
943 if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
945 const int b_intra = IS_INTRA( i_mb_type );
946 x264_cabac_mb_qp_delta( h, cb );
/* I_16x16: luma DC block (index 24), then the 16 AC blocks when luma CBP is set. */
949 if( i_mb_type == I_16x16 )
952 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
955 if( h->mb.i_cbp_luma != 0 )
956 for( i = 0; i < 16; i++ )
957 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
/* 8x8 transform: one block per set luma CBP bit, without a coded-block flag. */
959 else if( h->mb.b_transform_8x8 )
961 for( i = 0; i < 4; i++ )
962 if( h->mb.i_cbp_luma & ( 1 << i ) )
963 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i] );
/* 4x4 transform: blocks whose 8x8 group (i / 4) is set in the luma CBP. */
967 for( i = 0; i < 16; i++ )
968 if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
969 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
972 if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */
974 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
975 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
976 if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
977 for( i = 16; i < 24; i++ )
978 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, b_intra );
983 h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
988 /*****************************************************************************
989 * RD only; doesn't generate a valid bitstream
990 * doesn't write cbp or chroma dc (I don't know how much this matters)
991 * doesn't write ref (never varies between calls, so no point in doing so)
992 * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
993 * works on all partition sizes except 16x16
994 *****************************************************************************/
/* Write the motion vector differences and residual for the partition that
 * starts at 8x8 block i8 and has size i_pixel.
 * NOTE(review): local declarations (j, i4), else lines and braces are not
 * visible in this listing. */
995 static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
997 const int i_mb_type = h->mb.i_type;
/* b_8x16 turns the (4,2) width/height pair used below into (2,4). */
998 int b_8x16 = h->mb.i_partition == D_8x16;
1001 if( i_mb_type == P_8x8 )
1003 x264_cabac_mb8x8_mvd( h, cb, i8 );
1004 x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
1006 else if( i_mb_type == P_L0 )
1007 x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
/* Types strictly between B_DIRECT and B_8x8: one mvd per list that
 * x264_mb_type_list_table marks as used by partition !!i8. */
1008 else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
1010 if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1011 if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
1013 else //if( i_mb_type == B_8x8 )
1015 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
1016 x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
1017 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
1018 x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
/* Residual: two iterations when i_pixel < PIXEL_8x8, otherwise one; i8 is
 * advanced at the end of each iteration. */
1021 for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
1023 if( h->mb.i_cbp_luma & (1 << i8) )
1025 if( h->mb.b_transform_8x8 )
1026 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
1030 for( i4 = 0; i4 < 4; i4++ )
1031 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 0 );
/* Chroma AC for block indices 16+i8 and 20+i8, skipping the first coefficient. */
1035 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
1036 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 0 );
1038 i8 += x264_pixel_size[i_pixel].h >> 3;
/* Write the residual and list-0 motion vector difference of the sub-partition
 * that starts at 4x4 block i4 and has size i_pixel.
 * NOTE(review): the else line between the two mvd calls is not visible here. */
1042 static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
1044 int b_8x4 = i_pixel == PIXEL_8x4;
1045 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
1046 if( i_pixel == PIXEL_4x4 )
1048 x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
/* Two-block sub-partition: width/height is (2,1) when b_8x4 is set and (1,2)
 * otherwise; the second block is i4+1 or i4+2 respectively. */
1052 x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
1053 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+2-b_8x4, h->dct.luma4x4[i4+2-b_8x4], 0 );
/* Write an intra 8x8 block i8 coded with prediction mode i_mode: the mode
 * (relative to the prediction for block 4*i8, after remapping through
 * x264_mb_pred_mode4x4_fix), the luma CBP, and the 8x8 residual when the CBP
 * bit of i8 is set. */
1057 static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
1059 const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
1060 i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1061 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
1062 x264_cabac_mb_cbp_luma( h, cb );
1063 if( h->mb.i_cbp_luma & (1 << i8) )
1064 block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
/* Write an intra 4x4 block i4 coded with prediction mode i_mode: the mode
 * (relative to its prediction, after remapping through
 * x264_mb_pred_mode4x4_fix) and the block's coded-block flag and residual,
 * with b_intra = 1. */
1067 static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
1069 const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
1070 i_mode = x264_mb_pred_mode4x4_fix( i_mode );
1071 x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
1072 block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 1 );
/* Write the intra chroma information: prediction mode, chroma CBP, the two
 * chroma DC blocks (indices 25 and 26) when the chroma CBP is non-zero, and
 * the eight chroma AC blocks (indices 16..23) when it equals 2.
 * NOTE(review): braces and the declaration of i are not visible, and the end
 * of this function may lie beyond the visible listing. */
1075 static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
1077 x264_cabac_mb_intra_chroma_pred_mode( h, cb );
1078 x264_cabac_mb_cbp_chroma( h, cb );
1079 if( h->mb.i_cbp_chroma > 0 )
1081 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 1 );
1082 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 1 );
1084 if( h->mb.i_cbp_chroma == 2 )
1087 for( i = 16; i < 24; i++ )
1088 block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 1 );