2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/* Coded-block-pattern lookup tables carrying the "_gray" suffix.
 * ff_h264_decode_mb_cavlc indexes them with the value it has just read through
 * get_ue_golomb().  NOTE(review): the condition that picks these tables over
 * the non-"_gray" ones is not visible in this extract. */
/* Looked up for macroblocks that are not IS_INTRA4x4. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Looked up for IS_INTRA4x4 macroblocks. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Source data for the coeff_token VLCs built in ff_h264_decode_init_vlc.
 * Each "_len" array is handed to init_vlc() together with the matching "_bits"
 * array (presumably code lengths and code words respectively, following
 * init_vlc's parameter order -- confirm against its declaration). */
/* 4*5 entries feeding chroma_dc_coeff_token_vlc. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/* One row of 4*17 entries per coeff_token_vlc[i].  decode_residual splits the
 * decoded symbol as total_coeff = symbol>>2 and trailing_ones = symbol&3, so a
 * row is laid out in groups of four entries per total_coeff value. */
65 static const uint8_t coeff_token_len[4][4*17]={
68 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
69 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
70 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
71 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
75 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
76 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
77 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
78 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
82 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
83 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
84 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
85 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
89 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Companion of coeff_token_len: same [4][4*17] layout, passed to init_vlc()
 * as the second data array for coeff_token_vlc[i]. */
96 static const uint8_t coeff_token_bits[4][4*17]={
99 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
100 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
101 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
102 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
106 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
107 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
108 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
109 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
113 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
114 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
115 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
116 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
120 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
121 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
122 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
123 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Source data for the total_zeros and run_before VLCs built in
 * ff_h264_decode_init_vlc.  Only some rows are visible in this extract. */
/* Row i (16 entries) feeds total_zeros_vlc[i]; decode_residual selects the VLC
 * with (total_zeros_vlc-1)[total_coeff], i.e. the row for total_coeff-1. */
127 static const uint8_t total_zeros_len[16][16]= {
128 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131 {5,3,4,4,3,3,3,4,3,4,5,5,5},
132 {4,4,4,3,3,3,3,3,4,5,4,5},
133 {6,5,3,3,3,3,3,3,4,3,6},
134 {6,5,3,3,3,2,3,4,3,6},
/* Companion of total_zeros_len, passed to init_vlc() alongside it. */
145 static const uint8_t total_zeros_bits[16][16]= {
146 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149 {3,7,5,4,6,5,4,3,3,2,2,1,0},
150 {5,4,3,7,6,5,4,3,2,1,1,0},
151 {1,1,7,6,5,4,3,2,1,1,0},
152 {1,1,5,4,3,3,2,1,1,0},
/* Row i (4 entries) feeds chroma_dc_total_zeros_vlc[i]. */
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* Rows indexed by i feed run_vlc[i]; row 6 is used separately for run7_vlc. */
175 static const uint8_t run_len[7][16]={
182 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
185 static const uint8_t run_bits[7][16]={
192 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Statically allocated VLC objects and their backing tables.  For each VLC,
 * ff_h264_decode_init_vlc points .table at the storage below and copies the
 * matching *_size constant into .table_allocated before calling init_vlc(). */
/* The four coeff_token VLCs share one packed array; each takes a slice whose
 * length comes from coeff_token_vlc_tables_size[], and the init code asserts
 * that the slices add up to the full array length. */
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
/* Backing storage for run7_vlc (its own declaration is not visible here). */
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
/* Number of bits decode_residual peeks with show_bits() to index
 * cavlc_level_tab.  Per entry, [0] holds either a decoded level or an escape
 * value (>= 100) and [1] holds the number of bits to skip. */
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
/* Looks up the left (index8 - 1) and top (index8 - 8) neighbours of block n in
 * h->non_zero_count_cache and derives a prediction from them.
 * NOTE(review): the declaration of i and the return statement are not visible
 * in this extract; i is presumably left + top -- confirm against the full file. */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228 const int index8= scan8[n];
229 const int left= h->non_zero_count_cache[index8 - 1];
230 const int top = h->non_zero_count_cache[index8 - 8];
/* Below 64, i is halved with rounding up; larger values are left untouched. */
233 if(i<64) i= (i+1)>>1;
/* The trace output reports i masked to its low five bits. */
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fills cavlc_level_tab for every suffix_length (0..6) and every possible
 * LEVEL_TAB_BITS-bit window i, so decode_residual can resolve short level
 * codes with one table lookup.
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this extract. */
240 static av_cold void init_cavlc_level_tab(void){
241 int suffix_length, mask;
244 for(suffix_length=0; suffix_length<7; suffix_length++){
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix is derived from the position of the highest set bit of the window. */
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* Turn the unsigned code into a signed level: odd codes become negative. */
249 mask= -(level_code&1);
250 level_code= (((2+level_code)>>1) ^ mask) - mask;
/* Case 1: prefix, terminating bit and suffix all fit in the window, so store
 * the decoded level and the total number of bits it occupies. */
251 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252 cavlc_level_tab[suffix_length][i][0]= level_code;
253 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* Case 2: only the prefix fits.  Store prefix+100 as an escape value and the
 * prefix length as the bit count; the reader fetches the suffix itself. */
254 }else if(prefix + 1 <= LEVEL_TAB_BITS){
255 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* Remaining case: store LEVEL_TAB_BITS+100, which decode_residual recognises
 * (prefix == LEVEL_TAB_BITS) and extends via get_level_prefix(). */
258 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* Builds every CAVLC lookup structure used by this file: each VLC is pointed
 * at its static backing table, given that table's capacity, and initialised
 * with init_vlc(..., INIT_VLC_USE_NEW_STATIC); finally the level table is
 * filled by init_cavlc_level_tab().
 * NOTE(review): the loop headers and the declarations of i and offset are not
 * visible in this extract, and neither is any guard around the body. */
265 av_cold void ff_h264_decode_init_vlc(void){
/* Chroma DC coeff_token VLC: a single table with 4*5 codes. */
273 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276 &chroma_dc_coeff_token_len [0], 1, 1,
277 &chroma_dc_coeff_token_bits[0], 1, 1,
278 INIT_VLC_USE_NEW_STATIC);
/* coeff_token VLCs: each one takes the next slice of the shared packed array,
 * and offset advances by that slice's size. */
282 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285 &coeff_token_len [i][0], 1, 1,
286 &coeff_token_bits[i][0], 1, 1,
287 INIT_VLC_USE_NEW_STATIC);
288 offset += coeff_token_vlc_tables_size[i];
291 * This is a one time safety check to make sure that
292 * the packed static coeff_token_vlc table sizes
293 * were initialized correctly.
295 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* Chroma DC total_zeros VLCs: 4 codes each. */
298 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300 init_vlc(&chroma_dc_total_zeros_vlc[i],
301 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302 &chroma_dc_total_zeros_len [i][0], 1, 1,
303 &chroma_dc_total_zeros_bits[i][0], 1, 1,
304 INIT_VLC_USE_NEW_STATIC);
/* total_zeros VLCs: 16 codes each. */
307 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309 init_vlc(&total_zeros_vlc[i],
310 TOTAL_ZEROS_VLC_BITS, 16,
311 &total_zeros_len [i][0], 1, 1,
312 &total_zeros_bits[i][0], 1, 1,
313 INIT_VLC_USE_NEW_STATIC);
/* run_before VLCs taken from rows of run_len / run_bits indexed by i. */
317 run_vlc[i].table = run_vlc_tables[i];
318 run_vlc[i].table_allocated = run_vlc_tables_size;
319 init_vlc(&run_vlc[i],
321 &run_len [i][0], 1, 1,
322 &run_bits[i][0], 1, 1,
323 INIT_VLC_USE_NEW_STATIC);
/* Separate, larger VLC built from row 6 of the run tables.
 * NOTE(review): the first assignment ends with a comma rather than a
 * semicolon; the comma operator evaluates both assignments in order, so the
 * behaviour is the same. */
325 run7_vlc.table = run7_vlc_table,
326 run7_vlc.table_allocated = run7_vlc_table_size;
327 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328 &run_len [6][0], 1, 1,
329 &run_bits[6][0], 1, 1,
330 INIT_VLC_USE_NEW_STATIC);
/* Precompute the level lookup table used by decode_residual. */
332 init_cavlc_level_tab();
/* Reads a level prefix directly from the bit reader: refills and fetches the
 * cache, computes log = 32 - av_log2(buf) and consumes log bits.
 * NOTE(review): the local declarations, the reader-opening macro and the
 * return statement are not visible in this extract.  The debug output prints
 * log-1 under the label "lpr", so that is presumably the returned prefix --
 * confirm against the full file. */
339 static inline int get_level_prefix(GetBitContext *gb){
344 UPDATE_CACHE(re, gb);
345 buf=GET_CACHE(re, gb);
347 log= 32 - av_log2(buf);
/* Debug dump of the consumed bits; any conditional compilation around these
 * two lines is not visible here. */
349 print_bin(buf>>(32-log), log);
350 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* Skip the log bits just examined and write the reader state back to gb. */
353 LAST_SKIP_BITS(re, gb, log);
354 CLOSE_READER(re, gb);
360 * decodes a residual block.
361 * @param n block index
362 * @param scantable scantable
363 * @param max_coeff number of coefficients in the block
364 * @return <0 if an error occurred
/* Decodes one CAVLC residual block from gb into block.
 * Steps visible in this extract: read coeff_token (giving total_coeff and
 * trailing_ones), read the trailing +/-1 signs, read the remaining levels,
 * read total_zeros, then place the levels via STORE_BLOCK while reading
 * run_before values.
 * NOTE(review): many lines (local declarations such as level[], prefix and
 * mask, several else branches, closing braces and all return statements) are
 * not visible here, so the exact branch nesting must be checked against the
 * full file. */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367 MpegEncContext * const s = &h->s;
/* Maps a predicted coefficient count (0..16) to one of the four coeff_token VLCs. */
368 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
370 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
372 //FIXME put trailing_ones into the context
/* coeff_token: chroma DC blocks use their own VLC; the other blocks choose a
 * VLC from the neighbour-based prediction (luma DC predicts from block 0).
 * In every case total_coeff is the symbol shifted right by two. */
374 if(n >= CHROMA_DC_BLOCK_INDEX){
375 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376 total_coeff= coeff_token>>2;
378 if(n == LUMA_DC_BLOCK_INDEX){
379 total_coeff= pred_non_zero_count(h, 0);
380 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381 total_coeff= coeff_token>>2;
383 total_coeff= pred_non_zero_count(h, n);
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385 total_coeff= coeff_token>>2;
/* Record the count for later neighbour prediction (which branch this line
 * belongs to is not visible here). */
388 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
390 //FIXME set last_non_zero?
/* The unsigned comparison also catches a negative total_coeff. */
394 if(total_coeff > (unsigned)max_coeff) {
395 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
399 trailing_ones= coeff_token&3;
400 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401 assert(total_coeff<=16);
/* Trailing ones: peek three bits but consume only trailing_ones of them.
 * A set bit yields -1 and a clear bit +1; unused entries are overwritten below. */
403 i = show_bits(gb, 3);
404 skip_bits(gb, trailing_ones);
405 level[0] = 1-((i&4)>>1);
406 level[1] = 1-((i&2) );
407 level[2] = 1-((i&1)<<1);
409 if(trailing_ones<total_coeff) {
/* suffix_length starts at 1 only when total_coeff > 10 and trailing_ones < 3;
 * both comparisons yield 0 or 1, so the bitwise & acts as a logical AND. */
411 int suffix_length = total_coeff > 10 & trailing_ones < 3;
412 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
415 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Table values >= 100 are escapes carrying only the prefix; a prefix equal to
 * LEVEL_TAB_BITS means the window was exhausted and more must be read. */
416 if(level_code >= 100){
417 prefix= level_code - 100;
418 if(prefix == LEVEL_TAB_BITS)
419 prefix += get_level_prefix(gb);
421 //first coefficient has suffix_length equal to 0 or 1
422 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
424 level_code= (prefix<<1) + get_bits1(gb); //part
426 level_code= prefix; //part
427 }else if(prefix==14){
429 level_code= (prefix<<1) + get_bits1(gb); //part
431 level_code= prefix + get_bits(gb, 4); //part
433 level_code= 30 + get_bits(gb, prefix-3); //part
436 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
439 level_code += (1<<(prefix-3))-4096;
443 if(trailing_ones < 3) level_code += 2;
446 mask= -(level_code&1);
447 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
449 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* Unsigned range test: suffix_length becomes 2 when level_code lies outside
 * [-3, 3], otherwise 1. */
451 suffix_length = 1 + (level_code + 3U > 6U);
452 level[trailing_ones]= level_code;
455 //remaining coefficients have suffix_length > 0
456 for(i=trailing_ones+1;i<total_coeff;i++) {
457 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
458 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
459 level_code= cavlc_level_tab[suffix_length][bitsi][0];
461 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
462 if(level_code >= 100){
463 prefix= level_code - 100;
464 if(prefix == LEVEL_TAB_BITS){
465 prefix += get_level_prefix(gb);
468 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
470 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
472 level_code += (1<<(prefix-3))-4096;
474 mask= -(level_code&1);
475 level_code= (((2+level_code)>>1) ^ mask) - mask;
477 level[i]= level_code;
/* Unsigned range test: suffix_length grows by one when level_code falls
 * outside [-limit, limit] for the current suffix_length. */
478 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* total_zeros: the statement taken when the block is completely full is not
 * visible here; otherwise the VLC for total_coeff-1 is used (hence the
 * "-1" applied to the array base). */
482 if(total_coeff == max_coeff)
485 if(n >= CHROMA_DC_BLOCK_INDEX)
486 zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
488 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK(type) writes the levels into block, viewed as an array of type,
 * starting at scan position zeros_left + total_coeff - 1 and walking
 * backwards.  While zeros remain, a run_before value is read before each
 * coefficient.  The first group of stores writes raw levels; the second group
 * scales each level as (level * qmul[pos] + 32) >> 6.  The else line
 * separating the two groups is not visible in this extract. */
491 #define STORE_BLOCK(type) \
492 scantable += zeros_left + total_coeff - 1; \
493 if(n >= LUMA_DC_BLOCK_INDEX){ \
494 ((type*)block)[*scantable] = level[0]; \
495 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
497 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
499 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
500 zeros_left -= run_before; \
501 scantable -= 1 + run_before; \
502 ((type*)block)[*scantable]= level[i]; \
504 for(;i<total_coeff;i++) { \
506 ((type*)block)[*scantable]= level[i]; \
509 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
510 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
512 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
514 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
515 zeros_left -= run_before; \
516 scantable -= 1 + run_before; \
517 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
519 for(;i<total_coeff;i++) { \
521 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
525 if (h->pixel_shift) {
532 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
539 int ff_h264_decode_mb_cavlc(H264Context *h){
540 MpegEncContext * const s = &h->s;
543 unsigned int mb_type, cbp;
544 int dct8x8_allowed= h->pps.transform_8x8_mode;
546 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
548 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
549 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
551 if(h->slice_type_nos != FF_I_TYPE){
552 if(s->mb_skip_run==-1)
553 s->mb_skip_run= get_ue_golomb(&s->gb);
555 if (s->mb_skip_run--) {
556 if(FRAME_MBAFF && (s->mb_y&1) == 0){
557 if(s->mb_skip_run==0)
558 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
565 if( (s->mb_y&1) == 0 )
566 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
569 h->prev_mb_skipped= 0;
571 mb_type= get_ue_golomb(&s->gb);
572 if(h->slice_type_nos == FF_B_TYPE){
574 partition_count= b_mb_type_info[mb_type].partition_count;
575 mb_type= b_mb_type_info[mb_type].type;
578 goto decode_intra_mb;
580 }else if(h->slice_type_nos == FF_P_TYPE){
582 partition_count= p_mb_type_info[mb_type].partition_count;
583 mb_type= p_mb_type_info[mb_type].type;
586 goto decode_intra_mb;
589 assert(h->slice_type_nos == FF_I_TYPE);
590 if(h->slice_type == FF_SI_TYPE && mb_type)
594 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
598 cbp= i_mb_type_info[mb_type].cbp;
599 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
600 mb_type= i_mb_type_info[mb_type].type;
604 mb_type |= MB_TYPE_INTERLACED;
606 h->slice_table[ mb_xy ]= h->slice_num;
608 if(IS_INTRA_PCM(mb_type)){
611 // We assume these blocks are very rare so we do not optimize it.
612 align_get_bits(&s->gb);
614 // The pixels are stored in the same order as levels in h->mb array.
615 for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
616 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
619 // In deblocking, the quantizer is 0
620 s->current_picture.qscale_table[mb_xy]= 0;
621 // All coeffs are present
622 memset(h->non_zero_count[mb_xy], 16, 32);
624 s->current_picture.mb_type[mb_xy]= mb_type;
629 h->ref_count[0] <<= 1;
630 h->ref_count[1] <<= 1;
633 fill_decode_neighbors(h, mb_type);
634 fill_decode_caches(h, mb_type);
637 if(IS_INTRA(mb_type)){
639 // init_top_left_availability(h);
640 if(IS_INTRA4x4(mb_type)){
643 if(dct8x8_allowed && get_bits1(&s->gb)){
644 mb_type |= MB_TYPE_8x8DCT;
648 // fill_intra4x4_pred_table(h);
649 for(i=0; i<16; i+=di){
650 int mode= pred_intra_mode(h, i);
652 if(!get_bits1(&s->gb)){
653 const int rem_mode= get_bits(&s->gb, 3);
654 mode = rem_mode + (rem_mode >= mode);
658 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
660 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
662 ff_h264_write_back_intra_pred_mode(h);
663 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
666 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
667 if(h->intra16x16_pred_mode < 0)
671 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
674 h->chroma_pred_mode= pred_mode;
676 h->chroma_pred_mode = DC_128_PRED8x8;
678 }else if(partition_count==4){
679 int i, j, sub_partition_count[4], list, ref[2][4];
681 if(h->slice_type_nos == FF_B_TYPE){
683 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
684 if(h->sub_mb_type[i] >=13){
685 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
688 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
689 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
691 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
692 ff_h264_pred_direct_motion(h, &mb_type);
693 h->ref_cache[0][scan8[4]] =
694 h->ref_cache[1][scan8[4]] =
695 h->ref_cache[0][scan8[12]] =
696 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
699 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
701 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
702 if(h->sub_mb_type[i] >=4){
703 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
706 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
707 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
711 for(list=0; list<h->list_count; list++){
712 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
714 if(IS_DIRECT(h->sub_mb_type[i])) continue;
715 if(IS_DIR(h->sub_mb_type[i], 0, list)){
719 }else if(ref_count == 2){
720 tmp= get_bits1(&s->gb)^1;
722 tmp= get_ue_golomb_31(&s->gb);
724 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
737 dct8x8_allowed = get_dct8x8_allowed(h);
739 for(list=0; list<h->list_count; list++){
741 if(IS_DIRECT(h->sub_mb_type[i])) {
742 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
745 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
746 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
748 if(IS_DIR(h->sub_mb_type[i], 0, list)){
749 const int sub_mb_type= h->sub_mb_type[i];
750 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
751 for(j=0; j<sub_partition_count[i]; j++){
753 const int index= 4*i + block_width*j;
754 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
755 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
756 mx += get_se_golomb(&s->gb);
757 my += get_se_golomb(&s->gb);
758 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
760 if(IS_SUB_8X8(sub_mb_type)){
762 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
764 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
765 }else if(IS_SUB_8X4(sub_mb_type)){
766 mv_cache[ 1 ][0]= mx;
767 mv_cache[ 1 ][1]= my;
768 }else if(IS_SUB_4X8(sub_mb_type)){
769 mv_cache[ 8 ][0]= mx;
770 mv_cache[ 8 ][1]= my;
772 mv_cache[ 0 ][0]= mx;
773 mv_cache[ 0 ][1]= my;
776 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
782 }else if(IS_DIRECT(mb_type)){
783 ff_h264_pred_direct_motion(h, &mb_type);
784 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
787 //FIXME we should set ref_idx_l? to 0 if we use that later ...
788 if(IS_16X16(mb_type)){
789 for(list=0; list<h->list_count; list++){
791 if(IS_DIR(mb_type, 0, list)){
792 if(h->ref_count[list]==1){
794 }else if(h->ref_count[list]==2){
795 val= get_bits1(&s->gb)^1;
797 val= get_ue_golomb_31(&s->gb);
798 if(val >= h->ref_count[list]){
799 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
803 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
806 for(list=0; list<h->list_count; list++){
807 if(IS_DIR(mb_type, 0, list)){
808 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
809 mx += get_se_golomb(&s->gb);
810 my += get_se_golomb(&s->gb);
811 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
813 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
817 else if(IS_16X8(mb_type)){
818 for(list=0; list<h->list_count; list++){
821 if(IS_DIR(mb_type, i, list)){
822 if(h->ref_count[list] == 1){
824 }else if(h->ref_count[list] == 2){
825 val= get_bits1(&s->gb)^1;
827 val= get_ue_golomb_31(&s->gb);
828 if(val >= h->ref_count[list]){
829 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
834 val= LIST_NOT_USED&0xFF;
835 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
838 for(list=0; list<h->list_count; list++){
841 if(IS_DIR(mb_type, i, list)){
842 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
843 mx += get_se_golomb(&s->gb);
844 my += get_se_golomb(&s->gb);
845 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
847 val= pack16to32(mx,my);
850 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
854 assert(IS_8X16(mb_type));
855 for(list=0; list<h->list_count; list++){
858 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
859 if(h->ref_count[list]==1){
861 }else if(h->ref_count[list]==2){
862 val= get_bits1(&s->gb)^1;
864 val= get_ue_golomb_31(&s->gb);
865 if(val >= h->ref_count[list]){
866 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
871 val= LIST_NOT_USED&0xFF;
872 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
875 for(list=0; list<h->list_count; list++){
878 if(IS_DIR(mb_type, i, list)){
879 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
880 mx += get_se_golomb(&s->gb);
881 my += get_se_golomb(&s->gb);
882 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
884 val= pack16to32(mx,my);
887 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
893 if(IS_INTER(mb_type))
894 write_back_motion(h, mb_type);
896 if(!IS_INTRA16x16(mb_type)){
897 cbp= get_ue_golomb(&s->gb);
899 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
904 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
905 else cbp= golomb_to_inter_cbp [cbp];
907 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
908 else cbp= golomb_to_inter_cbp_gray[cbp];
912 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
913 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
916 h->cbp_table[mb_xy]= cbp;
917 s->current_picture.mb_type[mb_xy]= mb_type;
919 if(cbp || IS_INTRA16x16(mb_type)){
920 int i8x8, i4x4, chroma_idx;
922 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
923 const uint8_t *scan, *scan8x8;
924 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
926 if(IS_INTERLACED(mb_type)){
927 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
928 scan= s->qscale ? h->field_scan : h->field_scan_q0;
930 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
931 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
934 dquant= get_se_golomb(&s->gb);
938 if(((unsigned)s->qscale) > max_qp){
939 if(s->qscale<0) s->qscale+= max_qp+1;
940 else s->qscale-= max_qp+1;
941 if(((unsigned)s->qscale) > max_qp){
942 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
947 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
948 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
949 if(IS_INTRA16x16(mb_type)){
950 AV_ZERO128(h->mb_luma_dc+0);
951 AV_ZERO128(h->mb_luma_dc+8);
952 AV_ZERO128(h->mb_luma_dc+16);
953 AV_ZERO128(h->mb_luma_dc+24);
954 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
955 return -1; //FIXME continue if partitioned and other return -1 too
958 assert((cbp&15) == 0 || (cbp&15) == 15);
961 for(i8x8=0; i8x8<4; i8x8++){
962 for(i4x4=0; i4x4<4; i4x4++){
963 const int index= i4x4 + 4*i8x8;
964 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index<<h->pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
970 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
973 for(i8x8=0; i8x8<4; i8x8++){
975 if(IS_8x8DCT(mb_type)){
976 DCTELEM *buf = &h->mb[64*i8x8<<h->pixel_shift];
978 for(i4x4=0; i4x4<4; i4x4++){
979 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
980 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
983 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
984 nnz[0] += nnz[1] + nnz[8] + nnz[9];
986 for(i4x4=0; i4x4<4; i4x4++){
987 const int index= i4x4 + 4*i8x8;
989 if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
995 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
996 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
1002 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1003 if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx)<<h->pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1009 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1010 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1011 for(i4x4=0; i4x4<4; i4x4++){
1012 const int index= 16 + 4*chroma_idx + i4x4;
1013 if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan + 1, qmul, 15) < 0){
1019 uint8_t * const nnz= &h->non_zero_count_cache[0];
1020 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1021 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1024 uint8_t * const nnz= &h->non_zero_count_cache[0];
1025 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1026 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1027 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1029 s->current_picture.qscale_table[mb_xy]= s->qscale;
1030 write_back_non_zero_count(h);
1033 h->ref_count[0] >>= 1;
1034 h->ref_count[1] >>= 1;