2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264_cavlc.c
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/* Lookup from the Exp-Golomb code number read for the coded block pattern to
 * the cbp value, used for macroblocks that are not intra 4x4 (indexed with the
 * result of get_ue_golomb() in ff_h264_decode_mb_cavlc()). Every entry is in
 * the range 0..15.
 * NOTE(review): the condition that selects the "_gray" tables instead of the
 * regular ones is not visible in this listing - presumably the no-chroma
 * case; confirm against the full file. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Same lookup, used when the macroblock is intra 4x4. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Code lengths (one byte per entry, 4*5 entries) handed to init_vlc() when
 * ff_h264_decode_init_vlc() builds chroma_dc_coeff_token_vlc.
 * The initializer rows are not visible in this listing. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
/* Code words handed to init_vlc() together with chroma_dc_coeff_token_len
 * (same entry count). The initializer rows are not visible in this listing. */
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/* Code lengths for the four coeff_token VLCs: row i (4*17 entries) is passed
 * to init_vlc() as the length array of coeff_token_vlc[i].
 * decode_residual() splits a decoded token as total_coeff = token>>2 and
 * trailing_ones = token&3, so each group of four entries presumably belongs
 * to one total_coeff value - confirm against init_vlc()'s symbol numbering.
 * NOTE(review): the per-row braces and at least the first line of each row
 * are missing from this listing. */
65 static const uint8_t coeff_token_len[4][4*17]={
68 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
69 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
70 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
71 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
75 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
76 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
77 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
78 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
82 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
83 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
84 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
85 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
89 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Code words for the four coeff_token VLCs: row i (4*17 entries) is passed to
 * init_vlc() as the code array of coeff_token_vlc[i], alongside
 * coeff_token_len[i].
 * NOTE(review): the per-row braces and at least the first line of each row
 * are missing from this listing. */
96 static const uint8_t coeff_token_bits[4][4*17]={
99 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
100 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
101 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
102 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
106 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
107 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
108 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
109 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
113 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
114 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
115 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
116 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
120 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
121 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
122 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
123 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Code lengths for the total_zeros VLCs: row i (16 entries) is passed to
 * init_vlc() for total_zeros_vlc[i]. decode_residual() picks the table with
 * (total_zeros_vlc-1)[total_coeff], i.e. row total_coeff-1.
 * Rows with fewer than 16 initializers are zero-filled by the C initializer
 * rules. Only the first seven rows are visible in this listing. */
127 static const uint8_t total_zeros_len[16][16]= {
128 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131 {5,3,4,4,3,3,3,4,3,4,5,5,5},
132 {4,4,4,3,3,3,3,3,4,5,4,5},
133 {6,5,3,3,3,3,3,3,4,3,6},
134 {6,5,3,3,3,2,3,4,3,6},
/* Code words matching total_zeros_len row for row; passed to init_vlc() as
 * the code array of total_zeros_vlc[i]. Only the first seven rows are visible
 * in this listing. */
145 static const uint8_t total_zeros_bits[16][16]= {
146 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149 {3,7,5,4,6,5,4,3,3,2,2,1,0},
150 {5,4,3,7,6,5,4,3,2,1,1,0},
151 {1,1,7,6,5,4,3,2,1,1,0},
152 {1,1,5,4,3,3,2,1,1,0},
/* Code lengths for the chroma DC total_zeros VLCs: row i (4 entries) is passed
 * to init_vlc() for chroma_dc_total_zeros_vlc[i]; decode_residual() selects
 * the table with (chroma_dc_total_zeros_vlc-1)[total_coeff].
 * The initializer rows are not visible in this listing. */
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
/* Code words matching chroma_dc_total_zeros_len.
 * The initializer rows are not visible in this listing. */
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* Code lengths for the run_before VLCs. Row i is passed to init_vlc() for
 * run_vlc[i], and row 6 is passed (16 entries) for run7_vlc.
 * decode_residual() uses (run_vlc-1)[zeros_left] while zeros_left < 7 and
 * run7_vlc otherwise. Only one row is visible in this listing. */
175 static const uint8_t run_len[7][16]={
182 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
/* Code words matching run_len row for row. Only one row is visible in this
 * listing. */
185 static const uint8_t run_bits[7][16]={
192 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Static storage for the CAVLC lookup tables. ff_h264_decode_init_vlc() points
 * each VLC's .table at the matching array below, sets .table_allocated from
 * the corresponding *_size constant and then calls init_vlc() with
 * INIT_VLC_USE_NEW_STATIC. */

/* The four coeff_token tables are packed into one array; table i starts at the
 * sum of coeff_token_vlc_tables_size[0..i-1]. The init function asserts that
 * the sizes add up to the array length. */
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* VLC for the coeff_token of chroma DC blocks. */
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs; decode_residual() indexes them by total_coeff-1. */
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros VLCs for chroma DC blocks, also indexed by total_coeff-1. */
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs used while zeros_left < 7, indexed by zeros_left-1. */
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
/* Backing table for run7_vlc, the run_before VLC used when zeros_left >= 7.
 * NOTE(review): the declaration of run7_vlc itself is not visible in this
 * listing. */
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
/* Number of bitstream bits used to index cavlc_level_tab. */
219 #define LEVEL_TAB_BITS 8
/* Level lookup, indexed as [suffix_length][next LEVEL_TAB_BITS bits].
 * Element [0] is either the decoded signed level or a value >= 100 that
 * encodes (prefix + 100) for codes that do not fit in the table; element [1]
 * is the number of bits to skip. Filled by init_cavlc_level_tab(). */
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
/**
 * Predicts the coefficient count of block n from its left (index8 - 1) and
 * top (index8 - 8) neighbours in h->non_zero_count_cache.
 * @param h decoder context holding the non-zero count cache
 * @param n block index, translated to a cache position through scan8[]
 * NOTE(review): the declaration/initialisation of i and the return statement
 * are missing from this listing. The trace line suggests i is derived from
 * left and top and that i&31 is the predicted value - confirm against the
 * full file.
 */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228 const int index8= scan8[n];
229 const int left= h->non_zero_count_cache[index8 - 1];
230 const int top = h->non_zero_count_cache[index8 - 8];
// Values below 64 are halved, rounding up; larger values are left unchanged.
233 if(i<64) i= (i+1)>>1;
// Trace output only: neighbours, block index, cache position and i&31.
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fills cavlc_level_tab for every suffix_length in 0..6 and every
 * LEVEL_TAB_BITS-bit pattern i, so that decode_residual() can resolve short
 * level codes with a single table lookup.
 * NOTE(review): the declaration of i, one else line and the closing braces
 * are missing from this listing.
 */
240 static av_cold void init_cavlc_level_tab(void){
241 int suffix_length, mask;
244 for(suffix_length=0; suffix_length<7; suffix_length++){
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Assuming av_log2() is floor(log2), prefix is the number of leading zero
// bits of i within LEVEL_TAB_BITS bits.
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Combine the prefix with the suffix_length bits that follow the first 1 bit.
247 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// Map the code to a signed level: mask is all ones for odd codes, which
// negates the halved value; even codes stay positive.
249 mask= -(level_code&1);
250 level_code= (((2+level_code)>>1) ^ mask) - mask;
// Whole code (prefix zeros + 1 bit + suffix) fits: store level and length.
251 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252 cavlc_level_tab[suffix_length][i][0]= level_code;
253 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Only the prefix fits: store prefix+100 as an escape marker and consume just
// the prefix bits.
254 }else if(prefix + 1 <= LEVEL_TAB_BITS){
255 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case (its else line is not shown in this listing): store the
// marker LEVEL_TAB_BITS+100 and consume LEVEL_TAB_BITS bits.
258 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * Builds all static CAVLC tables: for each VLC it attaches the static backing
 * array, records its size in table_allocated and calls init_vlc() with
 * INIT_VLC_USE_NEW_STATIC; finally it fills cavlc_level_tab.
 * NOTE(review): the local declarations, the loop headers and any run-once
 * guard are missing from this listing.
 */
265 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token VLC (4*5 codes).
273 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276 &chroma_dc_coeff_token_len [0], 1, 1,
277 &chroma_dc_coeff_token_bits[0], 1, 1,
278 INIT_VLC_USE_NEW_STATIC);
// coeff_token VLCs: table i is carved out of the packed array at the running
// offset, which then advances by that table's size.
282 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285 &coeff_token_len [i][0], 1, 1,
286 &coeff_token_bits[i][0], 1, 1,
287 INIT_VLC_USE_NEW_STATIC);
288 offset += coeff_token_vlc_tables_size[i];
291 * This is a one time safety check to make sure that
292 * the packed static coeff_token_vlc table sizes
293 * were initialized correctly.
295 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros VLCs (4 codes each).
298 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300 init_vlc(&chroma_dc_total_zeros_vlc[i],
301 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302 &chroma_dc_total_zeros_len [i][0], 1, 1,
303 &chroma_dc_total_zeros_bits[i][0], 1, 1,
304 INIT_VLC_USE_NEW_STATIC);
// total_zeros VLCs (16 codes each).
307 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309 init_vlc(&total_zeros_vlc[i],
310 TOTAL_ZEROS_VLC_BITS, 16,
311 &total_zeros_len [i][0], 1, 1,
312 &total_zeros_bits[i][0], 1, 1,
313 INIT_VLC_USE_NEW_STATIC);
// run_before VLCs. The line carrying the bit count and the number of codes is
// not shown in this listing.
317 run_vlc[i].table = run_vlc_tables[i];
318 run_vlc[i].table_allocated = run_vlc_tables_size;
319 init_vlc(&run_vlc[i],
321 &run_len [i][0], 1, 1,
322 &run_bits[i][0], 1, 1,
323 INIT_VLC_USE_NEW_STATIC);
// run_before VLC built from row 6 of run_len/run_bits.
// NOTE(review): the next statement ends with a comma rather than a semicolon,
// so the two assignments form one comma expression; the effect is the same as
// two separate statements.
325 run7_vlc.table = run7_vlc_table,
326 run7_vlc.table_allocated = run7_vlc_table_size;
327 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328 &run_len [6][0], 1, 1,
329 &run_bits[6][0], 1, 1,
330 INIT_VLC_USE_NEW_STATIC);
// Finally build the direct level lookup table.
332 init_cavlc_level_tab();
/**
 * Reads a level prefix from the bitstream using the bit reader macros.
 * It loads the cache, derives log from the highest set bit of the cached word
 * and skips log bits.
 * @param gb bit reader to consume from
 * NOTE(review): the local declarations, OPEN_READER, the return statement and
 * the conditional around the debug output are missing from this listing. The
 * debug line prints log-1 as the prefix value, which is presumably what is
 * returned - confirm against the full file.
 */
339 static inline int get_level_prefix(GetBitContext *gb){
344 UPDATE_CACHE(re, gb);
345 buf=GET_CACHE(re, gb);
// Assuming av_log2() is floor(log2), log is the count of leading zero bits in
// the 32-bit cache plus one, i.e. the zeros and the terminating 1 bit.
347 log= 32 - av_log2(buf);
// Debug output of the bits about to be consumed.
349 print_bin(buf>>(32-log), log);
350 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix bits and write the reader state back.
353 LAST_SKIP_BITS(re, gb, log);
354 CLOSE_READER(re, gb);
360 * decodes a residual block.
361 * @param n block index
362 * @param scantable scantable
363 * @param max_coeff number of coefficients in the block
364 * @return <0 if an error occurred
/**
 * Decodes one CAVLC residual block into block[].
 * Steps: read coeff_token, read the trailing-one signs, decode the remaining
 * levels, read total_zeros, then place each level at scantable[coeff_num]
 * while consuming run_before codes.
 * @param h decoder context
 * @param gb bit reader the block is parsed from
 * @param block destination coefficient array
 * @param n block index; CHROMA_DC_BLOCK_INDEX and LUMA_DC_BLOCK_INDEX are
 *          handled specially when reading coeff_token
 * @param scantable maps a coefficient number to its position in block
 * @param qmul dequantisation multipliers applied in the second write-out loop
 * @param max_coeff upper bound for the number of coefficients in the block
 * NOTE(review): many lines (else branches, return statements, closing braces
 * and the declarations of level[], prefix and mask) are missing from this
 * listing, so branch structure is only partly visible.
 */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367 MpegEncContext * const s = &h->s;
// Selects one of the four coeff_token VLCs from the predicted coefficient count.
368 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
370 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
372 //FIXME put trailing_ones into the context
// Read coeff_token. Chroma DC uses its own VLC; the other block kinds pick a
// VLC from the prediction. total_coeff is the token shifted right by two.
374 if(n == CHROMA_DC_BLOCK_INDEX){
375 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376 total_coeff= coeff_token>>2;
// Luma DC predicts from block 0.
378 if(n == LUMA_DC_BLOCK_INDEX){
379 total_coeff= pred_non_zero_count(h, 0);
380 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381 total_coeff= coeff_token>>2;
// Regular blocks predict from their own index and record the decoded count in
// the non-zero count cache.
383 total_coeff= pred_non_zero_count(h, n);
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385 total_coeff= coeff_token>>2;
386 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
390 //FIXME set last_non_zero?
// Reject coefficient counts above max_coeff (compared as unsigned).
394 if(total_coeff > (unsigned)max_coeff) {
395 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// The low two bits of the token give the number of trailing ones.
399 trailing_ones= coeff_token&3;
400 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401 assert(total_coeff<=16);
// Peek three bits but consume only trailing_ones of them; each bit is the sign
// of one trailing coefficient (0 gives +1, 1 gives -1).
403 i = show_bits(gb, 3);
404 skip_bits(gb, trailing_ones);
405 level[0] = 1-((i&4)>>1);
406 level[1] = 1-((i&2) );
407 level[2] = 1-((i&1)<<1);
// First non-trailing level: try the lookup table, fall back to explicit
// parsing when the entry is an escape marker.
409 if(trailing_ones<total_coeff) {
// Branch-free: 1 only when both comparisons hold.
411 int suffix_length = total_coeff > 10 & trailing_ones < 3;
412 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
415 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Entries >= 100 are escapes: prefix = entry - 100. When the prefix did not
// end inside the table window, the rest is read with get_level_prefix().
416 if(level_code >= 100){
417 prefix= level_code - 100;
418 if(prefix == LEVEL_TAB_BITS)
419 prefix += get_level_prefix(gb);
421 //first coefficient has suffix_length equal to 0 or 1
422 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
424 level_code= (prefix<<1) + get_bits1(gb); //part
426 level_code= prefix; //part
427 }else if(prefix==14){
429 level_code= (prefix<<1) + get_bits1(gb); //part
431 level_code= prefix + get_bits(gb, 4); //part
433 level_code= 30 + get_bits(gb, prefix-3); //part
435 level_code += (1<<(prefix-3))-4096;
438 if(trailing_ones < 3) level_code += 2;
// Convert the code to a signed level: odd codes become negative.
441 mask= -(level_code&1);
442 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: with fewer than three trailing ones, move the level one step
// away from zero (adds +1 or -1 according to its sign).
444 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// suffix_length becomes 2 when the magnitude exceeds 3, otherwise 1.
446 suffix_length = 1 + (level_code + 3U > 6U);
447 level[trailing_ones]= level_code;
450 //remaining coefficients have suffix_length > 0
451 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude thresholds above which suffix_length is incremented.
452 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
453 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
454 level_code= cavlc_level_tab[suffix_length][bitsi][0];
456 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Escape handling as for the first level.
457 if(level_code >= 100){
458 prefix= level_code - 100;
459 if(prefix == LEVEL_TAB_BITS){
460 prefix += get_level_prefix(gb);
463 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
465 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
467 level_code += (1<<(prefix-3))-4096;
469 mask= -(level_code&1);
470 level_code= (((2+level_code)>>1) ^ mask) - mask;
472 level[i]= level_code;
// Unsigned trick: adds 1 when level_code lies outside [-limit, limit].
473 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// Block completely filled; the statement for this case is not shown in this
// listing.
477 if(total_coeff == max_coeff)
// Otherwise read total_zeros from the VLC selected by total_coeff-1.
480 if(n == CHROMA_DC_BLOCK_INDEX)
481 zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
483 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scan position of the first decoded level; later levels move towards
// position 0 as run_before values are consumed.
486 coeff_num = zeros_left + total_coeff - 1;
487 j = scantable[coeff_num];
// First write-out loop. NOTE(review): its condition and its stores into block
// are not shown; presumably this is the variant without dequantisation.
490 for(i=1;i<total_coeff;i++) {
// run_before comes from run_vlc[zeros_left-1] while zeros_left < 7, else from
// run7_vlc.
493 else if(zeros_left < 7){
494 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
496 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
498 zeros_left -= run_before;
499 coeff_num -= 1 + run_before;
500 j= scantable[ coeff_num ];
// Second write-out loop: same traversal, but each level is dequantised as
// (level * qmul[j] + 32) >> 6 before being stored.
505 block[j] = (level[0] * qmul[j] + 32)>>6;
506 for(i=1;i<total_coeff;i++) {
509 else if(zeros_left < 7){
510 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
512 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
514 zeros_left -= run_before;
515 coeff_num -= 1 + run_before;
516 j= scantable[ coeff_num ];
518 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error report for a negative zero count; the guarding condition is not shown
// in this listing.
523 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
530 int ff_h264_decode_mb_cavlc(H264Context *h){
531 MpegEncContext * const s = &h->s;
534 unsigned int mb_type, cbp;
535 int dct8x8_allowed= h->pps.transform_8x8_mode;
537 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
539 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
540 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
542 if(h->slice_type_nos != FF_I_TYPE){
543 if(s->mb_skip_run==-1)
544 s->mb_skip_run= get_ue_golomb(&s->gb);
546 if (s->mb_skip_run--) {
547 if(FRAME_MBAFF && (s->mb_y&1) == 0){
548 if(s->mb_skip_run==0)
549 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
556 if( (s->mb_y&1) == 0 )
557 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
560 h->prev_mb_skipped= 0;
562 mb_type= get_ue_golomb(&s->gb);
563 if(h->slice_type_nos == FF_B_TYPE){
565 partition_count= b_mb_type_info[mb_type].partition_count;
566 mb_type= b_mb_type_info[mb_type].type;
569 goto decode_intra_mb;
571 }else if(h->slice_type_nos == FF_P_TYPE){
573 partition_count= p_mb_type_info[mb_type].partition_count;
574 mb_type= p_mb_type_info[mb_type].type;
577 goto decode_intra_mb;
580 assert(h->slice_type_nos == FF_I_TYPE);
581 if(h->slice_type == FF_SI_TYPE && mb_type)
585 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
589 cbp= i_mb_type_info[mb_type].cbp;
590 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
591 mb_type= i_mb_type_info[mb_type].type;
595 mb_type |= MB_TYPE_INTERLACED;
597 h->slice_table[ mb_xy ]= h->slice_num;
599 if(IS_INTRA_PCM(mb_type)){
602 // We assume these blocks are very rare so we do not optimize it.
603 align_get_bits(&s->gb);
605 // The pixels are stored in the same order as levels in h->mb array.
606 for(x=0; x < (CHROMA ? 384 : 256); x++){
607 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
610 // In deblocking, the quantizer is 0
611 s->current_picture.qscale_table[mb_xy]= 0;
612 // All coeffs are present
613 memset(h->non_zero_count[mb_xy], 16, 32);
615 s->current_picture.mb_type[mb_xy]= mb_type;
620 h->ref_count[0] <<= 1;
621 h->ref_count[1] <<= 1;
624 fill_decode_neighbors(h, mb_type);
625 fill_decode_caches(h, mb_type);
628 if(IS_INTRA(mb_type)){
630 // init_top_left_availability(h);
631 if(IS_INTRA4x4(mb_type)){
634 if(dct8x8_allowed && get_bits1(&s->gb)){
635 mb_type |= MB_TYPE_8x8DCT;
639 // fill_intra4x4_pred_table(h);
640 for(i=0; i<16; i+=di){
641 int mode= pred_intra_mode(h, i);
643 if(!get_bits1(&s->gb)){
644 const int rem_mode= get_bits(&s->gb, 3);
645 mode = rem_mode + (rem_mode >= mode);
649 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
651 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
653 ff_h264_write_back_intra_pred_mode(h);
654 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
657 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
658 if(h->intra16x16_pred_mode < 0)
662 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
665 h->chroma_pred_mode= pred_mode;
667 }else if(partition_count==4){
668 int i, j, sub_partition_count[4], list, ref[2][4];
670 if(h->slice_type_nos == FF_B_TYPE){
672 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
673 if(h->sub_mb_type[i] >=13){
674 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
677 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
678 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
680 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
681 ff_h264_pred_direct_motion(h, &mb_type);
682 h->ref_cache[0][scan8[4]] =
683 h->ref_cache[1][scan8[4]] =
684 h->ref_cache[0][scan8[12]] =
685 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
688 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
690 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
691 if(h->sub_mb_type[i] >=4){
692 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
695 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
696 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
700 for(list=0; list<h->list_count; list++){
701 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
703 if(IS_DIRECT(h->sub_mb_type[i])) continue;
704 if(IS_DIR(h->sub_mb_type[i], 0, list)){
708 }else if(ref_count == 2){
709 tmp= get_bits1(&s->gb)^1;
711 tmp= get_ue_golomb_31(&s->gb);
713 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
726 dct8x8_allowed = get_dct8x8_allowed(h);
728 for(list=0; list<h->list_count; list++){
730 if(IS_DIRECT(h->sub_mb_type[i])) {
731 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
734 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
735 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
737 if(IS_DIR(h->sub_mb_type[i], 0, list)){
738 const int sub_mb_type= h->sub_mb_type[i];
739 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
740 for(j=0; j<sub_partition_count[i]; j++){
742 const int index= 4*i + block_width*j;
743 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
744 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
745 mx += get_se_golomb(&s->gb);
746 my += get_se_golomb(&s->gb);
747 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
749 if(IS_SUB_8X8(sub_mb_type)){
751 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
753 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
754 }else if(IS_SUB_8X4(sub_mb_type)){
755 mv_cache[ 1 ][0]= mx;
756 mv_cache[ 1 ][1]= my;
757 }else if(IS_SUB_4X8(sub_mb_type)){
758 mv_cache[ 8 ][0]= mx;
759 mv_cache[ 8 ][1]= my;
761 mv_cache[ 0 ][0]= mx;
762 mv_cache[ 0 ][1]= my;
765 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
771 }else if(IS_DIRECT(mb_type)){
772 ff_h264_pred_direct_motion(h, &mb_type);
773 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
776 //FIXME we should set ref_idx_l? to 0 if we use that later ...
777 if(IS_16X16(mb_type)){
778 for(list=0; list<h->list_count; list++){
780 if(IS_DIR(mb_type, 0, list)){
781 if(h->ref_count[list]==1){
783 }else if(h->ref_count[list]==2){
784 val= get_bits1(&s->gb)^1;
786 val= get_ue_golomb_31(&s->gb);
787 if(val >= h->ref_count[list]){
788 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
792 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
795 for(list=0; list<h->list_count; list++){
796 if(IS_DIR(mb_type, 0, list)){
797 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
798 mx += get_se_golomb(&s->gb);
799 my += get_se_golomb(&s->gb);
800 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
802 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
806 else if(IS_16X8(mb_type)){
807 for(list=0; list<h->list_count; list++){
810 if(IS_DIR(mb_type, i, list)){
811 if(h->ref_count[list] == 1){
813 }else if(h->ref_count[list] == 2){
814 val= get_bits1(&s->gb)^1;
816 val= get_ue_golomb_31(&s->gb);
817 if(val >= h->ref_count[list]){
818 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
823 val= LIST_NOT_USED&0xFF;
824 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
827 for(list=0; list<h->list_count; list++){
830 if(IS_DIR(mb_type, i, list)){
831 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
832 mx += get_se_golomb(&s->gb);
833 my += get_se_golomb(&s->gb);
834 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
836 val= pack16to32(mx,my);
839 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
843 assert(IS_8X16(mb_type));
844 for(list=0; list<h->list_count; list++){
847 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
848 if(h->ref_count[list]==1){
850 }else if(h->ref_count[list]==2){
851 val= get_bits1(&s->gb)^1;
853 val= get_ue_golomb_31(&s->gb);
854 if(val >= h->ref_count[list]){
855 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
860 val= LIST_NOT_USED&0xFF;
861 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
864 for(list=0; list<h->list_count; list++){
867 if(IS_DIR(mb_type, i, list)){
868 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
869 mx += get_se_golomb(&s->gb);
870 my += get_se_golomb(&s->gb);
871 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
873 val= pack16to32(mx,my);
876 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
882 if(IS_INTER(mb_type))
883 write_back_motion(h, mb_type);
885 if(!IS_INTRA16x16(mb_type)){
886 cbp= get_ue_golomb(&s->gb);
888 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
893 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
894 else cbp= golomb_to_inter_cbp [cbp];
896 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
897 else cbp= golomb_to_inter_cbp_gray[cbp];
901 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
902 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
905 h->cbp_table[mb_xy]= cbp;
906 s->current_picture.mb_type[mb_xy]= mb_type;
908 if(cbp || IS_INTRA16x16(mb_type)){
909 int i8x8, i4x4, chroma_idx;
911 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
912 const uint8_t *scan, *scan8x8, *dc_scan;
914 if(IS_INTERLACED(mb_type)){
915 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
916 scan= s->qscale ? h->field_scan : h->field_scan_q0;
917 dc_scan= luma_dc_field_scan;
919 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
920 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
921 dc_scan= luma_dc_zigzag_scan;
924 dquant= get_se_golomb(&s->gb);
928 if(((unsigned)s->qscale) > 51){
929 if(s->qscale<0) s->qscale+= 52;
931 if(((unsigned)s->qscale) > 51){
932 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
937 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
938 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
939 if(IS_INTRA16x16(mb_type)){
940 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
941 return -1; //FIXME continue if partitioned and other return -1 too
944 assert((cbp&15) == 0 || (cbp&15) == 15);
947 for(i8x8=0; i8x8<4; i8x8++){
948 for(i4x4=0; i4x4<4; i4x4++){
949 const int index= i4x4 + 4*i8x8;
950 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
956 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
959 for(i8x8=0; i8x8<4; i8x8++){
961 if(IS_8x8DCT(mb_type)){
962 DCTELEM *buf = &h->mb[64*i8x8];
964 for(i4x4=0; i4x4<4; i4x4++){
965 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
966 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
969 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
970 nnz[0] += nnz[1] + nnz[8] + nnz[9];
972 for(i4x4=0; i4x4<4; i4x4++){
973 const int index= i4x4 + 4*i8x8;
975 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
981 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
982 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
988 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
989 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
995 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
996 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
997 for(i4x4=0; i4x4<4; i4x4++){
998 const int index= 16 + 4*chroma_idx + i4x4;
999 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1005 uint8_t * const nnz= &h->non_zero_count_cache[0];
1006 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1007 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1010 uint8_t * const nnz= &h->non_zero_count_cache[0];
1011 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1012 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1013 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1015 s->current_picture.qscale_table[mb_xy]= s->qscale;
1016 write_back_non_zero_count(h);
1019 h->ref_count[0] >>= 1;
1020 h->ref_count[1] >>= 1;