2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264_cavlc.c
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/*
 * Coded block pattern lookup used by ff_h264_decode_mb_cavlc for macroblocks
 * that are not intra 4x4: it is indexed with the value read from the
 * bitstream by get_ue_golomb() and yields the cbp. The 16 entries are a
 * permutation of 0..15.
 * NOTE(review): the "_gray" suffix suggests this is the variant for content
 * without chroma; the condition that selects it is not visible here - confirm.
 */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/*
 * Coded block pattern lookup used by ff_h264_decode_mb_cavlc for intra 4x4
 * macroblocks: it is indexed with the value read from the bitstream by
 * get_ue_golomb() and yields the cbp. The 16 entries are a permutation of
 * 0..15, in a different order than the inter table.
 * NOTE(review): the "_gray" suffix suggests this is the variant for content
 * without chroma; the condition that selects it is not visible here - confirm.
 */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/*
 * Source tables for chroma_dc_coeff_token_vlc: ff_h264_decode_init_vlc passes
 * both to init_vlc() with 4*5 codes and 1-byte entries.
 * NOTE(review): going by the names, "_len" holds the code lengths in bits and
 * "_bits" the code words; the initializer rows are not visible in this view.
 */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/*
 * Per-code lengths for the four luma/chroma-AC coeff_token VLCs.
 * ff_h264_decode_init_vlc passes row i (4*17 one-byte entries) to init_vlc()
 * when building coeff_token_vlc[i].
 * decode_residual splits a decoded token into total_coeff (token>>2) and
 * trailing_ones (token&3).
 * NOTE(review): presumably the entry index is that same token value, and a
 * length of 0 marks an impossible combination (trailing_ones greater than
 * total_coeff) - confirm against init_vlc().
 * Only part of each sub-table's initializer is visible in this view. In the
 * visible rows of the fourth sub-table every non-zero length is 6.
 */
65 static const uint8_t coeff_token_len[4][4*17]={
68 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
69 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
70 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
71 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
75 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
76 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
77 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
78 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
82 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
83 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
84 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
85 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
89 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/*
 * Companion of coeff_token_len: ff_h264_decode_init_vlc passes row i of this
 * table (4*17 one-byte entries) to init_vlc() together with row i of
 * coeff_token_len when building coeff_token_vlc[i].
 * NOTE(review): going by the name these are the code words whose lengths are
 * in coeff_token_len - confirm against init_vlc().
 * Only part of each sub-table's initializer is visible in this view.
 */
96 static const uint8_t coeff_token_bits[4][4*17]={
99 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
100 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
101 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
102 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
106 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
107 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
108 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
109 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
113 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
114 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
115 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
116 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
120 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
121 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
122 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
123 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/*
 * Source lengths for the luma total_zeros VLCs. ff_h264_decode_init_vlc passes
 * row i (16 one-byte entries) to init_vlc() when building total_zeros_vlc[i],
 * and decode_residual selects total_zeros_vlc[total_coeff-1].
 * Each visible row is one entry shorter than the previous one; elements left
 * out of a row initializer are zero by the C initialization rules.
 * Only the first seven rows are visible in this view.
 */
127 static const uint8_t total_zeros_len[16][16]= {
128 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131 {5,3,4,4,3,3,3,4,3,4,5,5,5},
132 {4,4,4,3,3,3,3,3,4,5,4,5},
133 {6,5,3,3,3,3,3,3,4,3,6},
134 {6,5,3,3,3,2,3,4,3,6},
/*
 * Companion of total_zeros_len: row i is passed to init_vlc() together with
 * row i of total_zeros_len when building total_zeros_vlc[i].
 * NOTE(review): going by the name these are the code words whose lengths are
 * in total_zeros_len - confirm against init_vlc().
 * Only the first seven rows are visible in this view.
 */
145 static const uint8_t total_zeros_bits[16][16]= {
146 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149 {3,7,5,4,6,5,4,3,3,2,2,1,0},
150 {5,4,3,7,6,5,4,3,2,1,1,0},
151 {1,1,7,6,5,4,3,2,1,1,0},
152 {1,1,5,4,3,3,2,1,1,0},
/*
 * Source tables for the chroma DC total_zeros VLCs. ff_h264_decode_init_vlc
 * passes row i of each (4 one-byte entries) to init_vlc() when building
 * chroma_dc_total_zeros_vlc[i]; decode_residual selects
 * chroma_dc_total_zeros_vlc[total_coeff-1] for chroma DC blocks.
 * The initializer rows are not visible in this view.
 */
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/*
 * Source tables for the run_before VLCs. ff_h264_decode_init_vlc passes row i
 * of each to init_vlc() when building run_vlc[i], and row 6 (with 16 codes)
 * when building run7_vlc.
 * NOTE(review): only one row of each table is visible in this view;
 * presumably it is the last row (index 6), the one used for run7_vlc.
 */
175 static const uint8_t run_len[7][16]={
182 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
185 static const uint8_t run_bits[7][16]={
192 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/*
 * Statically allocated storage for every VLC used by the CAVLC decoder.
 * ff_h264_decode_init_vlc points each VLC's .table at the matching array
 * below and sets .table_allocated from the matching *_size constant before
 * calling init_vlc() with INIT_VLC_USE_NEW_STATIC.
 */
/*
 * The four coeff_token VLCs share one packed array; each VLC gets a slice
 * whose length is given by coeff_token_vlc_tables_size[], and the init code
 * asserts that the slices add up to the array length.
 */
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* One VLC per total_coeff value; decode_residual indexes with total_coeff-1. */
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* decode_residual uses run_vlc[zeros_left-1] while zeros_left < 7. */
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
/* Backing store for run7_vlc (its declaration is not visible in this view). */
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
/*
 * cavlc_level_tab[suffix_length][next LEVEL_TAB_BITS bits of the stream]:
 *   [0] is either a decoded level or, when >= 100, an escape value carrying
 *       the level prefix plus 100;
 *   [1] is the number of bits decode_residual skips for this entry.
 * Filled by init_cavlc_level_tab().
 */
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
/*
 * Predicts the coefficient count of block n from its neighbours in
 * h->non_zero_count_cache: the entry one position before scan8[n] ("left")
 * and the entry eight positions before it ("top").
 * decode_residual uses the result to index coeff_token_table_index[17].
 * NOTE(review): the declaration of `i` and the return statement are not
 * visible in this view; presumably i starts as left + top and the function
 * returns i&31, the value the trace line prints - confirm.
 */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228 const int index8= scan8[n];
229 const int left= h->non_zero_count_cache[index8 - 1];
230 const int top = h->non_zero_count_cache[index8 - 8];
/* Values below 64 are halved, rounding up; larger values are left unchanged. */
233 if(i<64) i= (i+1)>>1;
/* Trace output: left and top counts in hex, block index, scan8 position, and i masked to 5 bits. */
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * Fills cavlc_level_tab so that decode_residual can decode most level codes
 * with one lookup on the next LEVEL_TAB_BITS bits of the stream.
 * For every suffix_length 0..6 and every LEVEL_TAB_BITS-bit pattern i the
 * entry stores either the decoded level and its total bit length, or an
 * escape value (prefix + 100) and the number of prefix bits to skip.
 * NOTE(review): the declaration of `i`, the final `else` line and the closing
 * braces are not visible in this view.
 */
240 static av_cold void init_cavlc_level_tab(void){
241 int suffix_length, mask;
244 for(suffix_length=0; suffix_length<7; suffix_length++){
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/*
 * prefix is LEVEL_TAB_BITS minus av_log2(2*i).
 * NOTE(review): assuming av_log2 returns the index of the highest set bit,
 * this is the number of leading zero bits of i within LEVEL_TAB_BITS bits;
 * the result for i == 0 depends on what av_log2(0) returns - confirm.
 */
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
/*
 * The shift keeps the top prefix+1+suffix_length bits of i (the zeros, the
 * terminating 1 and the suffix); subtracting 1<<suffix_length removes the
 * terminating 1, leaving (prefix<<suffix_length) + suffix.
 * NOTE(review): when prefix+1+suffix_length exceeds LEVEL_TAB_BITS the shift
 * count is negative, which is undefined behaviour in C. The result is only
 * stored by the first branch below, where the count is >= 0.
 */
247 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* Map the unsigned code to a signed level: 0 -> +1, 1 -> -1, 2 -> +2, 3 -> -2, ... */
249 mask= -(level_code&1);
250 level_code= (((2+level_code)>>1) ^ mask) - mask;
/* Whole code (prefix, terminating 1, suffix) fits in the lookup width: store the level and its length. */
251 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252 cavlc_level_tab[suffix_length][i][0]= level_code;
253 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* Only the prefix and its terminating 1 fit: store prefix+100 as an escape and skip just the prefix bits. */
254 }else if(prefix + 1 <= LEVEL_TAB_BITS){
255 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/*
 * Presumably the final else branch: not even the prefix fits. The escape
 * value LEVEL_TAB_BITS+100 makes decode_residual call get_level_prefix()
 * for the rest of the prefix after skipping LEVEL_TAB_BITS bits.
 */
258 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/*
 * Builds every static VLC table used by the CAVLC decoder and then fills
 * cavlc_level_tab. Each VLC is pointed at its statically allocated table,
 * told how many entries that table has, and initialised with
 * INIT_VLC_USE_NEW_STATIC from the matching *_len / *_bits source tables.
 * NOTE(review): the lines that declare and initialise `i` and `offset`, open
 * the loops, and any run-once guard are not visible in this view; the loop
 * bounds are presumably the sizes of the VLC arrays (coeff_token_vlc[4],
 * chroma_dc_total_zeros_vlc[3], total_zeros_vlc[15], run_vlc[6]) - confirm.
 */
265 av_cold void ff_h264_decode_init_vlc(void){
/* Chroma DC coeff_token: a single VLC with 4*5 codes. */
273 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276 &chroma_dc_coeff_token_len [0], 1, 1,
277 &chroma_dc_coeff_token_bits[0], 1, 1,
278 INIT_VLC_USE_NEW_STATIC);
/* coeff_token VLCs: each takes the next slice of the packed coeff_token_vlc_tables array, starting at `offset`. */
282 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285 &coeff_token_len [i][0], 1, 1,
286 &coeff_token_bits[i][0], 1, 1,
287 INIT_VLC_USE_NEW_STATIC);
288 offset += coeff_token_vlc_tables_size[i];
291 * This is a one time safety check to make sure that
292 * the packed static coeff_token_vlc table sizes
293 * were initialized correctly.
295 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* Chroma DC total_zeros VLCs: 4 codes each. */
298 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300 init_vlc(&chroma_dc_total_zeros_vlc[i],
301 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302 &chroma_dc_total_zeros_len [i][0], 1, 1,
303 &chroma_dc_total_zeros_bits[i][0], 1, 1,
304 INIT_VLC_USE_NEW_STATIC);
/* Luma total_zeros VLCs: 16 codes each. */
307 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309 init_vlc(&total_zeros_vlc[i],
310 TOTAL_ZEROS_VLC_BITS, 16,
311 &total_zeros_len [i][0], 1, 1,
312 &total_zeros_bits[i][0], 1, 1,
313 INIT_VLC_USE_NEW_STATIC);
/* run_before VLCs; the argument line giving the bit width and code count of this call is not visible in this view. */
317 run_vlc[i].table = run_vlc_tables[i];
318 run_vlc[i].table_allocated = run_vlc_tables_size;
319 init_vlc(&run_vlc[i],
321 &run_len [i][0], 1, 1,
322 &run_bits[i][0], 1, 1,
323 INIT_VLC_USE_NEW_STATIC);
/*
 * run7_vlc is built from row 6 of run_len / run_bits with 16 codes.
 * NOTE(review): the next statement ends in a comma, not a semicolon. The
 * comma operator still performs both assignments in order, so behaviour is
 * unaffected, but a semicolon is presumably what was intended.
 */
325 run7_vlc.table = run7_vlc_table,
326 run7_vlc.table_allocated = run7_vlc_table_size;
327 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328 &run_len [6][0], 1, 1,
329 &run_bits[6][0], 1, 1,
330 INIT_VLC_USE_NEW_STATIC);
/* Finally build the level lookup table used by decode_residual. */
332 init_cavlc_level_tab();
/*
 * Reads a level prefix from gb using the bit-reader cache macros.
 * decode_residual calls it when the table lookup reported that the prefix is
 * longer than LEVEL_TAB_BITS, and adds the result to the prefix so far.
 * NOTE(review): the local declarations, the reader-opening macro and the
 * return statement are not visible in this view; presumably the function
 * returns log-1, the value the debug line prints as "lpr" - confirm.
 */
339 static inline int get_level_prefix(GetBitContext *gb){
344 UPDATE_CACHE(re, gb);
345 buf=GET_CACHE(re, gb);
/*
 * NOTE(review): assuming av_log2 returns the index of the highest set bit of
 * a 32-bit cache word, log is the number of bits up to and including the
 * first 1 bit - confirm.
 */
347 log= 32 - av_log2(buf);
/* Debug output of the bits just examined. Presumably compiled only in trace builds; the guard is not visible here. */
349 print_bin(buf>>(32-log), log);
350 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* Consume the log bits that were examined and write the reader state back. */
353 LAST_SKIP_BITS(re, gb, log);
354 CLOSE_READER(re, gb);
360 * decodes a residual block.
361 * @param n block index
362 * @param scantable scantable
363 * @param max_coeff number of coefficients in the block
364 * @return <0 if an error occurred
/*
 * Parses one CAVLC-coded residual block from gb and stores its coefficients
 * in block[] at the positions given by scantable.
 *
 * Steps visible in this function:
 *  1. read coeff_token, which packs total_coeff (token>>2) and
 *     trailing_ones (token&3);
 *  2. read the sign bits of the trailing ones, then the remaining levels
 *     through cavlc_level_tab, with an escape path for long codes;
 *  3. read the total number of zeros, then one run_before per coefficient
 *     while walking the scan backwards, writing each level to block[].
 *
 * h          decoder context; non_zero_count_cache is read for prediction
 *            and, in the visible lines, written only for blocks that are
 *            neither CHROMA_DC_BLOCK_INDEX nor LUMA_DC_BLOCK_INDEX.
 * gb         bit reader to parse from.
 * block      destination coefficient array.
 * n          block index, or CHROMA_DC_BLOCK_INDEX / LUMA_DC_BLOCK_INDEX.
 * scantable  maps a scan position to an index in block[].
 * qmul       dequantisation factors indexed like block[]; the chroma DC
 *            call in ff_h264_decode_mb_cavlc passes NULL.
 * max_coeff  maximum number of coefficients in this block.
 *
 * Callers treat a negative return value as an error.
 * NOTE(review): several lines (else branches, returns, some declarations and
 * closing braces) are not visible in this view; comments below that depend
 * on them are marked as presumptions.
 */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367 MpegEncContext * const s = &h->s;
/* Maps a predicted coefficient count (0..16) to one of the four coeff_token VLCs. */
368 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
370 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
372 //FIXME put trailing_ones into the context
/* Chroma DC uses its own coeff_token VLC and needs no prediction. */
374 if(n == CHROMA_DC_BLOCK_INDEX){
375 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376 total_coeff= coeff_token>>2;
/* Luma DC: the VLC is chosen from the prediction for block 0; total_coeff briefly holds the predicted count. */
378 if(n == LUMA_DC_BLOCK_INDEX){
379 total_coeff= pred_non_zero_count(h, 0);
380 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381 total_coeff= coeff_token>>2;
/* Any other block: predict from block n itself and record the decoded count in the cache. */
383 total_coeff= pred_non_zero_count(h, n);
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385 total_coeff= coeff_token>>2;
386 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
390 //FIXME set last_non_zero?
/*
 * Sanity check. The comparison is done in unsigned arithmetic, so a negative
 * total_coeff is rejected as well as one larger than max_coeff.
 */
394 if(total_coeff > (unsigned)max_coeff) {
395 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
399 trailing_ones= coeff_token&3;
400 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401 assert(total_coeff<=16);
/*
 * Peek at three bits but consume only trailing_ones of them. Each of the
 * first three levels is set to +1 when its sign bit is 0 and to -1 when it
 * is 1; entries beyond trailing_ones are overwritten or unused later.
 */
403 i = show_bits(gb, 3);
404 skip_bits(gb, trailing_ones);
405 level[0] = 1-((i&4)>>1);
406 level[1] = 1-((i&2) );
407 level[2] = 1-((i&1)<<1);
409 if(trailing_ones<total_coeff) {
/*
 * Both comparisons yield 0 or 1, so the bitwise & acts as a logical AND:
 * suffix_length starts at 1 only when total_coeff > 10 and
 * trailing_ones < 3.
 */
411 int suffix_length = total_coeff > 10 & trailing_ones < 3;
/* Table lookup on the next LEVEL_TAB_BITS bits; [1] says how many of them to consume. */
412 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
415 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Values >= 100 are escapes carrying the prefix; a prefix of LEVEL_TAB_BITS means the prefix continues in the stream. */
416 if(level_code >= 100){
417 prefix= level_code - 100;
418 if(prefix == LEVEL_TAB_BITS)
419 prefix += get_level_prefix(gb);
421 //first coefficient has suffix_length equal to 0 or 1
/* NOTE(review): the conditions choosing between the two "part" lines in each branch are not visible; presumably they test suffix_length. */
422 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
424 level_code= (prefix<<1) + get_bits1(gb); //part
426 level_code= prefix; //part
427 }else if(prefix==14){
429 level_code= (prefix<<1) + get_bits1(gb); //part
431 level_code= prefix + get_bits(gb, 4); //part
/* Presumably the branch for prefix > 14: the suffix is prefix-3 bits long. */
433 level_code= 30 + get_bits(gb, prefix-3); //part
435 level_code += (1<<(prefix-3))-4096;
/* With fewer than three trailing ones the code is offset by 2 (one step in magnitude). */
438 if(trailing_ones < 3) level_code += 2;
/* Map the unsigned code to a signed level: even codes positive, odd codes negative. */
441 mask= -(level_code&1);
442 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
/*
 * Presumably the non-escape path, where level_code is already a signed
 * level from the table: when trailing_ones < 3 its magnitude is increased
 * by one, i.e. -1 is added for negative values and +1 otherwise.
 */
444 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* Unsigned comparison: true when level_code lies outside [-3, 3]. The statement it guards is not visible in this view. */
447 if(level_code + 3U > 6U)
449 level[trailing_ones]= level_code;
452 //remaining coefficients have suffix_length > 0
453 for(i=trailing_ones+1;i<total_coeff;i++) {
/* Magnitude thresholds per suffix_length, used by the comparison at the end of this loop body. */
454 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
455 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
456 level_code= cavlc_level_tab[suffix_length][bitsi][0];
458 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
459 if(level_code >= 100){
460 prefix= level_code - 100;
461 if(prefix == LEVEL_TAB_BITS){
462 prefix += get_level_prefix(gb);
/* NOTE(review): the condition separating the next two assignments is not visible; presumably it tests the prefix length. */
465 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
467 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
469 level_code += (1<<(prefix-3))-4096;
471 mask= -(level_code&1);
472 level_code= (((2+level_code)>>1) ^ mask) - mask;
474 level[i]= level_code;
/*
 * Unsigned comparison: true when the magnitude of level_code exceeds
 * suffix_limit[suffix_length]. The statement it guards is not visible;
 * presumably it increments suffix_length.
 */
476 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* A full block has no room for zeros. The guarded statement is not visible; presumably it sets zeros_left to 0. */
481 if(total_coeff == max_coeff)
/* Otherwise read total_zeros from the chroma DC or the luma table selected by total_coeff-1. */
484 if(n == CHROMA_DC_BLOCK_INDEX)
485 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
487 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scan position of the first decoded coefficient; later coefficients are placed by walking backwards from here. */
490 coeff_num = zeros_left + total_coeff - 1;
491 j = scantable[coeff_num];
/*
 * First placement loop.
 * NOTE(review): the lines that store into block[] here are not visible;
 * presumably this is the variant that writes levels without qmul, and the
 * second loop below is the one that dequantises.
 */
494 for(i=1;i<total_coeff;i++) {
/* run_before: a dedicated VLC per zeros_left value below 7, run7_vlc otherwise. The leading `if` of this chain is not visible. */
497 else if(zeros_left < 7){
498 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
500 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
/* Step back over the zeros and the coefficient itself. */
502 zeros_left -= run_before;
503 coeff_num -= 1 + run_before;
504 j= scantable[ coeff_num ];
/* Second placement loop: each level is scaled by qmul[j], rounded with +32 and shifted right by 6. */
509 block[j] = (level[0] * qmul[j] + 32)>>6;
510 for(i=1;i<total_coeff;i++) {
513 else if(zeros_left < 7){
514 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
516 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
518 zeros_left -= run_before;
519 coeff_num -= 1 + run_before;
520 j= scantable[ coeff_num ];
522 block[j]= (level[i] * qmul[j] + 32)>>6;
/* Error report for an inconsistent zero count. The guarding condition and the return are not visible in this view. */
527 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
534 int ff_h264_decode_mb_cavlc(H264Context *h){
535 MpegEncContext * const s = &h->s;
538 unsigned int mb_type, cbp;
539 int dct8x8_allowed= h->pps.transform_8x8_mode;
541 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
543 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
544 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
546 if(h->slice_type_nos != FF_I_TYPE){
547 if(s->mb_skip_run==-1)
548 s->mb_skip_run= get_ue_golomb(&s->gb);
550 if (s->mb_skip_run--) {
551 if(FRAME_MBAFF && (s->mb_y&1) == 0){
552 if(s->mb_skip_run==0)
553 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
555 predict_field_decoding_flag(h);
562 if( (s->mb_y&1) == 0 )
563 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
566 h->prev_mb_skipped= 0;
568 mb_type= get_ue_golomb(&s->gb);
569 if(h->slice_type_nos == FF_B_TYPE){
571 partition_count= b_mb_type_info[mb_type].partition_count;
572 mb_type= b_mb_type_info[mb_type].type;
575 goto decode_intra_mb;
577 }else if(h->slice_type_nos == FF_P_TYPE){
579 partition_count= p_mb_type_info[mb_type].partition_count;
580 mb_type= p_mb_type_info[mb_type].type;
583 goto decode_intra_mb;
586 assert(h->slice_type_nos == FF_I_TYPE);
587 if(h->slice_type == FF_SI_TYPE && mb_type)
591 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
595 cbp= i_mb_type_info[mb_type].cbp;
596 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
597 mb_type= i_mb_type_info[mb_type].type;
601 mb_type |= MB_TYPE_INTERLACED;
603 h->slice_table[ mb_xy ]= h->slice_num;
605 if(IS_INTRA_PCM(mb_type)){
608 // We assume these blocks are very rare so we do not optimize it.
609 align_get_bits(&s->gb);
611 // The pixels are stored in the same order as levels in h->mb array.
612 for(x=0; x < (CHROMA ? 384 : 256); x++){
613 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
616 // In deblocking, the quantizer is 0
617 s->current_picture.qscale_table[mb_xy]= 0;
618 // All coeffs are present
619 memset(h->non_zero_count[mb_xy], 16, 32);
621 s->current_picture.mb_type[mb_xy]= mb_type;
626 h->ref_count[0] <<= 1;
627 h->ref_count[1] <<= 1;
630 fill_decode_caches(h, mb_type);
633 if(IS_INTRA(mb_type)){
635 // init_top_left_availability(h);
636 if(IS_INTRA4x4(mb_type)){
639 if(dct8x8_allowed && get_bits1(&s->gb)){
640 mb_type |= MB_TYPE_8x8DCT;
644 // fill_intra4x4_pred_table(h);
645 for(i=0; i<16; i+=di){
646 int mode= pred_intra_mode(h, i);
648 if(!get_bits1(&s->gb)){
649 const int rem_mode= get_bits(&s->gb, 3);
650 mode = rem_mode + (rem_mode >= mode);
654 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
656 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
658 ff_h264_write_back_intra_pred_mode(h);
659 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
662 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
663 if(h->intra16x16_pred_mode < 0)
667 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
670 h->chroma_pred_mode= pred_mode;
672 }else if(partition_count==4){
673 int i, j, sub_partition_count[4], list, ref[2][4];
675 if(h->slice_type_nos == FF_B_TYPE){
677 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
678 if(h->sub_mb_type[i] >=13){
679 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
682 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
683 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
685 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
686 ff_h264_pred_direct_motion(h, &mb_type);
687 h->ref_cache[0][scan8[4]] =
688 h->ref_cache[1][scan8[4]] =
689 h->ref_cache[0][scan8[12]] =
690 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
693 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
695 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
696 if(h->sub_mb_type[i] >=4){
697 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
700 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
701 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
705 for(list=0; list<h->list_count; list++){
706 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
708 if(IS_DIRECT(h->sub_mb_type[i])) continue;
709 if(IS_DIR(h->sub_mb_type[i], 0, list)){
713 }else if(ref_count == 2){
714 tmp= get_bits1(&s->gb)^1;
716 tmp= get_ue_golomb_31(&s->gb);
718 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
731 dct8x8_allowed = get_dct8x8_allowed(h);
733 for(list=0; list<h->list_count; list++){
735 if(IS_DIRECT(h->sub_mb_type[i])) {
736 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
739 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
740 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
742 if(IS_DIR(h->sub_mb_type[i], 0, list)){
743 const int sub_mb_type= h->sub_mb_type[i];
744 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
745 for(j=0; j<sub_partition_count[i]; j++){
747 const int index= 4*i + block_width*j;
748 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
749 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
750 mx += get_se_golomb(&s->gb);
751 my += get_se_golomb(&s->gb);
752 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
754 if(IS_SUB_8X8(sub_mb_type)){
756 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
758 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
759 }else if(IS_SUB_8X4(sub_mb_type)){
760 mv_cache[ 1 ][0]= mx;
761 mv_cache[ 1 ][1]= my;
762 }else if(IS_SUB_4X8(sub_mb_type)){
763 mv_cache[ 8 ][0]= mx;
764 mv_cache[ 8 ][1]= my;
766 mv_cache[ 0 ][0]= mx;
767 mv_cache[ 0 ][1]= my;
770 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
776 }else if(IS_DIRECT(mb_type)){
777 ff_h264_pred_direct_motion(h, &mb_type);
778 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
781 //FIXME we should set ref_idx_l? to 0 if we use that later ...
782 if(IS_16X16(mb_type)){
783 for(list=0; list<h->list_count; list++){
785 if(IS_DIR(mb_type, 0, list)){
786 if(h->ref_count[list]==1){
788 }else if(h->ref_count[list]==2){
789 val= get_bits1(&s->gb)^1;
791 val= get_ue_golomb_31(&s->gb);
792 if(val >= h->ref_count[list]){
793 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
798 val= LIST_NOT_USED&0xFF;
799 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
801 for(list=0; list<h->list_count; list++){
803 if(IS_DIR(mb_type, 0, list)){
804 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
805 mx += get_se_golomb(&s->gb);
806 my += get_se_golomb(&s->gb);
807 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
809 val= pack16to32(mx,my);
812 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
815 else if(IS_16X8(mb_type)){
816 for(list=0; list<h->list_count; list++){
819 if(IS_DIR(mb_type, i, list)){
820 if(h->ref_count[list] == 1){
822 }else if(h->ref_count[list] == 2){
823 val= get_bits1(&s->gb)^1;
825 val= get_ue_golomb_31(&s->gb);
826 if(val >= h->ref_count[list]){
827 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
832 val= LIST_NOT_USED&0xFF;
833 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
836 for(list=0; list<h->list_count; list++){
839 if(IS_DIR(mb_type, i, list)){
840 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
841 mx += get_se_golomb(&s->gb);
842 my += get_se_golomb(&s->gb);
843 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
845 val= pack16to32(mx,my);
848 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
852 assert(IS_8X16(mb_type));
853 for(list=0; list<h->list_count; list++){
856 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
857 if(h->ref_count[list]==1){
859 }else if(h->ref_count[list]==2){
860 val= get_bits1(&s->gb)^1;
862 val= get_ue_golomb_31(&s->gb);
863 if(val >= h->ref_count[list]){
864 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
869 val= LIST_NOT_USED&0xFF;
870 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
873 for(list=0; list<h->list_count; list++){
876 if(IS_DIR(mb_type, i, list)){
877 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
878 mx += get_se_golomb(&s->gb);
879 my += get_se_golomb(&s->gb);
880 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
882 val= pack16to32(mx,my);
885 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
891 if(IS_INTER(mb_type))
892 write_back_motion(h, mb_type);
894 if(!IS_INTRA16x16(mb_type)){
895 cbp= get_ue_golomb(&s->gb);
897 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
902 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
903 else cbp= golomb_to_inter_cbp [cbp];
905 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
906 else cbp= golomb_to_inter_cbp_gray[cbp];
910 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
911 if(get_bits1(&s->gb)){
912 mb_type |= MB_TYPE_8x8DCT;
916 h->cbp_table[mb_xy]= cbp;
917 s->current_picture.mb_type[mb_xy]= mb_type;
919 if(cbp || IS_INTRA16x16(mb_type)){
920 int i8x8, i4x4, chroma_idx;
922 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
923 const uint8_t *scan, *scan8x8, *dc_scan;
925 // fill_non_zero_count_cache(h);
927 if(IS_INTERLACED(mb_type)){
928 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
929 scan= s->qscale ? h->field_scan : h->field_scan_q0;
930 dc_scan= luma_dc_field_scan;
932 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
933 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
934 dc_scan= luma_dc_zigzag_scan;
937 dquant= get_se_golomb(&s->gb);
941 if(((unsigned)s->qscale) > 51){
942 if(s->qscale<0) s->qscale+= 52;
944 if(((unsigned)s->qscale) > 51){
945 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
950 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
951 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
952 if(IS_INTRA16x16(mb_type)){
953 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
954 return -1; //FIXME continue if partitioned and other return -1 too
957 assert((cbp&15) == 0 || (cbp&15) == 15);
960 for(i8x8=0; i8x8<4; i8x8++){
961 for(i4x4=0; i4x4<4; i4x4++){
962 const int index= i4x4 + 4*i8x8;
963 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
969 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
972 for(i8x8=0; i8x8<4; i8x8++){
974 if(IS_8x8DCT(mb_type)){
975 DCTELEM *buf = &h->mb[64*i8x8];
977 for(i4x4=0; i4x4<4; i4x4++){
978 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
979 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
982 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
983 nnz[0] += nnz[1] + nnz[8] + nnz[9];
985 for(i4x4=0; i4x4<4; i4x4++){
986 const int index= i4x4 + 4*i8x8;
988 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
994 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
995 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
1001 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1002 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
1008 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1009 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1010 for(i4x4=0; i4x4<4; i4x4++){
1011 const int index= 16 + 4*chroma_idx + i4x4;
1012 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1018 uint8_t * const nnz= &h->non_zero_count_cache[0];
1019 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1020 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1023 uint8_t * const nnz= &h->non_zero_count_cache[0];
1024 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1025 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1026 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1028 s->current_picture.qscale_table[mb_xy]= s->qscale;
1029 write_back_non_zero_count(h);
1032 h->ref_count[0] >>= 1;
1033 h->ref_count[1] >>= 1;