2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264_cavlc.c
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
39 #include "x86/h264_i386.h"
// Lookup tables used by ff_h264_decode_mb_cavlc() to turn the value it reads
// with get_ue_golomb() into a coded block pattern (cbp). This one is used when
// IS_INTRA4x4(mb_type) does not hold.
// NOTE(review): the condition that selects these *_gray tables instead of
// golomb_to_inter_cbp and golomb_to_intra4x4_cbp is not visible in this view;
// presumably they serve streams without chroma - TODO confirm.
45 static const uint8_t golomb_to_inter_cbp_gray[16]={
46 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// Counterpart used when IS_INTRA4x4(mb_type) holds.
49 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
50 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Code lengths (4*5 entries) handed to init_vlc() when
// ff_h264_decode_init_vlc() builds chroma_dc_coeff_token_vlc.
// The initializer rows are not visible in this view.
53 static const uint8_t chroma_dc_coeff_token_len[4*5]={
// Code bit patterns matching the lengths above, passed to the same init_vlc() call.
61 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
// Code lengths for the four coeff_token VLCs. Row i (4*17 entries) is passed to
// init_vlc() for coeff_token_vlc[i] in ff_h264_decode_init_vlc().
// decode_residual() splits the decoded value as total_coeff = value>>2 and
// trailing_ones = value&3, so entries come in groups of four per total_coeff.
// NOTE(review): the first group of each row and the row delimiters are not
// visible in this view.
69 static const uint8_t coeff_token_len[4][4*17]={
// Row 0.
72 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
73 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
74 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
75 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
// Row 1.
79 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
80 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
81 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
82 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
// Row 2.
86 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
87 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
88 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
89 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
// Row 3: every visible non-zero entry has the same length, 6 bits.
93 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
94 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
95 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
96 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Code bit patterns for the four coeff_token VLCs, laid out like
// coeff_token_len: row i (4*17 entries) is passed to init_vlc() for
// coeff_token_vlc[i] in ff_h264_decode_init_vlc().
// NOTE(review): the first group of each row and the row delimiters are not
// visible in this view.
100 static const uint8_t coeff_token_bits[4][4*17]={
// Row 0.
103 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
104 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
105 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
106 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
// Row 1.
110 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
111 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
112 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
113 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
// Row 2.
117 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
118 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
119 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
120 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
// Row 3: pairs with the fixed-length row of coeff_token_len; apart from the
// first visible groups the entries simply count upwards.
124 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
125 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
126 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
127 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths for the total_zeros VLCs. Row i is passed (16 entries) to
// init_vlc() for total_zeros_vlc[i] in ff_h264_decode_init_vlc();
// decode_residual() selects the VLC with index total_coeff-1.
// Rows written with fewer than 16 initializers are zero-filled by C.
// NOTE(review): only the first seven rows are visible in this view.
131 static const uint8_t total_zeros_len[16][16]= {
132 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
133 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
134 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
135 {5,3,4,4,3,3,3,4,3,4,5,5,5},
136 {4,4,4,3,3,3,3,3,4,5,4,5},
137 {6,5,3,3,3,3,3,3,4,3,6},
138 {6,5,3,3,3,2,3,4,3,6},
// Code bit patterns for the total_zeros VLCs, laid out like total_zeros_len:
// row i is passed to init_vlc() for total_zeros_vlc[i] in
// ff_h264_decode_init_vlc().
// NOTE(review): only the first seven rows are visible in this view.
149 static const uint8_t total_zeros_bits[16][16]= {
150 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
151 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
152 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
153 {3,7,5,4,6,5,4,3,3,2,2,1,0},
154 {5,4,3,7,6,5,4,3,2,1,1,0},
155 {1,1,7,6,5,4,3,2,1,1,0},
156 {1,1,5,4,3,3,2,1,1,0},
// Code lengths for the chroma DC total_zeros VLCs. Row i (4 entries) is passed
// to init_vlc() for chroma_dc_total_zeros_vlc[i] in ff_h264_decode_init_vlc();
// decode_residual() selects the VLC with index total_coeff-1 for chroma DC blocks.
// The initializer rows are not visible in this view.
167 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
// Code bit patterns matching the lengths above.
173 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
// Code lengths for the run_before VLCs. ff_h264_decode_init_vlc() passes row i
// to init_vlc() for run_vlc[i] and row 6 (16 entries) for run7_vlc.
// decode_residual() reads from run_vlc[zeros_left-1] when zeros_left < 7 and
// from run7_vlc otherwise.
// NOTE(review): only the last row is visible in this view.
179 static const uint8_t run_len[7][16]={
186 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
// Code bit patterns matching run_len, row for row.
189 static const uint8_t run_bits[7][16]={
196 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Static VLC objects and their backing storage. ff_h264_decode_init_vlc()
// points each VLC's .table at the matching array below and sets
// .table_allocated from the matching *_size constant before calling init_vlc()
// with INIT_VLC_USE_NEW_STATIC.

// The four coeff_token tables share one array and are packed back to back;
// coeff_token_vlc_tables_size[] holds the per-table sizes whose sum is the
// array length (ff_h264_decode_init_vlc() asserts this).
199 static VLC coeff_token_vlc[4];
200 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
201 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
// coeff_token VLC used by decode_residual() for CHROMA_DC_BLOCK_INDEX blocks.
203 static VLC chroma_dc_coeff_token_vlc;
204 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
205 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// total_zeros VLCs, indexed by total_coeff-1 in decode_residual().
207 static VLC total_zeros_vlc[15];
208 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
209 static const int total_zeros_vlc_tables_size = 512;
// total_zeros VLCs for chroma DC blocks, indexed by total_coeff-1.
211 static VLC chroma_dc_total_zeros_vlc[3];
212 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
213 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before VLCs for zeros_left values 1..6, indexed by zeros_left-1.
215 static VLC run_vlc[6];
216 static VLC_TYPE run_vlc_tables[6][8][2];
217 static const int run_vlc_tables_size = 8;
// Storage for run7_vlc, used when zeros_left >= 7.
// NOTE(review): the declaration of run7_vlc itself is not visible in this view.
220 static VLC_TYPE run7_vlc_table[96][2];
221 static const int run7_vlc_table_size = 96;
// cavlc_level_tab[suffix_length][next LEVEL_TAB_BITS bits of the stream]:
//   [0] is either the decoded signed level or, when >= 100, an escape value of
//       100 + prefix meaning the caller must finish decoding the code itself;
//   [1] is the number of bits to skip.
// Filled by init_cavlc_level_tab().
223 #define LEVEL_TAB_BITS 8
224 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
228 * gets the predicted number of non-zero coefficients.
229 * @param n block index
// Predicts the non-zero coefficient count of block n from its left and top
// neighbours in h->non_zero_count_cache.
// NOTE(review): the declaration of i and the return statement are not visible
// in this view; presumably i starts as left + top and i&31 is returned, which
// is the value logged below - TODO confirm.
231 static inline int pred_non_zero_count(H264Context *h, int n){
// scan8[] maps the block index to its position in the cache; the left
// neighbour sits at offset -1 and the top neighbour at offset -8.
232 const int index8= scan8[n];
233 const int left= h->non_zero_count_cache[index8 - 1];
234 const int top = h->non_zero_count_cache[index8 - 8];
// Below 64 the value is halved with rounding up; values of 64 or more are
// left untouched.
237 if(i<64) i= (i+1)>>1;
239 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fills cavlc_level_tab so that decode_residual() can decode most level codes
// with one lookup on the next LEVEL_TAB_BITS bits of the stream.
// For every suffix_length (0..6) and every LEVEL_TAB_BITS-bit pattern i the
// entry holds {value, bits to skip}; see the three cases below.
244 static av_cold void init_cavlc_level_tab(void){
245 int suffix_length, mask;
248 for(suffix_length=0; suffix_length<7; suffix_length++){
249 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Number of leading zero bits of i within the LEVEL_TAB_BITS window
// (assuming av_log2() is floor(log2()) - TODO confirm, including for i == 0).
250 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Combines the prefix with the bits of i that follow the prefix's
// terminating 1 bit; only meaningful in the first case below.
251 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// Map the unsigned code to a signed level: even codes become positive,
// odd codes negative (0 -> 1, 1 -> -1, 2 -> 2, 3 -> -2, ...).
253 mask= -(level_code&1);
254 level_code= (((2+level_code)>>1) ^ mask) - mask;
// Case 1: prefix, terminating bit and suffix all fit in the window, so the
// final level and the full code length are stored.
255 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
256 cavlc_level_tab[suffix_length][i][0]= level_code;
257 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix fits. Store the escape value 100 + prefix and skip
// just the prefix; the caller reads the suffix itself.
258 }else if(prefix + 1 <= LEVEL_TAB_BITS){
259 cavlc_level_tab[suffix_length][i][0]= prefix+100;
260 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Case 3: the prefix does not end inside the window. Store the escape value
// 100 + LEVEL_TAB_BITS and skip the whole window; decode_residual() then
// continues counting with get_level_prefix().
262 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
263 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds all static VLC tables used by the CAVLC decoder, then fills
// cavlc_level_tab. Each VLC gets its .table pointed at static storage and its
// .table_allocated set to that storage's size before init_vlc() is called with
// INIT_VLC_USE_NEW_STATIC.
// NOTE(review): the loop headers, the local declarations and any run-once
// guard are not visible in this view.
269 av_cold void ff_h264_decode_init_vlc(void){
// coeff_token VLC for chroma DC blocks (4*5 codes).
277 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
278 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
279 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
280 &chroma_dc_coeff_token_len [0], 1, 1,
281 &chroma_dc_coeff_token_bits[0], 1, 1,
282 INIT_VLC_USE_NEW_STATIC);
// The coeff_token VLCs (4*17 codes each) are carved out of the shared
// coeff_token_vlc_tables array; offset advances by each table's size.
286 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
287 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
288 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
289 &coeff_token_len [i][0], 1, 1,
290 &coeff_token_bits[i][0], 1, 1,
291 INIT_VLC_USE_NEW_STATIC);
292 offset += coeff_token_vlc_tables_size[i];
295 * This is a one time safety check to make sure that
296 * the packed static coeff_token_vlc table sizes
297 * were initialized correctly.
299 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// total_zeros VLCs for chroma DC blocks (4 codes each).
302 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
303 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
304 init_vlc(&chroma_dc_total_zeros_vlc[i],
305 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
306 &chroma_dc_total_zeros_len [i][0], 1, 1,
307 &chroma_dc_total_zeros_bits[i][0], 1, 1,
308 INIT_VLC_USE_NEW_STATIC);
// total_zeros VLCs for the other block types (16 codes each).
311 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
312 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
313 init_vlc(&total_zeros_vlc[i],
314 TOTAL_ZEROS_VLC_BITS, 16,
315 &total_zeros_len [i][0], 1, 1,
316 &total_zeros_bits[i][0], 1, 1,
317 INIT_VLC_USE_NEW_STATIC);
// run_before VLCs used when zeros_left < 7.
321 run_vlc[i].table = run_vlc_tables[i];
322 run_vlc[i].table_allocated = run_vlc_tables_size;
323 init_vlc(&run_vlc[i],
325 &run_len [i][0], 1, 1,
326 &run_bits[i][0], 1, 1,
327 INIT_VLC_USE_NEW_STATIC);
// run_before VLC used when zeros_left >= 7, built from the last row of
// run_len and run_bits.
// NOTE(review): the next statement ends with ',' rather than ';'. The comma
// operator makes it behave the same as two statements, but ';' is presumably
// what was intended.
329 run7_vlc.table = run7_vlc_table,
330 run7_vlc.table_allocated = run7_vlc_table_size;
331 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
332 &run_len [6][0], 1, 1,
333 &run_bits[6][0], 1, 1,
334 INIT_VLC_USE_NEW_STATIC);
// Finally build the level lookup table.
336 init_cavlc_level_tab();
// Reads a level prefix from gb by locating the highest set bit in the bit
// reader's cache and consuming the bits up to and including it.
// NOTE(review): the local declarations, OPEN_READER and the return statement
// are not visible in this view; presumably log-1 is returned, which is the
// value logged as "lpr" below - TODO confirm.
343 static inline int get_level_prefix(GetBitContext *gb){
348 UPDATE_CACHE(re, gb);
349 buf=GET_CACHE(re, gb);
// Number of bits to consume, assuming buf is a 32-bit cache and av_log2() is
// floor(log2()) - TODO confirm.
351 log= 32 - av_log2(buf);
// Debug output of the consumed bits.
// NOTE(review): presumably compiled only in a trace build; the guard is not
// visible in this view.
353 print_bin(buf>>(32-log), log);
354 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
357 LAST_SKIP_BITS(re, gb, log);
358 CLOSE_READER(re, gb);
364 * decodes a residual block.
365 * @param n block index
366 * @param scantable scantable
367 * @param max_coeff number of coefficients in the block
368 * @return <0 if an error occurred
// Decodes one CAVLC residual block from gb into block[].
// Steps: coeff_token (total_coeff and trailing_ones), signs of the trailing
// ones, the remaining levels, total_zeros, then one run_before per coefficient
// while the levels are stored at the positions given by scantable.
//   n          block index; CHROMA_DC_BLOCK_INDEX and LUMA_DC_BLOCK_INDEX
//              select special handling of coeff_token
//   scantable  maps a coefficient number to its position in block[]
//   qmul       per-position factors used by the (level*qmul+32)>>6 store path
//   max_coeff  number of coefficients in the block
// The error paths log through av_log(); per the header comment the return
// value is <0 on error.
// NOTE(review): several lines (declarations, some if/else lines, returns) are
// not visible in this view.
370 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
371 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of the four coeff_token VLCs.
372 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
374 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
376 //FIXME put trailing_ones into the context
// Chroma DC blocks have their own coeff_token VLC and use no prediction.
378 if(n == CHROMA_DC_BLOCK_INDEX){
379 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
380 total_coeff= coeff_token>>2;
// Luma DC: the VLC is chosen from the prediction for block 0; the decoded
// count is not written to the cache.
382 if(n == LUMA_DC_BLOCK_INDEX){
383 total_coeff= pred_non_zero_count(h, 0);
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385 total_coeff= coeff_token>>2;
// All other blocks: predict from block n, then record the decoded count in
// the cache for later predictions.
387 total_coeff= pred_non_zero_count(h, n);
388 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
389 total_coeff= coeff_token>>2;
390 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
394 //FIXME set last_non_zero?
// Reject streams that announce more coefficients than the block can hold.
398 if(total_coeff > (unsigned)max_coeff) {
399 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
403 trailing_ones= coeff_token&3;
404 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
405 assert(total_coeff<=16);
// Peek three bits but consume only trailing_ones of them. Each bit is a sign:
// 0 gives +1 and 1 gives -1. Entries beyond trailing_ones are overwritten by
// the level decoding below.
407 i = show_bits(gb, 3);
408 skip_bits(gb, trailing_ones);
409 level[0] = 1-((i&4)>>1);
410 level[1] = 1-((i&2) );
411 level[2] = 1-((i&1)<<1);
413 if(trailing_ones<total_coeff) {
// The first non-trailing level starts with suffix_length 1 only when there are
// more than 10 coefficients and fewer than 3 trailing ones, otherwise 0.
415 int suffix_length = total_coeff > 10 && trailing_ones < 3;
// Fast path: look up the next LEVEL_TAB_BITS bits in cavlc_level_tab.
416 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
417 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
419 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Values >= 100 are escapes carrying the prefix; the suffix is decoded here.
420 if(level_code >= 100){
421 prefix= level_code - 100;
// The prefix did not end inside the table window: keep counting in the stream.
422 if(prefix == LEVEL_TAB_BITS)
423 prefix += get_level_prefix(gb);
425 //first coefficient has suffix_length equal to 0 or 1
426 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
428 level_code= (prefix<<1) + get_bits1(gb); //part
430 level_code= prefix; //part
431 }else if(prefix==14){
433 level_code= (prefix<<1) + get_bits1(gb); //part
435 level_code= prefix + get_bits(gb, 4); //part
437 level_code= 30 + get_bits(gb, prefix-3); //part
439 level_code += (1<<(prefix-3))-4096;
// With fewer than 3 trailing ones the code is shifted by 2, which is one
// magnitude step after the sign mapping below.
442 if(trailing_ones < 3) level_code += 2;
// Unsigned code to signed level: even codes positive, odd codes negative.
445 mask= -(level_code&1);
446 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table path: level_code is already a signed level. The same one-step
// magnitude bump is applied here as +1 or -1 according to its sign.
448 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// True when level_code lies outside [-3, 3].
// NOTE(review): the statement it guards is not visible in this view;
// presumably it raises suffix_length - TODO confirm.
451 if(level_code + 3U > 6U)
453 level[trailing_ones]= level_code;
456 //remaining coefficients have suffix_length > 0
457 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude limit for each suffix_length, used by the check at the end of
// the loop body.
458 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
459 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
460 level_code= cavlc_level_tab[suffix_length][bitsi][0];
462 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Escape path, as for the first level above.
463 if(level_code >= 100){
464 prefix= level_code - 100;
465 if(prefix == LEVEL_TAB_BITS){
466 prefix += get_level_prefix(gb);
469 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
471 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
473 level_code += (1<<(prefix-3))-4096;
475 mask= -(level_code&1);
476 level_code= (((2+level_code)>>1) ^ mask) - mask;
478 level[i]= level_code;
// Unsigned-arithmetic range test: true when level_code lies outside
// [-limit, +limit] for the current suffix_length.
// NOTE(review): the guarded statement is not visible in this view; presumably
// it increments suffix_length - TODO confirm.
480 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// A full block has no zeros to distribute; otherwise total_zeros is read
// with the VLC selected by total_coeff-1.
485 if(total_coeff == max_coeff)
488 if(n == CHROMA_DC_BLOCK_INDEX)
489 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
491 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scan index of the first decoded level; later levels are placed at
// decreasing scan indices.
494 coeff_num = zeros_left + total_coeff - 1;
495 j = scantable[coeff_num];
// First store variant.
// NOTE(review): its selecting condition and its store statements are not
// visible in this view; presumably levels are stored without qmul scaling for
// some block types - TODO confirm.
498 for(i=1;i<total_coeff;i++) {
// run_before comes from run_vlc[zeros_left-1] while fewer than 7 zeros remain
// and from run7_vlc otherwise.
501 else if(zeros_left < 7){
502 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
504 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
506 zeros_left -= run_before;
507 coeff_num -= 1 + run_before;
508 j= scantable[ coeff_num ];
// Second store variant: each level is scaled by qmul with rounding,
// (level * qmul[j] + 32) >> 6.
513 block[j] = (level[0] * qmul[j] + 32)>>6;
514 for(i=1;i<total_coeff;i++) {
517 else if(zeros_left < 7){
518 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
520 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
522 zeros_left -= run_before;
523 coeff_num -= 1 + run_before;
524 j= scantable[ coeff_num ];
526 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error report for inconsistent run data.
// NOTE(review): the guarding condition is not visible in this view;
// presumably zeros_left < 0 - TODO confirm.
531 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Decodes one macroblock of a CAVLC-coded slice from s->gb.
// Steps: skip-run handling, mb_type, then either I_PCM samples, intra
// prediction modes or inter reference indices and motion vectors, followed by
// the coded block pattern, dquant and the residual blocks via decode_residual().
// A residual decoding failure returns -1.
// NOTE(review): many lines (declarations, conditions, returns, closing braces)
// are not visible in this view, so the comments below describe only the
// visible statements.
538 int ff_h264_decode_mb_cavlc(H264Context *h){
539 MpegEncContext * const s = &h->s;
542 unsigned int mb_type, cbp;
543 int dct8x8_allowed= h->pps.transform_8x8_mode;
545 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
547 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
548 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-I slices: mb_skip_run == -1 means a new run length must be read.
// A non-zero run makes this macroblock a skipped one.
550 if(h->slice_type_nos != FF_I_TYPE){
551 if(s->mb_skip_run==-1)
552 s->mb_skip_run= get_ue_golomb(&s->gb);
554 if (s->mb_skip_run--) {
// In MBAFF frames, for the top macroblock of a pair (even mb_y), the field
// decoding flag is read when the run ends here and predicted otherwise.
555 if(FRAME_MBAFF && (s->mb_y&1) == 0){
556 if(s->mb_skip_run==0)
557 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
559 predict_field_decoding_flag(h);
// Non-skipped macroblock: on even mb_y the field decoding flag is read.
// NOTE(review): the enclosing condition is not visible in this view.
566 if( (s->mb_y&1) == 0 )
567 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
570 h->prev_mb_skipped= 0;
// mb_type is read as ue-golomb and translated through the table matching
// the slice type; intra types inside B and P slices jump to decode_intra_mb.
572 mb_type= get_ue_golomb(&s->gb);
573 if(h->slice_type_nos == FF_B_TYPE){
575 partition_count= b_mb_type_info[mb_type].partition_count;
576 mb_type= b_mb_type_info[mb_type].type;
579 goto decode_intra_mb;
581 }else if(h->slice_type_nos == FF_P_TYPE){
583 partition_count= p_mb_type_info[mb_type].partition_count;
584 mb_type= p_mb_type_info[mb_type].type;
587 goto decode_intra_mb;
590 assert(h->slice_type_nos == FF_I_TYPE);
591 if(h->slice_type == FF_SI_TYPE && mb_type)
595 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra macroblock: cbp and the 16x16 prediction mode come from the type table.
599 cbp= i_mb_type_info[mb_type].cbp;
600 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
601 mb_type= i_mb_type_info[mb_type].type;
605 mb_type |= MB_TYPE_INTERLACED;
607 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw samples follow, byte aligned.
609 if(IS_INTRA_PCM(mb_type)){
612 // We assume these blocks are very rare so we do not optimize it.
613 align_get_bits(&s->gb);
615 // The pixels are stored in the same order as levels in h->mb array.
// 384 bytes are read when CHROMA is set, 256 otherwise.
616 for(x=0; x < (CHROMA ? 384 : 256); x++){
617 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
620 // In deblocking, the quantizer is 0
621 s->current_picture.qscale_table[mb_xy]= 0;
622 // All coeffs are present
623 memset(h->non_zero_count[mb_xy], 16, 32);
625 s->current_picture.mb_type[mb_xy]= mb_type;
// Reference counts are doubled here and halved again at the end of the function.
// NOTE(review): the guarding condition is not visible in this view.
630 h->ref_count[0] <<= 1;
631 h->ref_count[1] <<= 1;
634 fill_decode_caches(h, mb_type);
// ---- Intra prediction modes ----
637 if(IS_INTRA(mb_type)){
639 // init_top_left_availability(h);
640 if(IS_INTRA4x4(mb_type)){
// One bit selects the 8x8 transform when the PPS allows it.
643 if(dct8x8_allowed && get_bits1(&s->gb)){
644 mb_type |= MB_TYPE_8x8DCT;
648 // fill_intra4x4_pred_table(h);
649 for(i=0; i<16; i+=di){
650 int mode= pred_intra_mode(h, i);
// A 0 flag bit means the predicted mode is not used: a 3-bit remainder is
// read and bumped by one when it is >= the predicted mode.
652 if(!get_bits1(&s->gb)){
653 const int rem_mode= get_bits(&s->gb, 3);
654 mode = rem_mode + (rem_mode >= mode);
// The mode is stored either for a 2x2 group of cache entries or for one entry.
// NOTE(review): the condition choosing between them is not visible in this view.
658 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
660 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
662 ff_h264_write_back_intra_pred_mode(h);
663 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
// Otherwise the 16x16 prediction mode is validated.
666 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
667 if(h->intra16x16_pred_mode < 0)
// The chroma prediction mode is read and validated.
671 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
674 h->chroma_pred_mode= pred_mode;
// ---- Inter, four 8x8 sub-macroblocks ----
676 }else if(partition_count==4){
677 int i, j, sub_partition_count[4], list, ref[2][4];
// Sub-macroblock types: range checked, then translated through the B or P table.
679 if(h->slice_type_nos == FF_B_TYPE){
681 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
682 if(h->sub_mb_type[i] >=13){
683 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
686 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
687 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any sub-macroblock is direct, direct prediction is run and four
// ref_cache entries are marked PART_NOT_AVAILABLE.
689 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
690 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
691 ff_h264_pred_direct_motion(h, &mb_type);
692 h->ref_cache[0][scan8[4]] =
693 h->ref_cache[1][scan8[4]] =
694 h->ref_cache[0][scan8[12]] =
695 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
698 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
700 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
701 if(h->sub_mb_type[i] >=4){
702 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
705 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
706 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and sub-macroblock. With exactly two references
// the index is a single inverted bit; otherwise it is read as ue-golomb.
710 for(list=0; list<h->list_count; list++){
711 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
713 if(IS_DIRECT(h->sub_mb_type[i])) continue;
714 if(IS_DIR(h->sub_mb_type[i], 0, list)){
718 }else if(ref_count == 2){
719 tmp= get_bits1(&s->gb)^1;
721 tmp= get_ue_golomb_31(&s->gb);
723 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
736 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per list and sub-macroblock.
738 for(list=0; list<h->list_count; list++){
740 if(IS_DIRECT(h->sub_mb_type[i])) {
741 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// The reference index is replicated over the 2x2 cache entries of this
// sub-macroblock.
744 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
745 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
747 if(IS_DIR(h->sub_mb_type[i], 0, list)){
748 const int sub_mb_type= h->sub_mb_type[i];
749 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
750 for(j=0; j<sub_partition_count[i]; j++){
752 const int index= 4*i + block_width*j;
753 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Predicted vector plus the two signed golomb differences from the stream.
754 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
755 mx += get_se_golomb(&s->gb);
756 my += get_se_golomb(&s->gb);
757 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// The vector is replicated into the cache entries covered by the sub-partition.
759 if(IS_SUB_8X8(sub_mb_type)){
761 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
763 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
764 }else if(IS_SUB_8X4(sub_mb_type)){
765 mv_cache[ 1 ][0]= mx;
766 mv_cache[ 1 ][1]= my;
767 }else if(IS_SUB_4X8(sub_mb_type)){
768 mv_cache[ 8 ][0]= mx;
769 mv_cache[ 8 ][1]= my;
771 mv_cache[ 0 ][0]= mx;
772 mv_cache[ 0 ][1]= my;
// List not used by this sub-macroblock: its mv_cache entries are accessed as
// 32-bit words.
// NOTE(review): the stores themselves are not visible in this view.
775 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// ---- Direct macroblock ----
781 }else if(IS_DIRECT(mb_type)){
782 ff_h264_pred_direct_motion(h, &mb_type);
783 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// ---- Inter 16x16, 16x8 and 8x16 ----
// Each shape first reads all reference indices, then all motion vectors.
786 //FIXME we should set ref_idx_l? to 0 if we use that later ...
787 if(IS_16X16(mb_type)){
788 for(list=0; list<h->list_count; list++){
790 if(IS_DIR(mb_type, 0, list)){
791 if(h->ref_count[list]==1){
793 }else if(h->ref_count[list]==2){
794 val= get_bits1(&s->gb)^1;
796 val= get_ue_golomb_31(&s->gb);
797 if(val >= h->ref_count[list]){
798 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
803 val= LIST_NOT_USED&0xFF;
804 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
806 for(list=0; list<h->list_count; list++){
808 if(IS_DIR(mb_type, 0, list)){
809 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
810 mx += get_se_golomb(&s->gb);
811 my += get_se_golomb(&s->gb);
812 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
814 val= pack16to32(mx,my);
817 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: partition i covers a 4x2 area of cache entries starting at scan8[0] + 16*i.
820 else if(IS_16X8(mb_type)){
821 for(list=0; list<h->list_count; list++){
824 if(IS_DIR(mb_type, i, list)){
825 if(h->ref_count[list] == 1){
827 }else if(h->ref_count[list] == 2){
828 val= get_bits1(&s->gb)^1;
830 val= get_ue_golomb_31(&s->gb);
831 if(val >= h->ref_count[list]){
832 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
837 val= LIST_NOT_USED&0xFF;
838 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
841 for(list=0; list<h->list_count; list++){
844 if(IS_DIR(mb_type, i, list)){
845 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
846 mx += get_se_golomb(&s->gb);
847 my += get_se_golomb(&s->gb);
848 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
850 val= pack16to32(mx,my);
853 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: partition i covers a 2x4 area of cache entries starting at scan8[0] + 2*i.
857 assert(IS_8X16(mb_type));
858 for(list=0; list<h->list_count; list++){
861 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
862 if(h->ref_count[list]==1){
864 }else if(h->ref_count[list]==2){
865 val= get_bits1(&s->gb)^1;
867 val= get_ue_golomb_31(&s->gb);
868 if(val >= h->ref_count[list]){
869 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
874 val= LIST_NOT_USED&0xFF;
875 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
878 for(list=0; list<h->list_count; list++){
881 if(IS_DIR(mb_type, i, list)){
882 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
883 mx += get_se_golomb(&s->gb);
884 my += get_se_golomb(&s->gb);
885 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
887 val= pack16to32(mx,my);
890 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
896 if(IS_INTER(mb_type))
897 write_back_motion(h, mb_type);
// ---- Coded block pattern ----
// Intra16x16 already took cbp from the type table; every other type reads a
// ue-golomb code and maps it through one of the golomb_to_*_cbp tables.
899 if(!IS_INTRA16x16(mb_type)){
900 cbp= get_ue_golomb(&s->gb);
902 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
907 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
908 else cbp= golomb_to_inter_cbp [cbp];
910 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
911 else cbp= golomb_to_inter_cbp_gray[cbp];
// For non-intra macroblocks with luma residual, one bit selects the 8x8
// transform when it is still allowed.
915 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
916 if(get_bits1(&s->gb)){
917 mb_type |= MB_TYPE_8x8DCT;
921 h->cbp_table[mb_xy]= cbp;
922 s->current_picture.mb_type[mb_xy]= mb_type;
// ---- Residual ----
924 if(cbp || IS_INTRA16x16(mb_type)){
925 int i8x8, i4x4, chroma_idx;
// Intra and inter residuals are read through separate bit reader pointers.
927 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
928 const uint8_t *scan, *scan8x8, *dc_scan;
930 // fill_non_zero_count_cache(h);
// Field macroblocks use the field scans, others the zigzag scans; the *_q0
// variants are chosen when s->qscale is 0.
932 if(IS_INTERLACED(mb_type)){
933 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
934 scan= s->qscale ? h->field_scan : h->field_scan_q0;
935 dc_scan= luma_dc_field_scan;
937 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
938 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
939 dc_scan= luma_dc_zigzag_scan;
// The quantizer delta is range checked.
942 dquant= get_se_golomb(&s->gb);
944 if( dquant > 25 || dquant < -26 ){
945 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// s->qscale is wrapped back into 0..51 when it has left that range.
// NOTE(review): the line that applies dquant and the branch for values above
// 51 are not visible in this view.
950 if(((unsigned)s->qscale) > 51){
951 if(s->qscale<0) s->qscale+= 52;
955 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
956 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra16x16: one luma DC block of 16 coefficients, then AC blocks of 15
// coefficients each, scanned from scan + 1.
957 if(IS_INTRA16x16(mb_type)){
958 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
959 return -1; //FIXME continue if partitioned and other return -1 too
962 assert((cbp&15) == 0 || (cbp&15) == 15);
965 for(i8x8=0; i8x8<4; i8x8++){
966 for(i4x4=0; i4x4<4; i4x4++){
967 const int index= i4x4 + 4*i8x8;
968 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
// Clears the luma entries of the non-zero count cache.
// NOTE(review): the guarding condition is not visible in this view.
974 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other macroblock types: luma is decoded per 8x8 quadrant.
977 for(i8x8=0; i8x8<4; i8x8++){
// 8x8 transform: four calls of 16 coefficients each fill one 64-entry buffer,
// each with its own 16-entry slice of scan8x8. Afterwards the four counts are
// summed into the first cache entry of the quadrant.
979 if(IS_8x8DCT(mb_type)){
980 DCTELEM *buf = &h->mb[64*i8x8];
982 for(i4x4=0; i4x4<4; i4x4++){
983 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
984 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
987 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
988 nnz[0] += nnz[1] + nnz[8] + nnz[9];
// 4x4 transform: four independent blocks of 16 coefficients.
990 for(i4x4=0; i4x4<4; i4x4++){
991 const int index= i4x4 + 4*i8x8;
993 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// Quadrant without coded coefficients: its four cache entries are zeroed.
// NOTE(review): the guarding condition is not visible in this view.
999 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
1000 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one block of 4 coefficients per plane, with qmul passed as NULL.
1006 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1007 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four blocks of 15 coefficients per plane, dequantized with the
// plane's own chroma_qp.
1013 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1014 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1015 for(i4x4=0; i4x4<4; i4x4++){
1016 const int index= 16 + 4*chroma_idx + i4x4;
1017 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC: the chroma entries of the non-zero count cache are zeroed.
1023 uint8_t * const nnz= &h->non_zero_count_cache[0];
1024 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1025 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: luma and chroma entries of the cache are zeroed.
1028 uint8_t * const nnz= &h->non_zero_count_cache[0];
1029 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1030 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1031 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// Publish the quantizer and the non-zero counts for this macroblock.
1033 s->current_picture.qscale_table[mb_xy]= s->qscale;
1034 write_back_non_zero_count(h);
// Undo the doubling of the reference counts done earlier in this function.
// NOTE(review): the guarding condition is not visible in this view.
1037 h->ref_count[0] >>= 1;
1038 h->ref_count[1] >>= 1;