2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
33 #include "h264data.h" // FIXME FIXME FIXME
34 #include "h264_mvpred.h"
36 #include "mpegutils.h"
/* Lookup tables that turn the cbp code number read with get_ue_golomb()
 * in ff_h264_decode_mb_cavlc() into a coded block pattern value.
 * This one is used for macroblocks that are not intra 4x4. */
40 static const uint8_t golomb_to_inter_cbp_gray[16]={
41 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Counterpart used when IS_INTRA4x4(mb_type) is true. */
44 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
45 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Source tables for the chroma DC coeff_token VLCs. The _len table is passed
 * to init_vlc() as the first table argument and the _bits table as the
 * second one (see ff_h264_decode_init_vlc()).
 * decode_residual() splits the decoded symbol as total_coeff = symbol >> 2
 * and trailing_ones = symbol & 3. */
48 static const uint8_t chroma_dc_coeff_token_len[4*5]={
56 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/* Same pair of tables for the chroma422 variant, built with 4*9 codes. */
64 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
76 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Source tables for the four coeff_token VLCs used for non chroma DC blocks.
 * The first index selects the VLC; decode_residual() picks it through
 * coeff_token_table_index[] from the predicted non-zero count.
 * Each VLC is built from 4*17 codes by ff_h264_decode_init_vlc(), with this
 * table passed as the first table argument of init_vlc(). */
88 static const uint8_t coeff_token_len[4][4*17]={
91 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
92 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
93 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
94 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
98 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
99 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
100 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
101 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
105 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
106 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
107 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
108 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
112 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
113 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Companion of coeff_token_len[][]: same layout, passed to init_vlc() as the
 * second table argument for the same four VLCs. */
119 static const uint8_t coeff_token_bits[4][4*17]={
122 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
123 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
124 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
125 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
129 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
130 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
131 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
132 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
136 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
137 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
138 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
139 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
143 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
144 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
145 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
146 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Source tables for the total_zeros VLCs. Row i feeds total_zeros_vlc[i] in
 * ff_h264_decode_init_vlc() (16 codes per row); decode_residual() selects the
 * VLC with index total_coeff - 1. Rows are shorter than 16 entries, so the
 * remaining elements are zero-initialized. */
150 static const uint8_t total_zeros_len[16][16]= {
151 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
152 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
153 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
154 {5,3,4,4,3,3,3,4,3,4,5,5,5},
155 {4,4,4,3,3,3,3,3,4,5,4,5},
156 {6,5,3,3,3,3,3,3,4,3,6},
157 {6,5,3,3,3,2,3,4,3,6},
/* Companion of total_zeros_len[][], passed to init_vlc() as the second table
 * argument for the same VLCs. */
168 static const uint8_t total_zeros_bits[16][16]= {
169 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
170 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
171 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
172 {3,7,5,4,6,5,4,3,3,2,2,1,0},
173 {5,4,3,7,6,5,4,3,2,1,1,0},
174 {1,1,7,6,5,4,3,2,1,1,0},
175 {1,1,5,4,3,3,2,1,1,0},
/* Source tables for the chroma DC total_zeros VLCs: 3 VLCs of 4 codes each,
 * built in ff_h264_decode_init_vlc() and selected in decode_residual() with
 * index total_coeff - 1. */
186 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
192 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* chroma422 variant: 7 VLCs of 8 codes each, used the same way. */
198 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
199 { 1, 3, 3, 4, 4, 4, 5, 5 },
200 { 3, 2, 3, 3, 3, 3, 3 },
201 { 3, 3, 2, 2, 3, 3 },
208 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
209 { 1, 2, 3, 2, 3, 1, 1, 0 },
210 { 0, 1, 1, 4, 5, 6, 7 },
211 { 0, 1, 1, 2, 6, 7 },
/* Source tables for the run_before VLCs. Rows 0..5 feed run_vlc[0..5] and
 * row 6 feeds run7_vlc (built from 16 codes) in ff_h264_decode_init_vlc(). */
218 static const uint8_t run_len[7][16]={
225 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
/* Companion of run_len[][], passed to init_vlc() as the second table
 * argument for the same VLCs. */
228 static const uint8_t run_bits[7][16]={
235 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Static storage for all CAVLC lookup tables. For each VLC there is a
 * statically sized backing array and a matching size constant; both are
 * attached to the VLC (table / table_allocated) in ff_h264_decode_init_vlc()
 * before init_vlc() is called with INIT_VLC_USE_NEW_STATIC. */

/* The four coeff_token VLCs share one packed array; the per-VLC sizes below
 * must add up to the array length (asserted in ff_h264_decode_init_vlc()). */
238 static VLC coeff_token_vlc[4];
239 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
240 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* 256 entries = 1 << CHROMA_DC_COEFF_TOKEN_VLC_BITS. */
242 static VLC chroma_dc_coeff_token_vlc;
243 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
244 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* 8192 entries = 1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS. */
246 static VLC chroma422_dc_coeff_token_vlc;
247 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
248 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
/* 512 entries per VLC = 1 << TOTAL_ZEROS_VLC_BITS. */
250 static VLC total_zeros_vlc[15];
251 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
252 static const int total_zeros_vlc_tables_size = 512;
/* 8 entries per VLC = 1 << CHROMA_DC_TOTAL_ZEROS_VLC_BITS. */
254 static VLC chroma_dc_total_zeros_vlc[3];
255 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
256 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* 32 entries per VLC = 1 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS. */
258 static VLC chroma422_dc_total_zeros_vlc[7];
259 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
260 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
/* 8 entries per VLC = 1 << RUN_VLC_BITS. */
262 static VLC run_vlc[6];
263 static VLC_TYPE run_vlc_tables[6][8][2];
264 static const int run_vlc_tables_size = 8;
/* Backing store for run7_vlc; 96 entries is more than 1 << RUN7_VLC_BITS,
 * and decode_residual() reads this VLC with a max depth of 2. */
267 static VLC_TYPE run7_vlc_table[96][2];
268 static const int run7_vlc_table_size = 96;
/* cavlc_level_tab[suffix_length][next LEVEL_TAB_BITS bits of the stream]:
 * element [0] is either a decoded level or an escape value >= 100,
 * element [1] is the number of bits to skip. Filled by
 * init_cavlc_level_tab(). */
270 #define LEVEL_TAB_BITS 8
271 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* First-stage lookup widths, used both as the nb_bits argument of init_vlc()
 * and as the bits argument of get_vlc2(). */
273 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
274 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
275 #define COEFF_TOKEN_VLC_BITS 8
276 #define TOTAL_ZEROS_VLC_BITS 9
277 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
278 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
279 #define RUN_VLC_BITS 3
280 #define RUN7_VLC_BITS 6
/* Prediction of the coefficient count for block n from its neighbours.
 * The left and top counts are read from sl->non_zero_count_cache at the
 * scan8[] position of the block (offsets -1 and -8). The caller uses the
 * result to index coeff_token_table_index[] in decode_residual(). */
283 * Get the predicted number of non-zero coefficients.
284 * @param n block index
286 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
288 const int index8= scan8[n];
// Neighbouring counts: one cache cell to the left and one cache row (8 cells) above.
289 const int left = sl->non_zero_count_cache[index8 - 1];
290 const int top = sl->non_zero_count_cache[index8 - 8];
// Values below 64 are halved with rounding up; values of 64 and above are left as they are.
293 if(i<64) i= (i+1)>>1;
// Trace output only; it prints the value masked to 5 bits.
295 ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fill cavlc_level_tab[][][] for every suffix_length (0..6) and every
 * LEVEL_TAB_BITS-wide bit pattern i.
 * For each pattern the table stores either a fully decoded signed level and
 * the number of bits it used, or an escape value (prefix + 100) together with
 * the number of prefix bits to skip, so that decode_residual() can finish
 * decoding the level by hand. */
300 static av_cold void init_cavlc_level_tab(void){
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// prefix: LEVEL_TAB_BITS minus av_log2(2*i); for non-zero i this is the
// number of zero bits above the highest set bit of the pattern
// (assumes av_log2 is floor(log2), TODO confirm).
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix, its terminating bit and the whole suffix fit in the pattern.
308 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
// Combine prefix and the suffix_length bits that follow the first set bit.
309 int level_code = (prefix << suffix_length) +
310 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// Odd codes become negative levels, even codes positive ones.
311 int mask = -(level_code&1);
312 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313 cavlc_level_tab[suffix_length][i][0]= level_code;
314 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix fits; store it as an escape value (prefix + 100).
315 }else if(prefix + 1 <= LEVEL_TAB_BITS){
316 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case: escape value LEVEL_TAB_BITS + 100 with LEVEL_TAB_BITS bits
// to skip; decode_residual() then continues counting with get_level_prefix().
319 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* Build every static CAVLC table used by this file.
 * Each VLC is pointed at its static backing array, told how many entries the
 * array holds (table_allocated) and then built by init_vlc() with
 * INIT_VLC_USE_NEW_STATIC from the matching _len / _bits source tables.
 * The level lookup table is filled last by init_cavlc_level_tab(). */
326 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token, 4*5 codes.
334 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337 &chroma_dc_coeff_token_len [0], 1, 1,
338 &chroma_dc_coeff_token_bits[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC);
// chroma422 DC coeff_token, 4*9 codes.
341 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344 &chroma422_dc_coeff_token_len [0], 1, 1,
345 &chroma422_dc_coeff_token_bits[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC);
// coeff_token VLC i: its table is a slice of the packed array, starting at
// the running offset and coeff_token_vlc_tables_size[i] entries long.
350 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353 &coeff_token_len [i][0], 1, 1,
354 &coeff_token_bits[i][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC);
356 offset += coeff_token_vlc_tables_size[i];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
// The per-VLC sizes must add up to exactly the packed array length.
363 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros VLC i, 4 codes.
366 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368 init_vlc(&chroma_dc_total_zeros_vlc[i],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370 &chroma_dc_total_zeros_len [i][0], 1, 1,
371 &chroma_dc_total_zeros_bits[i][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC);
// chroma422 DC total_zeros VLC i, 8 codes.
376 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378 init_vlc(&chroma422_dc_total_zeros_vlc[i],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380 &chroma422_dc_total_zeros_len [i][0], 1, 1,
381 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC);
// total_zeros VLC i, 16 codes.
386 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388 init_vlc(&total_zeros_vlc[i],
389 TOTAL_ZEROS_VLC_BITS, 16,
390 &total_zeros_len [i][0], 1, 1,
391 &total_zeros_bits[i][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC);
// run_before VLC i, built from row i of run_len / run_bits.
396 run_vlc[i].table = run_vlc_tables[i];
397 run_vlc[i].table_allocated = run_vlc_tables_size;
398 init_vlc(&run_vlc[i],
400 &run_len [i][0], 1, 1,
401 &run_bits[i][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC);
// run7_vlc uses row 6 of run_len / run_bits and 16 codes.
// NOTE(review): the next statement ends with a comma, not a semicolon, so it
// is joined to the following assignment by the comma operator. The effect is
// the same as with a semicolon, but a semicolon was probably intended.
404 run7_vlc.table = run7_vlc_table,
405 run7_vlc.table_allocated = run7_vlc_table_size;
406 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407 &run_len [6][0], 1, 1,
408 &run_bits[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC);
// Finally fill the direct level lookup table.
411 init_cavlc_level_tab();
/* Read a level prefix directly from the bit reader cache.
 * It looks at the cached bits, locates the first set bit and skips all bits
 * up to and including it. decode_residual() calls this when the table lookup
 * in cavlc_level_tab found only zero bits in its window. */
418 static inline int get_level_prefix(GetBitContext *gb){
423 UPDATE_CACHE(re, gb);
424 buf=GET_CACHE(re, gb);
// log = 32 - av_log2(buf): position of the first set bit counted from the top
// of the 32-bit cache word, starting at 1 (assumes av_log2 is floor(log2),
// TODO confirm).
426 log= 32 - av_log2(buf);
// Debug output: the consumed bits and the resulting prefix value (log-1).
428 print_bin(buf>>(32-log), log);
429 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the zero run together with its terminating one bit.
432 LAST_SKIP_BITS(re, gb, log);
433 CLOSE_READER(re, gb);
/* CAVLC residual block parser.
 * Steps visible in this function:
 *  1. read coeff_token with the VLC matching the block kind and split it into
 *     total_coeff (token >> 2) and trailing_ones (token & 3);
 *  2. read the signs of the trailing ones and then the remaining levels,
 *     using cavlc_level_tab for short codes and manual prefix / suffix
 *     parsing for escapes;
 *  3. read total_zeros, then run_before for each coefficient while zeros
 *     remain, and store the levels into block through scantable
 *     (STORE_BLOCK).
 * gb is the bit reader; block receives the coefficients; qmul is the
 * dequantization table applied when n is below LUMA_DC_BLOCK_INDEX (callers
 * pass NULL for DC blocks). */
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scantable
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
445 static int decode_residual(const H264Context *h, H264SliceContext *sl,
446 GetBitContext *gb, int16_t *block, int n,
447 const uint8_t *scantable, const uint32_t *qmul,
// Maps a predicted non-zero count (0..16) to one of the four coeff_token VLCs.
450 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
452 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
454 //FIXME put trailing_ones into the context
// Chroma DC blocks: dedicated coeff_token VLCs (regular and chroma422).
458 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
460 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
461 total_coeff= coeff_token>>2;
// Luma DC blocks: the prediction is taken from block (n - LUMA_DC_BLOCK_INDEX)*16.
463 if(n >= LUMA_DC_BLOCK_INDEX){
464 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
465 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466 total_coeff= coeff_token>>2;
// All other blocks: predict from the neighbours of block n itself.
468 total_coeff= pred_non_zero_count(h, sl, n);
469 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
470 total_coeff= coeff_token>>2;
// Remember the count so later blocks can use it as a neighbour.
473 sl->non_zero_count_cache[scan8[n]] = total_coeff;
475 //FIXME set last_non_zero?
// The comparison is done in unsigned arithmetic, so a negative total_coeff is
// rejected here as well.
479 if(total_coeff > (unsigned)max_coeff) {
480 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
484 trailing_ones= coeff_token&3;
485 ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
486 assert(total_coeff<=16);
// Peek three bits but consume only trailing_ones of them; each bit is a sign:
// a cleared bit gives +1 and a set bit gives -1.
488 i = show_bits(gb, 3);
489 skip_bits(gb, trailing_ones);
490 level[0] = 1-((i&4)>>1);
491 level[1] = 1-((i&2) );
492 level[2] = 1-((i&1)<<1);
494 if(trailing_ones<total_coeff) {
// Initial suffix_length is 1 when total_coeff > 10 and trailing_ones < 3,
// otherwise 0 (bitwise AND of two 0/1 comparison results).
496 int suffix_length = total_coeff > 10 & trailing_ones < 3;
// Table lookup on the next LEVEL_TAB_BITS bits of the stream.
497 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
498 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
500 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Values >= 100 are escapes that carry prefix + 100 (see init_cavlc_level_tab).
501 if(level_code >= 100){
502 prefix= level_code - 100;
// The window held only zeros: keep counting the prefix in the stream.
503 if(prefix == LEVEL_TAB_BITS)
504 prefix += get_level_prefix(gb);
506 //first coefficient has suffix_length equal to 0 or 1
507 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
509 level_code= (prefix<<1) + get_bits1(gb); //part
511 level_code= prefix; //part
512 }else if(prefix==14){
514 level_code= (prefix<<1) + get_bits1(gb); //part
516 level_code= prefix + get_bits(gb, 4); //part
518 level_code= 30 + get_bits(gb, prefix-3); //part
521 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
524 level_code += (1<<(prefix-3))-4096;
// With fewer than three trailing ones the level code is offset by 2.
528 if(trailing_ones < 3) level_code += 2;
// Odd codes become negative levels, even codes positive ones.
531 mask= -(level_code&1);
532 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: apply the same offset to the already signed level, moving it
// away from zero by one when trailing_ones < 3.
534 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// suffix_length becomes 2 when the level is outside -3..3, otherwise 1
// (unsigned arithmetic makes the single comparison cover both signs).
536 suffix_length = 1 + (level_code + 3U > 6U);
537 level[trailing_ones]= level_code;
540 //remaining coefficients have suffix_length > 0
541 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude thresholds above which suffix_length is increased, indexed by
// the current suffix_length.
542 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
543 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
544 level_code= cavlc_level_tab[suffix_length][bitsi][0];
546 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
547 if(level_code >= 100){
548 prefix= level_code - 100;
549 if(prefix == LEVEL_TAB_BITS){
550 prefix += get_level_prefix(gb);
553 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
555 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
557 level_code += (1<<(prefix-3))-4096;
559 mask= -(level_code&1);
560 level_code= (((2+level_code)>>1) ^ mask) - mask;
562 level[i]= level_code;
// Increment suffix_length when the level magnitude exceeds the current limit;
// the unsigned comparison handles positive and negative levels at once.
563 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
567 if(total_coeff == max_coeff)
// total_zeros: blocks with max_coeff <= 8 use the chroma DC VLCs, all others
// use total_zeros_vlc; the VLC index is total_coeff - 1 in every case.
570 if (max_coeff <= 8) {
572 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
573 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
575 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
576 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
578 zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK(type) writes the levels into block, viewed as an array of type,
// walking scantable backwards from position zeros_left + total_coeff - 1.
// For n >= LUMA_DC_BLOCK_INDEX the raw levels are stored; otherwise each
// level is scaled as (level * qmul[pos] + 32) >> 6. While zeros remain a
// run_before value is read for each coefficient: run_vlc[zeros_left - 1] or
// run7_vlc. Once zeros_left reaches 0 the rest is stored without reading runs.
582 #define STORE_BLOCK(type) \
583 scantable += zeros_left + total_coeff - 1; \
584 if(n >= LUMA_DC_BLOCK_INDEX){ \
585 ((type*)block)[*scantable] = level[0]; \
586 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
588 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
590 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
591 zeros_left -= run_before; \
592 scantable -= 1 + run_before; \
593 ((type*)block)[*scantable]= level[i]; \
595 for(;i<total_coeff;i++) { \
597 ((type*)block)[*scantable]= level[i]; \
600 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
601 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
603 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
605 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
606 zeros_left -= run_before; \
607 scantable -= 1 + run_before; \
608 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610 for(;i<total_coeff;i++) { \
612 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
// A negative zeros_left is reported as invalid data.
616 if (zeros_left < 0) {
617 av_log(h->avctx, AV_LOG_ERROR,
618 "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
619 return AVERROR_INVALIDDATA;
// NOTE(review): presumably selects the element type given to STORE_BLOCK
// depending on the sample size; confirm against the macro invocations.
622 if (h->pixel_shift) {
/* Decode all residual blocks of plane p of the current macroblock.
 * scan and scan8x8 are the 4x4 and 8x8 scan tables, pixel_shift scales the
 * offsets into sl->mb, cbp is the coded block pattern, and p is the plane
 * index (0 uses sl->qscale, 1 and 2 use sl->chroma_qp[p - 1]).
 * Intra 16x16 macroblocks decode one DC block and then 15-coefficient AC
 * blocks; other macroblocks decode four 8x8 groups, either as 8x8 transform
 * blocks or as four 4x4 blocks each. new_cbp collects which 8x8 groups ended
 * up with non-zero coefficients. */
631 static av_always_inline
632 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
633 GetBitContext *gb, const uint8_t *scan,
634 const uint8_t *scan8x8, int pixel_shift,
635 int mb_type, int cbp, int p)
638 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
639 if(IS_INTRA16x16(mb_type)){
// Clear the DC coefficient buffer of this plane in four AV_ZERO128 stores.
640 AV_ZERO128(sl->mb_luma_dc[p]+0);
641 AV_ZERO128(sl->mb_luma_dc[p]+8);
642 AV_ZERO128(sl->mb_luma_dc[p]+16);
643 AV_ZERO128(sl->mb_luma_dc[p]+24);
// DC block: up to 16 coefficients, no dequantization table (qmul is NULL).
644 if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
645 return -1; //FIXME continue if partitioned and other return -1 too
// The low four cbp bits are expected to be all clear or all set here.
648 assert((cbp&15) == 0 || (cbp&15) == 15);
// AC blocks: scan + 1 and 15 coefficients, one call per 4x4 block.
651 for(i8x8=0; i8x8<4; i8x8++){
652 for(i4x4=0; i4x4<4; i4x4++){
653 const int index= i4x4 + 4*i8x8 + p*16;
654 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
655 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Zero the 4x4 area of the non-zero count cache for this plane.
// NOTE(review): presumably the branch for macroblocks without AC data; confirm.
662 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Dequantization matrix index: 0 for intra or 3 for inter macroblocks, plus p.
666 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
667 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
669 for(i8x8=0; i8x8<4; i8x8++){
671 if(IS_8x8DCT(mb_type)){
// 8x8 transform: four residual calls write into the same 64-coefficient
// buffer, each with its own quarter of scan8x8.
672 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
674 for(i4x4=0; i4x4<4; i4x4++){
675 const int index= i4x4 + 4*i8x8 + p*16;
676 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
677 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Sum the four counts into the first cache cell of the 8x8 group.
680 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
681 nnz[0] += nnz[1] + nnz[8] + nnz[9];
682 new_cbp |= !!nnz[0] << i8x8;
// 4x4 transform: one residual call per block with 16 coefficients.
684 for(i4x4=0; i4x4<4; i4x4++){
685 const int index= i4x4 + 4*i8x8 + p*16;
686 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
687 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
// The raw count (not a 0/1 flag) is shifted and ORed into new_cbp.
690 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
// Clear the four cache cells of this 8x8 group.
694 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
695 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/**
 * Parse one CAVLC coded macroblock from sl->gb.
 * Visible stages: skip run handling, mb_type parsing per slice type,
 * intra PCM payload, intra prediction modes, sub-macroblock and macroblock
 * reference indices and motion vectors, coded block pattern, optional 8x8
 * transform flag, qscale delta and finally the residual blocks.
 * Results are written to the slice context caches and to the per-picture
 * tables indexed by mb_xy.
 */
702 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
706 unsigned int mb_type, cbp;
707 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Separate chroma residual is decoded only for chroma_format_idc 1 and 2.
708 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
709 const int pixel_shift = h->pixel_shift;
// Linear index of this macroblock in the per-picture tables.
711 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
713 ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
714 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Outside I slices a run of skipped macroblocks may come first; the run
// length is read when mb_skip_run is -1 and counted down on each call.
716 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
717 if (sl->mb_skip_run == -1)
718 sl->mb_skip_run = get_ue_golomb(&sl->gb);
720 if (sl->mb_skip_run--) {
// MBAFF, even mb_y: read the field decoding flag when the run just reached 0.
721 if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
722 if (sl->mb_skip_run == 0)
723 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
725 decode_mb_skip(h, sl);
// Non-skipped macroblock in an MBAFF frame: even rows read the field flag.
729 if (FRAME_MBAFF(h)) {
730 if ((sl->mb_y & 1) == 0)
731 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
734 sl->prev_mb_skipped = 0;
// mb_type code number; its meaning depends on the slice type.
736 mb_type= get_ue_golomb(&sl->gb);
737 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
739 partition_count= b_mb_type_info[mb_type].partition_count;
740 mb_type= b_mb_type_info[mb_type].type;
743 goto decode_intra_mb;
745 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
747 partition_count= p_mb_type_info[mb_type].partition_count;
748 mb_type= p_mb_type_info[mb_type].type;
751 goto decode_intra_mb;
754 assert(sl->slice_type_nos == AV_PICTURE_TYPE_I);
755 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
759 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
// Intra macroblock: the type table supplies cbp, the intra 16x16 prediction
// mode and the internal mb_type flags.
763 cbp= i_mb_type_info[mb_type].cbp;
764 sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
765 mb_type= i_mb_type_info[mb_type].type;
769 mb_type |= MB_TYPE_INTERLACED;
771 h->slice_table[mb_xy] = sl->slice_num;
// Intra PCM: raw samples follow, byte aligned.
773 if(IS_INTRA_PCM(mb_type)){
// Payload size in bits: table value for the chroma format times the luma
// bit depth (it is compared against get_bits_left below).
774 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
775 h->sps.bit_depth_luma;
777 // We assume these blocks are very rare so we do not optimize it.
// Remember where the samples start, then verify that enough data is left
// before skipping over the payload.
778 sl->intra_pcm_ptr = align_get_bits(&sl->gb);
779 if (get_bits_left(&sl->gb) < mb_size) {
780 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
781 return AVERROR_INVALIDDATA;
783 skip_bits_long(&sl->gb, mb_size);
785 // In deblocking, the quantizer is 0
786 h->cur_pic.qscale_table[mb_xy] = 0;
787 // All coeffs are present
788 memset(h->non_zero_count[mb_xy], 16, 48);
790 h->cur_pic.mb_type[mb_xy] = mb_type;
// Load neighbour information and the prediction caches for this macroblock.
794 fill_decode_neighbors(h, sl, mb_type);
795 fill_decode_caches(h, sl, mb_type);
798 if(IS_INTRA(mb_type)){
800 // init_top_left_availability(h);
801 if(IS_INTRA4x4(mb_type)){
// Optional one bit flag selecting the 8x8 transform for intra NxN.
804 if(dct8x8_allowed && get_bits1(&sl->gb)){
805 mb_type |= MB_TYPE_8x8DCT;
809 // fill_intra4x4_pred_table(h);
// Per block: start from the predicted mode; if the first bit is 0, a 3 bit
// remainder follows and replaces it, skipping over the predicted value.
810 for(i=0; i<16; i+=di){
811 int mode = pred_intra_mode(h, sl, i);
813 if(!get_bits1(&sl->gb)){
814 const int rem_mode= get_bits(&sl->gb, 3);
815 mode = rem_mode + (rem_mode >= mode);
819 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
821 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
823 write_back_intra_pred_mode(h, sl);
824 if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
// Intra 16x16: validate the mode taken from the type table.
827 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
828 if (sl->intra16x16_pred_mode < 0)
// Chroma prediction mode is read from the stream and validated.
832 pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&sl->gb), 1);
835 sl->chroma_pred_mode = pred_mode;
837 sl->chroma_pred_mode = DC_128_PRED8x8;
// Four 8x8 partitions, each with its own sub macroblock type.
839 }else if(partition_count==4){
840 int i, j, sub_partition_count[4], list, ref[2][4];
842 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
// B sub_mb_type codes must be below 13.
844 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
845 if(sl->sub_mb_type[i] >=13){
846 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
849 sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
850 sl->sub_mb_type[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
// If any sub block is direct, run direct prediction and mark four ref cache
// cells as not available.
852 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
853 ff_h264_pred_direct_motion(h, sl, &mb_type);
854 sl->ref_cache[0][scan8[4]] =
855 sl->ref_cache[1][scan8[4]] =
856 sl->ref_cache[0][scan8[12]] =
857 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860 assert(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
// P sub_mb_type codes must be below 4.
862 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
863 if(sl->sub_mb_type[i] >=4){
864 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
867 sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
868 sl->sub_mb_type[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
// Reference indices per list and 8x8 block. The usable count is 1 for REF0
// types, otherwise ref_count shifted left by MB_MBAFF(sl).
872 for (list = 0; list < sl->list_count; list++) {
873 int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
875 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
876 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
// With exactly two references a single inverted bit is read; otherwise an
// Exp-Golomb value.
880 }else if(ref_count == 2){
881 tmp= get_bits1(&sl->gb)^1;
883 tmp= get_ue_golomb_31(&sl->gb);
885 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
898 dct8x8_allowed = get_dct8x8_allowed(h, sl);
// Motion vectors per list and 8x8 block.
900 for (list = 0; list < sl->list_count; list++) {
902 if(IS_DIRECT(sl->sub_mb_type[i])) {
903 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
// Replicate the reference index over the 2x2 cache cells of the 8x8 block.
906 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
907 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
909 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
910 const int sub_mb_type= sl->sub_mb_type[i];
911 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
912 for(j=0; j<sub_partition_count[i]; j++){
914 const int index= 4*i + block_width*j;
915 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
// Predicted vector plus the two signed deltas from the stream.
916 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
917 mx += get_se_golomb(&sl->gb);
918 my += get_se_golomb(&sl->gb);
919 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
// Copy the vector into the cache cells covered by the sub partition shape.
921 if(IS_SUB_8X8(sub_mb_type)){
923 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
925 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
926 }else if(IS_SUB_8X4(sub_mb_type)){
927 mv_cache[ 1 ][0]= mx;
928 mv_cache[ 1 ][1]= my;
929 }else if(IS_SUB_4X8(sub_mb_type)){
930 mv_cache[ 8 ][0]= mx;
931 mv_cache[ 8 ][1]= my;
933 mv_cache[ 0 ][0]= mx;
934 mv_cache[ 0 ][1]= my;
937 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
// Direct macroblock: motion comes from direct prediction; the 8x8 transform
// stays allowed only with direct_8x8_inference_flag.
943 }else if(IS_DIRECT(mb_type)){
944 ff_h264_pred_direct_motion(h, sl, &mb_type);
945 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
948 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16 partition: one reference and one vector per used list.
949 if(IS_16X16(mb_type)){
950 for (list = 0; list < sl->list_count; list++) {
952 if(IS_DIR(mb_type, 0, list)){
953 int rc = sl->ref_count[list] << MB_MBAFF(sl);
956 } else if (rc == 2) {
957 val= get_bits1(&sl->gb)^1;
959 val= get_ue_golomb_31(&sl->gb);
961 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
965 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
968 for (list = 0; list < sl->list_count; list++) {
969 if(IS_DIR(mb_type, 0, list)){
970 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
971 mx += get_se_golomb(&sl->gb);
972 my += get_se_golomb(&sl->gb);
973 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
975 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
// 16x8 partitions: two halves, each filling a 4x2 area of the caches.
979 else if(IS_16X8(mb_type)){
980 for (list = 0; list < sl->list_count; list++) {
983 if(IS_DIR(mb_type, i, list)){
984 int rc = sl->ref_count[list] << MB_MBAFF(sl);
987 } else if (rc == 2) {
988 val= get_bits1(&sl->gb)^1;
990 val= get_ue_golomb_31(&sl->gb);
992 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
// Partition does not use this list: store the unused marker instead.
997 val= LIST_NOT_USED&0xFF;
998 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1001 for (list = 0; list < sl->list_count; list++) {
1004 if(IS_DIR(mb_type, i, list)){
1005 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1006 mx += get_se_golomb(&sl->gb);
1007 my += get_se_golomb(&sl->gb);
1008 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1010 val= pack16to32(mx,my);
1013 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16 partitions: two halves, each filling a 2x4 area of the caches.
1017 assert(IS_8X16(mb_type));
1018 for (list = 0; list < sl->list_count; list++) {
1021 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1022 int rc = sl->ref_count[list] << MB_MBAFF(sl);
1025 } else if (rc == 2) {
1026 val= get_bits1(&sl->gb)^1;
1028 val= get_ue_golomb_31(&sl->gb);
1030 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1035 val= LIST_NOT_USED&0xFF;
1036 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1039 for (list = 0; list < sl->list_count; list++) {
1042 if(IS_DIR(mb_type, i, list)){
1043 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1044 mx += get_se_golomb(&sl->gb);
1045 my += get_se_golomb(&sl->gb);
1046 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1048 val= pack16to32(mx,my);
1051 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
// Store the decoded motion data of inter macroblocks.
1057 if(IS_INTER(mb_type))
1058 write_back_motion(h, sl, mb_type);
// Coded block pattern: intra 16x16 already got it from the type table; all
// other types read a code number and map it through a lookup table.
1060 if(!IS_INTRA16x16(mb_type)){
1061 cbp= get_ue_golomb(&sl->gb);
1065 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1068 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1069 else cbp= golomb_to_inter_cbp [cbp];
1072 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1075 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1076 else cbp= golomb_to_inter_cbp_gray[cbp];
// Non-intra macroblocks with coded luma may signal the 8x8 transform here.
1080 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1081 mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1084 h->cbp_table[mb_xy]= cbp;
1085 h->cur_pic.mb_type[mb_xy] = mb_type;
// Residual data exists when any cbp bit is set or the type is intra 16x16.
1087 if(cbp || IS_INTRA16x16(mb_type)){
1088 int i4x4, i8x8, chroma_idx;
1091 GetBitContext *gb = &sl->gb;
1092 const uint8_t *scan, *scan8x8;
// Largest valid qscale: 51 plus 6 for every luma bit above 8.
1093 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
// Field or zigzag scan tables; the _q0 variants are used when qscale is 0.
1095 if(IS_INTERLACED(mb_type)){
1096 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1097 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1099 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1100 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
// Signed qscale delta; the sum is wrapped once into 0..max_qp and rejected
// if it is still out of range afterwards.
1103 dquant= get_se_golomb(&sl->gb);
1105 sl->qscale += dquant;
1107 if (((unsigned)sl->qscale) > max_qp){
1108 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1109 else sl->qscale -= max_qp+1;
1110 if (((unsigned)sl->qscale) > max_qp){
1111 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1116 sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1117 sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
// Plane 0 residual; its return value is ORed into cbp_table shifted by 12.
1119 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1122 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 decoded with the luma routine.
// NOTE(review): presumably the 4:4:4 path; confirm against the guarding condition.
1124 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1127 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
// 4:2:2 chroma: one DC block per plane with chroma422_dc_scan, then two
// groups of four AC blocks per plane with 15 coefficients each.
1130 } else if (CHROMA422(h)) {
1132 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1133 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1134 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1141 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1142 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1143 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1144 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1145 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1146 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1147 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1149 mb += 16 << pixel_shift;
1154 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1155 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1157 } else /* yuv420 */ {
// 4:2:0 chroma: one DC block of 4 coefficients per plane, then four AC
// blocks per plane with 15 coefficients each.
1159 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1160 if( decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1166 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1167 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1168 for(i4x4=0; i4x4<4; i4x4++){
1169 const int index= 16 + 16*chroma_idx + i4x4;
1170 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1176 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1177 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Zero the non-zero count cache of all three planes.
1181 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1182 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1183 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Publish the final qscale and the non-zero counts for later stages.
1185 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1186 write_back_non_zero_count(h, sl);