2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
33 #include "h264_mvpred.h"
36 #include "mpegutils.h"
/* NOTE(review): this chunk is line-sampled — the embedded source numbering is
 * non-contiguous, so table rows and closing "};" lines are missing below.
 * Code is left byte-identical; only comments were added. */

/* Gray-colorspace variants of the golomb->CBP mapping tables (luma-only CBP). */
40 static const uint8_t golomb_to_inter_cbp_gray[16]={
41  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
44 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
45 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,

/* coeff_token code lengths/bits for chroma DC blocks (4:2:0: 4*5 entries,
 * 4:2:2: 4*9 entries). Table contents appear truncated in this view. */
48 static const uint8_t chroma_dc_coeff_token_len[4*5]={
56 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
64 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
76 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={

/* coeff_token VLC lengths, 4 tables selected by predicted nnz
 * (indexed via coeff_token_table_index[]), each 17 rows of
 * (total_coeff, trailing_ones) combinations. */
88 static const uint8_t coeff_token_len[4][4*17]={
91     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
92    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
93    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
94    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
98     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
99     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
100    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
101    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
105     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
106     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
107     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
108    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
112     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
113     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
114     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,

/* Matching coeff_token codewords for the length table above; the last
 * (fixed-length 6-bit) table simply enumerates 0..63. */
119 static const uint8_t coeff_token_bits[4][4*17]={
122     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
123     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
124    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
125    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
129    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
130     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
131    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
132    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
136    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
137    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
138    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
139    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
143     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
144    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
145    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
146    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,

/* total_zeros VLC lengths/bits, one row per total_coeff value (rows for
 * higher indices are missing from this view). */
150 static const uint8_t total_zeros_len[16][16]= {
151     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
152     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
153     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
154     {5,3,4,4,3,3,3,4,3,4,5,5,5},
155     {4,4,4,3,3,3,3,3,4,5,4,5},
156     {6,5,3,3,3,3,3,3,4,3,6},
157     {6,5,3,3,3,2,3,4,3,6},
168 static const uint8_t total_zeros_bits[16][16]= {
169     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
170     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
171     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
172     {3,7,5,4,6,5,4,3,3,2,2,1,0},
173     {5,4,3,7,6,5,4,3,2,1,1,0},
174     {1,1,7,6,5,4,3,2,1,1,0},
175     {1,1,5,4,3,3,2,1,1,0},

/* Chroma-DC total_zeros tables for 4:2:0 (3x4) and 4:2:2 (7x8). */
186 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
192 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
198 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
199     { 1, 3, 3, 4, 4, 4, 5, 5 },
200     { 3, 2, 3, 3, 3, 3, 3 },
201     { 3, 3, 2, 2, 3, 3 },
208 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
209     { 1, 2, 3, 2, 3, 1, 1, 0 },
210     { 0, 1, 1, 4, 5, 6, 7 },
211     { 0, 1, 1, 2, 6, 7 },

/* run_before VLC tables; row 6 (the ">6 zeros left" case) is the long one
 * used by run7_vlc below. */
218 static const uint8_t run_len[7][16]={
225  {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
228 static const uint8_t run_bits[7][16]={
235  {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},

/* Statically-allocated VLC lookup state; the *_tables_size constants must
 * match the array dimensions (checked by an assert in the init code). */
238 static VLC coeff_token_vlc[4];
239 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
240 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 static VLC chroma_dc_coeff_token_vlc;
243 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
244 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246 static VLC chroma422_dc_coeff_token_vlc;
247 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
248 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250 static VLC total_zeros_vlc[15];
251 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
252 static const int total_zeros_vlc_tables_size = 512;
254 static VLC chroma_dc_total_zeros_vlc[3];
255 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
256 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258 static VLC chroma422_dc_total_zeros_vlc[7];
259 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
260 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262 static VLC run_vlc[6];
263 static VLC_TYPE run_vlc_tables[6][8][2];
264 static const int run_vlc_tables_size = 8;
267 static VLC_TYPE run7_vlc_table[96][2];
268 static const int run7_vlc_table_size = 96;

/* Fast path for level decoding: precomputed (value, bits-consumed) pairs
 * keyed by suffix_length and the next LEVEL_TAB_BITS bits of the stream. */
270 #define LEVEL_TAB_BITS 8
271 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Bit widths used when reading each VLC with get_vlc2(). */
273 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
274 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
275 #define COEFF_TOKEN_VLC_BITS 8
276 #define TOTAL_ZEROS_VLC_BITS 9
277 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
278 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
279 #define RUN_VLC_BITS 3
280 #define RUN7_VLC_BITS 6
283  * Get the predicted number of non-zero coefficients.
284  * @param n block index
286 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
288     const int index8= scan8[n];
    /* Neighbouring nnz values from the cache: left is at -1, top at -8
     * in scan8 coordinates. */
289     const int left = sl->non_zero_count_cache[index8 - 1];
290     const int top  = sl->non_zero_count_cache[index8 - 8];
    /* NOTE(review): the line(s) computing `i` from left/top are missing in
     * this view; `i<64` presumably distinguishes "both neighbours
     * available" (sum averaged, rounded up) — confirm against full source. */
293     if(i<64) i= (i+1)>>1;
295     ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fill cavlc_level_tab[][]: for every suffix_length and every possible
 * LEVEL_TAB_BITS-bit prefix of the stream, store either the fully decoded
 * level and the number of bits consumed, or (prefix+100) as an "escape"
 * marker meaning the caller must continue decoding bit-by-bit. */
300 static av_cold void init_cavlc_level_tab(void){
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
            /* Number of leading zero bits before the first 1 in the window. */
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
                /* Whole codeword (prefix + stop bit + suffix) fits in the
                 * window: decode the signed level completely. */
309                 int level_code = (prefix << suffix_length) +
310                                  (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
                /* Map unsigned code to signed level: even -> positive,
                 * odd -> negative (branchless sign flip). */
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
                /* Prefix visible but suffix extends past the window:
                 * store escape value >= 100 carrying the prefix length. */
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
                /* NOTE(review): the final `}else{` line is missing from this
                 * view; the branch below handles an all-zero window. */
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all CAVLC VLC lookup tables from the static
 * length/bits arrays above, using preallocated (INIT_VLC_USE_NEW_STATIC)
 * storage. NOTE(review): several loop-header lines are missing in this
 * view (the `for(i=...)` lines and an enclosing run-once guard). */
326 av_cold void ff_h264_decode_init_vlc(void){
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
            /* The four coeff_token tables are packed back-to-back in one
             * static array; `offset` walks through it. */
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
363         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
        /* run7_vlc covers the "more than 6 zeros left" run_before case. */
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
411         init_cavlc_level_tab();
/* Count the leading zero bits (unary level_prefix) at the current bitstream
 * position and skip past them. NOTE(review): the OPEN_READER and `return`
 * lines are missing from this view; presumably returns log-1 (the number of
 * leading zeros) — confirm against full source. */
415 static inline int get_level_prefix(GetBitContext *gb){
420     UPDATE_CACHE(re, gb);
421     buf=GET_CACHE(re, gb);
    /* Position of the first set bit from the MSB side. */
423     log= 32 - av_log2(buf);
425     print_bin(buf>>(32-log), log);
426     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
429     LAST_SKIP_BITS(re, gb, log);
430     CLOSE_READER(re, gb);
436  * Decode a residual block.
437  * @param n block index
438  * @param scantable scantable
439  * @param max_coeff number of coefficients in the block
440  * @return <0 if an error occurred
442 static int decode_residual(const H264Context *h, H264SliceContext *sl,
443                            GetBitContext *gb, int16_t *block, int n,
444                            const uint8_t *scantable, const uint32_t *qmul,
    /* Maps predicted nnz (0..16) to one of the 4 coeff_token VLC tables. */
447     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
449     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
451     //FIXME put trailing_onex into the context

    /* --- coeff_token: chroma-DC blocks use dedicated VLCs, everything else
     * selects a table from the predicted non-zero count of neighbours.
     * NOTE(review): the if/else lines choosing between these branches are
     * missing from this view. --- */
455             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
457             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458         total_coeff= coeff_token>>2;
460         if(n >= LUMA_DC_BLOCK_INDEX){
461             total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
462             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463             total_coeff= coeff_token>>2;
465             total_coeff= pred_non_zero_count(h, sl, n);
466             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467             total_coeff= coeff_token>>2;
470     sl->non_zero_count_cache[scan8[n]] = total_coeff;
472     //FIXME set last_non_zero?
476     if(total_coeff > (unsigned)max_coeff) {
477         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
    /* coeff_token packs (total_coeff<<2) | trailing_ones. */
481     trailing_ones= coeff_token&3;
482     ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483     assert(total_coeff<=16);
    /* Trailing ones: up to 3 sign bits; peek 3 then skip only the used ones.
     * 1 - 2*sign_bit gives +1/-1 per coefficient. */
485     i = show_bits(gb, 3);
486     skip_bits(gb, trailing_ones);
487     level[0] = 1-((i&4)>>1);
488     level[1] = 1-((i&2)   );
489     level[2] = 1-((i&1)<<1);
491     if(trailing_ones<total_coeff) {
        /* First non-T1 level: suffix_length starts at 1 only for big blocks
         * (total_coeff>10 with <3 trailing ones), per the spec. Note `&` is
         * intentional here on boolean operands. */
493         int suffix_length = total_coeff > 10 & trailing_ones < 3;
        /* Fast path via the precomputed level table; escape values >= 100
         * mean the codeword did not fit in LEVEL_TAB_BITS. */
494         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
497         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498         if(level_code >= 100){
499             prefix= level_code - 100;
500             if(prefix == LEVEL_TAB_BITS)
501                 prefix += get_level_prefix(gb);
503             //first coefficient has suffix_length equal to 0 or 1
504             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
506                     level_code= (prefix<<1) + get_bits1(gb);         //part
508                     level_code= prefix;                              //part
509             }else if(prefix==14){
511                     level_code= (prefix<<1) + get_bits1(gb);         //part
513                     level_code= prefix + get_bits(gb, 4);            //part
                /* prefix >= 15: escape code, prefix-3 extra bits. */
515                 level_code= 30 + get_bits(gb, prefix-3);             //part
518                     av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
521                     level_code += (1<<(prefix-3))-4096;
            /* T1s < 3 means the first level cannot be +-1, so bias by 2. */
525             if(trailing_ones < 3) level_code += 2;
            /* Convert unsigned code to signed level. */
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
            /* Fast-path result: apply the same T1<3 bias, sign-aware. */
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
            /* suffix_length grows when |level| exceeds 3<<(suffix_length-1). */
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                    /* prefix >= 15: escape with prefix-3 extra bits. */
552                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
554                         level_code += (1<<(prefix-3))-4096;
556                 mask= -(level_code&1);
557                 level_code= (((2+level_code)>>1) ^ mask) - mask;
559             level[i]= level_code;
560             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];

    /* --- total_zeros: skipped entirely when the block is full. --- */
564     if(total_coeff == max_coeff)
567         if (max_coeff <= 8) {
569                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
570                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
572                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
573                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
575             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);

    /* Scatter levels into `block` via the scantable, reading run_before for
     * each coefficient while zeros remain; the qmul branch additionally
     * dequantizes ((level*qmul+32)>>6). Instantiated for both int16_t and
     * int32_t sample depths (see pixel_shift below). */
579 #define STORE_BLOCK(type) \
580     scantable += zeros_left + total_coeff - 1; \
581     if(n >= LUMA_DC_BLOCK_INDEX){ \
582         ((type*)block)[*scantable] = level[0]; \
583         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
585                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
587                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588             zeros_left -= run_before; \
589             scantable -= 1 + run_before; \
590             ((type*)block)[*scantable]= level[i]; \
592         for(;i<total_coeff;i++) { \
594             ((type*)block)[*scantable]= level[i]; \
597         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
600                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
602                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603             zeros_left -= run_before; \
604             scantable -= 1 + run_before; \
605             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
607         for(;i<total_coeff;i++) { \
609             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \

    /* A corrupted run_before can drive zeros_left negative: reject. */
613     if (zeros_left < 0) {
614         av_log(h->avctx, AV_LOG_ERROR,
615                "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
616         return AVERROR_INVALIDDATA;
619     if (h->pixel_shift) {
/* Decode all luma residuals of one macroblock for plane p (p>0 only for
 * 4:4:4): the 16x16-intra DC + AC path, or per-8x8/per-4x4 blocks, tracking
 * the per-8x8 CBP for deblocking. NOTE(review): several closing-brace,
 * `return` and `else` lines are missing from this view. */
628 static av_always_inline
629 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
630                          GetBitContext *gb, const uint8_t *scan,
631                          const uint8_t *scan8x8, int pixel_shift,
632                          int mb_type, int cbp, int p)
    /* For 4:4:4, planes 1/2 are quantized with the chroma QPs. */
635     int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
636     if(IS_INTRA16x16(mb_type)){
        /* Clear the 16-entry luma DC block, then decode it (no dequant:
         * qmul==NULL; DC dequant happens later in the transform). */
637         AV_ZERO128(sl->mb_luma_dc[p]+0);
638         AV_ZERO128(sl->mb_luma_dc[p]+8);
639         AV_ZERO128(sl->mb_luma_dc[p]+16);
640         AV_ZERO128(sl->mb_luma_dc[p]+24);
641         if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
642             return -1; //FIXME continue if partitioned and other return -1 too
        /* Intra16x16 CBP can only be all-zero or all-set for luma. */
645         assert((cbp&15) == 0 || (cbp&15) == 15);
            /* AC coefficients: scan+1 skips the DC position (15 coeffs). */
648             for(i8x8=0; i8x8<4; i8x8++){
649                 for(i4x4=0; i4x4<4; i4x4++){
650                     const int index= i4x4 + 4*i8x8 + p*16;
651                     if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
652                                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
            /* cbp&15 == 0: no AC, clear the nnz cache for this plane. */
659             fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
        /* Non-intra16x16 path: choose intra/inter dequant matrix. */
663         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
664         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
666         for(i8x8=0; i8x8<4; i8x8++){
668                 if(IS_8x8DCT(mb_type)){
                    /* 8x8 transform: four 4x4 CAVLC reads interleaved into
                     * one 64-coeff buffer via the cavlc 8x8 scan. */
669                     int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
671                     for(i4x4=0; i4x4<4; i4x4++){
672                         const int index= i4x4 + 4*i8x8 + p*16;
673                         if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
674                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
                    /* Collapse the four 4x4 nnz values into nnz[0] so the
                     * 8x8 block's nonzero-ness feeds the CBP bit. */
677                     nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
678                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
679                     new_cbp |= !!nnz[0] << i8x8;
681                     for(i4x4=0; i4x4<4; i4x4++){
682                         const int index= i4x4 + 4*i8x8 + p*16;
683                         if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
684                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
687                         new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
                /* 8x8 block not coded: zero its nnz cache entries. */
691                 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
692                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Decode one macroblock in CAVLC mode: skip-run handling, mb_type, intra
 * prediction modes or inter motion info (ref indices + MVs per partition),
 * CBP, dquant and all residual blocks. NOTE(review): this chunk is
 * line-sampled — many braces, else branches and returns are missing below;
 * comments describe only what the visible lines establish. */
699 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
703     unsigned int mb_type, cbp;
704     int dct8x8_allowed= h->pps.transform_8x8_mode;
    /* Chroma residuals exist only for 4:2:0 / 4:2:2 (idc 1 or 2). */
705     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
706     const int pixel_shift = h->pixel_shift;
708     mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
710     ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
711     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
    /* --- mb_skip_run (P/B slices only) --- */
713     if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
714         if (sl->mb_skip_run == -1)
715             sl->mb_skip_run = get_ue_golomb(&sl->gb);
717         if (sl->mb_skip_run--) {
            /* MBAFF: field flag is read on the top MB of the pair. */
718             if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
719                 if (sl->mb_skip_run == 0)
720                     sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
722             decode_mb_skip(h, sl);
726         if (FRAME_MBAFF(h)) {
727             if ((sl->mb_y & 1) == 0)
728                 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
731     sl->prev_mb_skipped = 0;
    /* --- mb_type: slice-type dependent mapping; large values fall through
     * to the intra tables (decode_intra_mb). --- */
733     mb_type= get_ue_golomb(&sl->gb);
734     if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
736             partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
737             mb_type         = ff_h264_b_mb_type_info[mb_type].type;
740             goto decode_intra_mb;
742     } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
744             partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
745             mb_type         = ff_h264_p_mb_type_info[mb_type].type;
748             goto decode_intra_mb;
751         assert(sl->slice_type_nos == AV_PICTURE_TYPE_I);
752         if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
756             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
        /* Intra mb_type carries implied CBP and 16x16 pred mode. */
760         cbp                     = ff_h264_i_mb_type_info[mb_type].cbp;
761         sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
762         mb_type                 = ff_h264_i_mb_type_info[mb_type].type;
766         mb_type |= MB_TYPE_INTERLACED;
768     h->slice_table[mb_xy] = sl->slice_num;
    /* --- PCM macroblock: raw samples, no prediction/residual decoding --- */
770     if(IS_INTRA_PCM(mb_type)){
771         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
772                             h->sps.bit_depth_luma;
774         // We assume these blocks are very rare so we do not optimize it.
775         sl->intra_pcm_ptr = align_get_bits(&sl->gb);
776         if (get_bits_left(&sl->gb) < mb_size) {
777             av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
778             return AVERROR_INVALIDDATA;
780         skip_bits_long(&sl->gb, mb_size);
782         // In deblocking, the quantizer is 0
783         h->cur_pic.qscale_table[mb_xy] = 0;
784         // All coeffs are present
785         memset(h->non_zero_count[mb_xy], 16, 48);
787         h->cur_pic.mb_type[mb_xy] = mb_type;
791     fill_decode_neighbors(h, sl, mb_type);
792     fill_decode_caches(h, sl, mb_type);
795     if(IS_INTRA(mb_type)){
797 //            init_top_left_availability(h);
798         if(IS_INTRA4x4(mb_type)){
            /* transform_size_8x8_flag: intra 4x4 may switch to 8x8 DCT. */
801                 if(dct8x8_allowed && get_bits1(&sl->gb)){
802                     mb_type |= MB_TYPE_8x8DCT;
806 //                fill_intra4x4_pred_table(h);
            /* Per-4x4 (or per-8x8, via di) prediction modes: either the
             * predicted mode (flag=1) or 3-bit rem_intra_pred_mode. */
807             for(i=0; i<16; i+=di){
808                 int mode = pred_intra_mode(h, sl, i);
810                 if(!get_bits1(&sl->gb)){
811                     const int rem_mode= get_bits(&sl->gb, 3);
812                     mode = rem_mode + (rem_mode >= mode);
816                     fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
818                     sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
820             write_back_intra_pred_mode(h, sl);
821             if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
822                                                  sl->top_samples_available, sl->left_samples_available) < 0)
825             sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
826                                                                      sl->left_samples_available, sl->intra16x16_pred_mode, 0);
827             if (sl->intra16x16_pred_mode < 0)
        /* Chroma pred mode (when chroma is decoded); DC_128 otherwise. */
831             pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
832                                                      sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
835             sl->chroma_pred_mode = pred_mode;
837             sl->chroma_pred_mode = DC_128_PRED8x8;
    /* --- 8x8 partitions: sub_mb_type per quadrant, then refs and MVs --- */
839     }else if(partition_count==4){
840         int i, j, sub_partition_count[4], list, ref[2][4];
842         if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
844                 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
845                 if(sl->sub_mb_type[i] >=13){
846                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
849                 sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
850                 sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
852             if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
853                 ff_h264_pred_direct_motion(h, sl, &mb_type);
854                 sl->ref_cache[0][scan8[4]] =
855                 sl->ref_cache[1][scan8[4]] =
856                 sl->ref_cache[0][scan8[12]] =
857                 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860             assert(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
862                 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
863                 if(sl->sub_mb_type[i] >=4){
864                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
867                 sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
868                 sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
        /* Reference indices per list/quadrant; ref_count==1 and ==2 have
         * cheap encodings (implicit 0 / one flipped bit). */
872         for (list = 0; list < sl->list_count; list++) {
873             int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
875                 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
876                 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
880                     }else if(ref_count == 2){
881                         tmp= get_bits1(&sl->gb)^1;
883                         tmp= get_ue_golomb_31(&sl->gb);
885                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
898             dct8x8_allowed = get_dct8x8_allowed(h, sl);
        /* MVs per sub-partition: predict, add the se(v) deltas, then write
         * into mv_cache with the pattern matching the sub-block shape. */
900         for (list = 0; list < sl->list_count; list++) {
902                 if(IS_DIRECT(sl->sub_mb_type[i])) {
903                     sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
906                 sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
907                 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
909                 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
910                     const int sub_mb_type= sl->sub_mb_type[i];
911                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
912                     for(j=0; j<sub_partition_count[i]; j++){
914                         const int index= 4*i + block_width*j;
915                         int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
916                         pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
917                         mx += get_se_golomb(&sl->gb);
918                         my += get_se_golomb(&sl->gb);
919                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
921                         if(IS_SUB_8X8(sub_mb_type)){
923                                 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
925                                 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
926                         }else if(IS_SUB_8X4(sub_mb_type)){
927                             mv_cache[ 1 ][0]= mx;
928                             mv_cache[ 1 ][1]= my;
929                         }else if(IS_SUB_4X8(sub_mb_type)){
930                             mv_cache[ 8 ][0]= mx;
931                             mv_cache[ 8 ][1]= my;
933                         mv_cache[ 0 ][0]= mx;
934                         mv_cache[ 0 ][1]= my;
937                     uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
943     }else if(IS_DIRECT(mb_type)){
944         ff_h264_pred_direct_motion(h, sl, &mb_type);
945         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    /* --- 16x16 / 16x8 / 8x16 partitions: refs then MVs per partition --- */
948         //FIXME we should set ref_idx_l? to 0 if we use that later ...
949         if(IS_16X16(mb_type)){
950             for (list = 0; list < sl->list_count; list++) {
952                 if(IS_DIR(mb_type, 0, list)){
953                     int rc = sl->ref_count[list] << MB_MBAFF(sl);
956                     } else if (rc == 2) {
957                         val= get_bits1(&sl->gb)^1;
959                         val= get_ue_golomb_31(&sl->gb);
961                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
965                 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
968             for (list = 0; list < sl->list_count; list++) {
969                 if(IS_DIR(mb_type, 0, list)){
970                     pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
971                     mx += get_se_golomb(&sl->gb);
972                     my += get_se_golomb(&sl->gb);
973                     ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
975                     fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
979         else if(IS_16X8(mb_type)){
980             for (list = 0; list < sl->list_count; list++) {
983                     if(IS_DIR(mb_type, i, list)){
984                         int rc = sl->ref_count[list] << MB_MBAFF(sl);
987                         } else if (rc == 2) {
988                             val= get_bits1(&sl->gb)^1;
990                             val= get_ue_golomb_31(&sl->gb);
992                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
997                         val= LIST_NOT_USED&0xFF;
998                     fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1001             for (list = 0; list < sl->list_count; list++) {
1004                     if(IS_DIR(mb_type, i, list)){
1005                         pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1006                         mx += get_se_golomb(&sl->gb);
1007                         my += get_se_golomb(&sl->gb);
1008                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1010                         val= pack16to32(mx,my);
1013                     fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1017             assert(IS_8X16(mb_type));
1018             for (list = 0; list < sl->list_count; list++) {
1021                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1022                         int rc = sl->ref_count[list] << MB_MBAFF(sl);
1025                         } else if (rc == 2) {
1026                             val= get_bits1(&sl->gb)^1;
1028                             val= get_ue_golomb_31(&sl->gb);
1030                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1035                         val= LIST_NOT_USED&0xFF;
1036                     fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1039             for (list = 0; list < sl->list_count; list++) {
1042                     if(IS_DIR(mb_type, i, list)){
1043                         pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1044                         mx += get_se_golomb(&sl->gb);
1045                         my += get_se_golomb(&sl->gb);
1046                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1048                         val= pack16to32(mx,my);
1051                     fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1057     if(IS_INTER(mb_type))
1058         write_back_motion(h, sl, mb_type);
    /* --- coded_block_pattern (not present for intra16x16, whose CBP came
     * from the mb_type table above) --- */
1060     if(!IS_INTRA16x16(mb_type)){
1061         cbp= get_ue_golomb(&sl->gb);
1065                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1068             if (IS_INTRA4x4(mb_type))
1069                 cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1071                 cbp = ff_h264_golomb_to_inter_cbp[cbp];
1074                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
            /* Gray (luma-only) variants of the CBP mapping tables. */
1077             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1078             else                     cbp= golomb_to_inter_cbp_gray[cbp];
    /* transform_size_8x8_flag for inter MBs with coded luma. */
1082     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1083         mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1086     h->cbp_table[mb_xy]= cbp;
1087     h->cur_pic.mb_type[mb_xy] = mb_type;
    /* --- residuals: dquant, then luma plane(s), then chroma DC + AC --- */
1089     if(cbp || IS_INTRA16x16(mb_type)){
1090         int i4x4, i8x8, chroma_idx;
1093         GetBitContext *gb = &sl->gb;
1094         const uint8_t *scan, *scan8x8;
1095         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
        /* Frame vs field scan order; *_q0 variants are for qscale==0. */
1097         if(IS_INTERLACED(mb_type)){
1098             scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1099             scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1101             scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1102             scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1105         dquant= get_se_golomb(&sl->gb);
1107         sl->qscale += dquant;
        /* qscale wraps modulo max_qp+1; a second out-of-range hit is an
         * actual bitstream error. */
1109         if (((unsigned)sl->qscale) > max_qp){
1110             if (sl->qscale < 0) sl->qscale += max_qp + 1;
1111             else                sl->qscale -= max_qp+1;
1112             if (((unsigned)sl->qscale) > max_qp){
1113                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1118         sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1119         sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
1121         if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1124         h->cbp_table[mb_xy] |= ret << 12;
        /* 4:4:4: planes 1 and 2 decoded like luma. */
1126             if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1129             if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1132         } else if (CHROMA422(h)) {
1134                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1135                     if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1136                                         CHROMA_DC_BLOCK_INDEX + chroma_idx, ff_h264_chroma422_dc_scan,
1143                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1144                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1145                     int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1146                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1147                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1148                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1149                             if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1151                             mb += 16 << pixel_shift;
1156                 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1157                 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1159         } else /* yuv420 */ {
1161                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1162                     if (decode_residual(h, sl, gb, sl->mb + ((256 + 16 * 16 * chroma_idx) << pixel_shift),
1163                                         CHROMA_DC_BLOCK_INDEX + chroma_idx, ff_h264_chroma_dc_scan, NULL, 4) < 0) {
1169                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1170                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1171                     for(i4x4=0; i4x4<4; i4x4++){
1172                         const int index= 16 + 16*chroma_idx + i4x4;
1173                         if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1179                 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1180                 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
        /* No residuals at all: clear the whole nnz cache. */
1184         fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1185         fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1186         fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1188     h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1189     write_back_non_zero_count(h, sl);