2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
33 #include "h264data.h" // FIXME FIXME FIXME
34 #include "h264_mvpred.h"
36 #include "mpegutils.h"
// Maps the Exp-Golomb decoded cbp index (0..15) to a coded block pattern for
// macroblocks that are not intra 4x4 (the else side of the lookup in
// ff_h264_decode_mb_cavlc).
// NOTE(review): the "_gray" suffix suggests the no-chroma case -- confirm.
40 static const uint8_t golomb_to_inter_cbp_gray[16]={
41 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// Same kind of mapping, used when IS_INTRA4x4(mb_type) holds.
44 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
45 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Code lengths and code words (4*5 entries each) handed to init_vlc() to
// build chroma_dc_coeff_token_vlc in ff_h264_decode_init_vlc.
48 static const uint8_t chroma_dc_coeff_token_len[4*5]={
56 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
// 4*9-entry counterparts used to build chroma422_dc_coeff_token_vlc.
64 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
76 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
// Code lengths for the four coeff_token VLCs (4*17 entries each). Table i is
// passed to init_vlc() for coeff_token_vlc[i]; decode_residual picks the table
// through coeff_token_table_index[predicted non-zero count].
// NOTE(review): entry index is presumably 4*total_coeff + trailing_ones, since
// decode_residual splits the decoded token as (token>>2, token&3) -- confirm
// against init_vlc's default symbol numbering.
88 static const uint8_t coeff_token_len[4][4*17]={
91 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
92 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
93 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
94 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
98 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
99 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
100 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
101 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
105 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
106 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
107 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
108 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
112 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
113 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Code words matching coeff_token_len entry for entry (same [table][index]
// layout); the pair is passed together to init_vlc().
119 static const uint8_t coeff_token_bits[4][4*17]={
122 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
123 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
124 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
125 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
129 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
130 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
131 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
132 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
136 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
137 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
138 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
139 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
143 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
144 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
145 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
146 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths for the total_zeros VLCs. Row i is passed (16 entries) to
// init_vlc() for total_zeros_vlc[i]; decode_residual selects the row with
// total_coeff - 1. Rows list fewer than 16 initializers, so the remaining
// entries are zero-initialized by the compiler.
// NOTE(review): zero-length entries are presumably treated as unused codes by
// init_vlc -- confirm.
150 static const uint8_t total_zeros_len[16][16]= {
151 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
152 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
153 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
154 {5,3,4,4,3,3,3,4,3,4,5,5,5},
155 {4,4,4,3,3,3,3,3,4,5,4,5},
156 {6,5,3,3,3,3,3,3,4,3,6},
157 {6,5,3,3,3,2,3,4,3,6},
// Code words paired entry for entry with total_zeros_len.
168 static const uint8_t total_zeros_bits[16][16]= {
169 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
170 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
171 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
172 {3,7,5,4,6,5,4,3,3,2,2,1,0},
173 {5,4,3,7,6,5,4,3,2,1,1,0},
174 {1,1,7,6,5,4,3,2,1,1,0},
175 {1,1,5,4,3,3,2,1,1,0},
// total_zeros code lengths / code words for chroma DC blocks; row i feeds
// init_vlc() for chroma_dc_total_zeros_vlc[i] (4 entries per row).
186 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
192 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
// Counterparts for chroma422_dc_total_zeros_vlc[i] (8 entries per row; rows
// with fewer initializers are zero-padded by the compiler).
198 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
199 { 1, 3, 3, 4, 4, 4, 5, 5 },
200 { 3, 2, 3, 3, 3, 3, 3 },
201 { 3, 3, 2, 2, 3, 3 },
208 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
209 { 1, 2, 3, 2, 3, 1, 1, 0 },
210 { 0, 1, 1, 4, 5, 6, 7 },
211 { 0, 1, 1, 2, 6, 7 },
// run_before code lengths: rows are passed to init_vlc() for run_vlc[i], and
// row 6 (the row shown below) is used for run7_vlc with 16 entries.
218 static const uint8_t run_len[7][16]={
225 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
// run_before code words, paired entry for entry with run_len.
228 static const uint8_t run_bits[7][16]={
235 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Static VLC objects together with their backing tables. For each group the
// *_size constant repeats the first dimension of the table array and is
// stored in .table_allocated by ff_h264_decode_init_vlc.

// The four coeff_token tables are packed back to back in one array; the
// per-table sizes must sum to its length (asserted in ff_h264_decode_init_vlc).
238 static VLC coeff_token_vlc[4];
239 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
240 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
// 256 == 1 << CHROMA_DC_COEFF_TOKEN_VLC_BITS
242 static VLC chroma_dc_coeff_token_vlc;
243 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
244 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// 8192 == 1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS
246 static VLC chroma422_dc_coeff_token_vlc;
247 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
248 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
// 512 == 1 << TOTAL_ZEROS_VLC_BITS
250 static VLC total_zeros_vlc[15];
251 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
252 static const int total_zeros_vlc_tables_size = 512;
// 8 == 1 << CHROMA_DC_TOTAL_ZEROS_VLC_BITS
254 static VLC chroma_dc_total_zeros_vlc[3];
255 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
256 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// 32 == 1 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS
258 static VLC chroma422_dc_total_zeros_vlc[7];
259 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
260 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
// 8 == 1 << RUN_VLC_BITS
262 static VLC run_vlc[6];
263 static VLC_TYPE run_vlc_tables[6][8][2];
264 static const int run_vlc_tables_size = 8;
// 96 is larger than 1 << RUN7_VLC_BITS; run7 is read with get_vlc2 depth 2,
// so the extra entries presumably hold second-level subtables -- confirm.
267 static VLC_TYPE run7_vlc_table[96][2];
268 static const int run7_vlc_table_size = 96;
// Number of bitstream bits used to index the level lookup table.
270 #define LEVEL_TAB_BITS 8
// cavlc_level_tab[suffix_length][next LEVEL_TAB_BITS bits][k]:
//   k == 0: decoded signed level, or an escape value >= 100 (100 + prefix),
//   k == 1: number of bits to skip for this entry.
// Filled by init_cavlc_level_tab, read by decode_residual.
271 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// First-level lookup widths, used both as the init_vlc() bit count and as the
// bits argument of get_vlc2() for the matching VLC.
273 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
274 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
275 #define COEFF_TOKEN_VLC_BITS 8
276 #define TOTAL_ZEROS_VLC_BITS 9
277 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
278 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
279 #define RUN_VLC_BITS 3
280 #define RUN7_VLC_BITS 6
283 * Get the predicted number of non-zero coefficients.
284 * @param n block index
// The prediction is derived from the non-zero counts cached for the left
// (index - 1) and top (index - 8) neighbours of block n in
// sl->non_zero_count_cache.
// NOTE(review): the declaration of i and the return statement are not visible
// in this excerpt; i is presumably left + top and the result i & 31 (the value
// traced as "P" below) -- confirm.
286 static inline int pred_non_zero_count(H264Context *h, H264SliceContext *sl, int n)
288 const int index8= scan8[n];
289 const int left = sl->non_zero_count_cache[index8 - 1];
290 const int top = sl->non_zero_count_cache[index8 - 8];
// Below 64 the value is halved, rounding up; values >= 64 are left unchanged.
// NOTE(review): 64 presumably flags an unavailable neighbour -- confirm.
293 if(i<64) i= (i+1)>>1;
295 tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Precomputes cavlc_level_tab for every suffix_length (0..6) and every
// LEVEL_TAB_BITS-bit pattern i, so decode_residual can resolve short level
// codes with a single table lookup.
300 static av_cold void init_cavlc_level_tab(void){
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Number of leading zero bits of i within the LEVEL_TAB_BITS window
// (assuming av_log2 is floor(log2) -- confirm its result for i == 0).
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix zeros + marker bit + suffix all fit in the window, so the
// level can be decoded completely here.
308 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
// Combine the prefix with the suffix_length bits following the marker bit
// (the subtraction removes the marker bit itself).
309 int level_code = (prefix << suffix_length) +
310 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// Map the unsigned code to a signed level: 0 -> 1, 1 -> -1, 2 -> 2, 3 -> -2, ...
311 int mask = -(level_code&1);
312 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313 cavlc_level_tab[suffix_length][i][0]= level_code;
314 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only prefix + marker fit. Store prefix + 100 as an escape value
// (readers test for >= 100) and consume just the prefix and marker bits.
315 }else if(prefix + 1 <= LEVEL_TAB_BITS){
316 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case (its branch line is not visible in this excerpt): escape
// value LEVEL_TAB_BITS + 100, consuming the whole LEVEL_TAB_BITS window.
319 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds every static CAVLC VLC table in this file and then the level lookup
// table. Each VLC is pointed at its static backing array, told how many
// entries that array holds, and initialized with INIT_VLC_USE_NEW_STATIC.
// NOTE(review): the loop headers and the declarations of i and offset are not
// visible in this excerpt; the "1, 1" after each table pointer is presumably
// wrap and element size for the uint8_t tables -- confirm against init_vlc.
326 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token.
334 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337 &chroma_dc_coeff_token_len [0], 1, 1,
338 &chroma_dc_coeff_token_bits[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC);
// Chroma DC coeff_token, chroma422 variant.
341 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344 &chroma422_dc_coeff_token_len [0], 1, 1,
345 &chroma422_dc_coeff_token_bits[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC);
// coeff_token tables: table i starts at the running offset inside the packed
// coeff_token_vlc_tables array; the offset advances by that table's size.
350 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353 &coeff_token_len [i][0], 1, 1,
354 &coeff_token_bits[i][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC);
356 offset += coeff_token_vlc_tables_size[i];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
363 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros tables.
366 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368 init_vlc(&chroma_dc_total_zeros_vlc[i],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370 &chroma_dc_total_zeros_len [i][0], 1, 1,
371 &chroma_dc_total_zeros_bits[i][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC);
// Chroma DC total_zeros tables, chroma422 variant.
376 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378 init_vlc(&chroma422_dc_total_zeros_vlc[i],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380 &chroma422_dc_total_zeros_len [i][0], 1, 1,
381 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC);
// total_zeros tables.
386 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388 init_vlc(&total_zeros_vlc[i],
389 TOTAL_ZEROS_VLC_BITS, 16,
390 &total_zeros_len [i][0], 1, 1,
391 &total_zeros_bits[i][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC);
// run_before tables (the init_vlc argument line carrying the bit width and
// code count is not visible in this excerpt).
396 run_vlc[i].table = run_vlc_tables[i];
397 run_vlc[i].table_allocated = run_vlc_tables_size;
398 init_vlc(&run_vlc[i],
400 &run_len [i][0], 1, 1,
401 &run_bits[i][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC);
// run7 table, built from row 6 of run_len/run_bits.
// NOTE(review): the next line ends in ',' rather than ';', so the two
// assignments form one comma expression; the effect is the same as two
// separate statements.
404 run7_vlc.table = run7_vlc_table,
405 run7_vlc.table_allocated = run7_vlc_table_size;
406 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407 &run_len [6][0], 1, 1,
408 &run_bits[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC);
// Finally precompute the level lookup table.
411 init_cavlc_level_tab();
// Reads a level prefix from the bitstream: looks at the cached bits, finds the
// first set bit and skips everything up to and including it.
// NOTE(review): the reader setup and the return statement are not visible in
// this excerpt; the debug message below prints log-1, presumably the returned
// number of leading zeros -- confirm.
418 static inline int get_level_prefix(GetBitContext *gb){
423 UPDATE_CACHE(re, gb);
424 buf=GET_CACHE(re, gb);
// log = count of bits up to and including the first 1 bit of the 32-bit cache
// (assuming av_log2 is floor(log2)).
426 log= 32 - av_log2(buf);
// Debug output; presumably compiled only in a trace build (the guarding
// preprocessor lines are not visible here).
428 print_bin(buf>>(32-log), log);
429 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix zeros and the terminating 1 bit.
432 LAST_SKIP_BITS(re, gb, log);
433 CLOSE_READER(re, gb);
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scantable
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
// Overview: (1) read coeff_token -> total_coeff and trailing_ones, (2) read
// the trailing-one signs and the remaining levels, (3) read total_zeros,
// (4) read run_before values and scatter the levels into block[] through
// scantable, optionally dequantizing with qmul.
// NOTE(review): several lines of this function (branch conditions, local
// declarations, returns) are not visible in this excerpt; comments below
// describe only what the visible lines do.
445 static int decode_residual(H264Context *h, H264SliceContext *sl,
446 GetBitContext *gb, int16_t *block, int n,
447 const uint8_t *scantable, const uint32_t *qmul,
// Maps the predicted non-zero count (0..16) to one of the four coeff_token
// VLCs: 0-1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-16 -> 3.
450 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
452 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
454 //FIXME put trailing_ones into the context
// Chroma DC blocks use dedicated coeff_token VLCs (regular or chroma422; the
// selecting conditions are not visible here).
458 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
460 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
// The token packs total_coeff in the upper bits and trailing_ones in the low
// two bits.
461 total_coeff= coeff_token>>2;
// Luma DC blocks: the VLC is chosen from the prediction for block
// (n - LUMA_DC_BLOCK_INDEX)*16.
463 if(n >= LUMA_DC_BLOCK_INDEX){
464 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
465 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466 total_coeff= coeff_token>>2;
// Other blocks: predict from block n itself (total_coeff temporarily holds
// the prediction used to pick the table).
468 total_coeff= pred_non_zero_count(h, sl, n);
469 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
470 total_coeff= coeff_token>>2;
// Record the count so that neighbouring blocks can predict from it.
473 sl->non_zero_count_cache[scan8[n]] = total_coeff;
475 //FIXME set last_non_zero?
// The cast makes this an unsigned comparison, so a negative total_coeff is
// rejected as well as one larger than max_coeff.
479 if(total_coeff > (unsigned)max_coeff) {
480 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
484 trailing_ones= coeff_token&3;
485 tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
486 assert(total_coeff<=16);
// Peek three sign bits but consume only trailing_ones of them. Each of
// level[0..2] becomes +1 (sign bit 0) or -1 (sign bit 1); entries beyond
// trailing_ones are overwritten below when more levels follow.
488 i = show_bits(gb, 3);
489 skip_bits(gb, trailing_ones);
490 level[0] = 1-((i&4)>>1);
491 level[1] = 1-((i&2) );
492 level[2] = 1-((i&1)<<1);
494 if(trailing_ones<total_coeff) {
// '&' here combines two 0/1 comparison results (relational operators bind
// tighter than '&'), so suffix_length starts at 1 only when both hold.
496 int suffix_length = total_coeff > 10 & trailing_ones < 3;
// Fast path: look the next LEVEL_TAB_BITS bits up in the precomputed table.
497 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
498 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
500 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Values >= 100 are escapes carrying only the prefix (see init_cavlc_level_tab).
501 if(level_code >= 100){
502 prefix= level_code - 100;
// The prefix ran past the table window: keep counting in the bitstream.
503 if(prefix == LEVEL_TAB_BITS)
504 prefix += get_level_prefix(gb);
506 //first coefficient has suffix_length equal to 0 or 1
// The number of suffix bits read depends on prefix (and, on lines not
// visible here, presumably on suffix_length).
507 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
509 level_code= (prefix<<1) + get_bits1(gb); //part
511 level_code= prefix; //part
512 }else if(prefix==14){
514 level_code= (prefix<<1) + get_bits1(gb); //part
516 level_code= prefix + get_bits(gb, 4); //part
518 level_code= 30 + get_bits(gb, prefix-3); //part
// Error path for an over-long prefix (its condition is not visible here).
521 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
524 level_code += (1<<(prefix-3))-4096;
// With fewer than three trailing ones the code is offset by 2 before the
// sign mapping (i.e. the magnitude grows by 1).
528 if(trailing_ones < 3) level_code += 2;
// Unsigned code -> signed level: even codes positive, odd codes negative.
531 mask= -(level_code&1);
532 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table-hit path: level_code is already signed; add +1 or -1 (matching its
// sign) when trailing_ones < 3, otherwise add nothing.
534 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// Unsigned range trick: suffix_length becomes 2 when |level_code| > 3, else 1.
536 suffix_length = 1 + (level_code + 3U > 6U);
537 level[trailing_ones]= level_code;
540 //remaining coefficients have suffix_length > 0
541 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude thresholds above which suffix_length is incremented; the INT_MAX
// entry keeps suffix_length from growing beyond 6.
542 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
543 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
544 level_code= cavlc_level_tab[suffix_length][bitsi][0];
546 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
547 if(level_code >= 100){
548 prefix= level_code - 100;
549 if(prefix == LEVEL_TAB_BITS){
550 prefix += get_level_prefix(gb);
// Regular escape: prefix followed by suffix_length suffix bits; the second
// form below reads prefix-3 bits instead (selecting condition not visible).
553 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
555 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
557 level_code += (1<<(prefix-3))-4096;
559 mask= -(level_code&1);
560 level_code= (((2+level_code)>>1) ^ mask) - mask;
562 level[i]= level_code;
// Unsigned comparison equivalent to |level_code| > suffix_limit[suffix_length].
563 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// A full block has no zeros to signal (the statement under this condition is
// not visible here).
567 if(total_coeff == max_coeff)
// Blocks with at most 8 coefficients use the chroma DC total_zeros VLCs
// (regular or chroma422); all others use total_zeros_vlc.
570 if (max_coeff <= 8) {
572 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
573 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
575 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
576 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
578 zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK(type): writes the levels into block[] walking scantable
// backwards from position zeros_left + total_coeff - 1. Between levels a
// run_before value is read (run_vlc[zeros_left - 1] or run7_vlc) while zeros
// remain. For n >= LUMA_DC_BLOCK_INDEX the raw level is stored; otherwise it
// is dequantized as (level * qmul[pos] + 32) >> 6.
582 #define STORE_BLOCK(type) \
583 scantable += zeros_left + total_coeff - 1; \
584 if(n >= LUMA_DC_BLOCK_INDEX){ \
585 ((type*)block)[*scantable] = level[0]; \
586 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
588 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
590 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
591 zeros_left -= run_before; \
592 scantable -= 1 + run_before; \
593 ((type*)block)[*scantable]= level[i]; \
595 for(;i<total_coeff;i++) { \
597 ((type*)block)[*scantable]= level[i]; \
600 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
601 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
603 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
605 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
606 zeros_left -= run_before; \
607 scantable -= 1 + run_before; \
608 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610 for(;i<total_coeff;i++) { \
612 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
// A negative zeros_left is reported as invalid data.
616 if (zeros_left < 0) {
617 av_log(h->avctx, AV_LOG_ERROR,
618 "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
619 return AVERROR_INVALIDDATA;
// pixel_shift presumably selects the element type passed to STORE_BLOCK (the
// invocations are not visible here) -- confirm.
622 if (h->pixel_shift) {
// Decodes the residual blocks of plane p (0 = luma; 1 and 2 use the chroma
// quantizers) for one macroblock, covering the intra 16x16, 8x8-transform and
// 4x4 layouts.
// NOTE(review): several lines (declarations, branch conditions, returns) are
// not visible in this excerpt. The visible caller treats a negative result as
// an error and ORs a non-negative result shifted left by 12 into
// h->cbp_table, so the result is presumably new_cbp -- confirm.
631 static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContext *sl, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
// Plane 0 uses the slice qscale; planes 1 and 2 use chroma_qp[p - 1].
633 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
634 if(IS_INTRA16x16(mb_type)){
// Clear the DC coefficient buffer of this plane before decoding into it.
635 AV_ZERO128(sl->mb_luma_dc[p]+0);
636 AV_ZERO128(sl->mb_luma_dc[p]+8);
637 AV_ZERO128(sl->mb_luma_dc[p]+16);
638 AV_ZERO128(sl->mb_luma_dc[p]+24);
// DC block: read from intra_gb_ptr, no dequantization table, up to 16 coeffs.
639 if( decode_residual(h, sl, h->intra_gb_ptr, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
640 return -1; //FIXME continue if partitioned and other return -1 too
// For intra 16x16 the low four cbp bits are expected to be all set or all clear.
643 assert((cbp&15) == 0 || (cbp&15) == 15);
// AC blocks: scan + 1 and a limit of 15 skip the DC position of each 4x4 block.
646 for(i8x8=0; i8x8<4; i8x8++){
647 for(i4x4=0; i4x4<4; i4x4++){
648 const int index= i4x4 + 4*i8x8 + p*16;
649 if( decode_residual(h, sl, h->intra_gb_ptr, sl->mb + (16*index << pixel_shift),
650 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Otherwise (branch line not visible) mark all 16 blocks of the plane as
// having no non-zero coefficients.
657 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Dequantization matrix index: intra uses 0..2, non-intra uses 3..5, offset by plane.
661 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
662 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
664 for(i8x8=0; i8x8<4; i8x8++){
666 if(IS_8x8DCT(mb_type)){
// 8x8 transform: four decode_residual calls write into the same 64-coefficient
// buffer, each using its own 16-entry slice of scan8x8.
667 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
669 for(i4x4=0; i4x4<4; i4x4++){
670 const int index= i4x4 + 4*i8x8 + p*16;
671 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
672 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Fold the four per-4x4 counts into the first cache entry of the 8x8 block
// and set this block's bit in new_cbp if anything was coded.
675 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
676 nnz[0] += nnz[1] + nnz[8] + nnz[9];
677 new_cbp |= !!nnz[0] << i8x8;
// 4x4 transform: decode each of the four 4x4 blocks separately.
679 for(i4x4=0; i4x4<4; i4x4++){
680 const int index= i4x4 + 4*i8x8 + p*16;
681 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
682 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
685 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
// 8x8 block handled by the remaining branch (condition not visible): clear
// its four non-zero-count cache entries.
689 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
690 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
697 int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
701 unsigned int mb_type, cbp;
702 int dct8x8_allowed= h->pps.transform_8x8_mode;
703 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
704 const int pixel_shift = h->pixel_shift;
706 mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
708 tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
709 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
711 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
712 if(h->mb_skip_run==-1)
713 h->mb_skip_run= get_ue_golomb(&h->gb);
715 if (h->mb_skip_run--) {
716 if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
717 if(h->mb_skip_run==0)
718 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
720 decode_mb_skip(h, sl);
724 if (FRAME_MBAFF(h)) {
725 if( (h->mb_y&1) == 0 )
726 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
729 sl->prev_mb_skipped = 0;
731 mb_type= get_ue_golomb(&h->gb);
732 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
734 partition_count= b_mb_type_info[mb_type].partition_count;
735 mb_type= b_mb_type_info[mb_type].type;
738 goto decode_intra_mb;
740 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
742 partition_count= p_mb_type_info[mb_type].partition_count;
743 mb_type= p_mb_type_info[mb_type].type;
746 goto decode_intra_mb;
749 assert(sl->slice_type_nos == AV_PICTURE_TYPE_I);
750 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
754 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), h->mb_x, h->mb_y);
758 cbp= i_mb_type_info[mb_type].cbp;
759 sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
760 mb_type= i_mb_type_info[mb_type].type;
764 mb_type |= MB_TYPE_INTERLACED;
766 h->slice_table[mb_xy] = sl->slice_num;
768 if(IS_INTRA_PCM(mb_type)){
769 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
770 h->sps.bit_depth_luma;
772 // We assume these blocks are very rare so we do not optimize it.
773 sl->intra_pcm_ptr = align_get_bits(&h->gb);
774 if (get_bits_left(&h->gb) < mb_size) {
775 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
776 return AVERROR_INVALIDDATA;
778 skip_bits_long(&h->gb, mb_size);
780 // In deblocking, the quantizer is 0
781 h->cur_pic.qscale_table[mb_xy] = 0;
782 // All coeffs are present
783 memset(h->non_zero_count[mb_xy], 16, 48);
785 h->cur_pic.mb_type[mb_xy] = mb_type;
789 fill_decode_neighbors(h, sl, mb_type);
790 fill_decode_caches(h, sl, mb_type);
793 if(IS_INTRA(mb_type)){
795 // init_top_left_availability(h);
796 if(IS_INTRA4x4(mb_type)){
799 if(dct8x8_allowed && get_bits1(&h->gb)){
800 mb_type |= MB_TYPE_8x8DCT;
804 // fill_intra4x4_pred_table(h);
805 for(i=0; i<16; i+=di){
806 int mode = pred_intra_mode(h, sl, i);
808 if(!get_bits1(&h->gb)){
809 const int rem_mode= get_bits(&h->gb, 3);
810 mode = rem_mode + (rem_mode >= mode);
814 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
816 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
818 write_back_intra_pred_mode(h, sl);
819 if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
822 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
823 if (sl->intra16x16_pred_mode < 0)
827 pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&h->gb), 1);
830 sl->chroma_pred_mode = pred_mode;
832 sl->chroma_pred_mode = DC_128_PRED8x8;
834 }else if(partition_count==4){
835 int i, j, sub_partition_count[4], list, ref[2][4];
837 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
839 sl->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
840 if(sl->sub_mb_type[i] >=13){
841 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], h->mb_x, h->mb_y);
844 sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
845 sl->sub_mb_type[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
847 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
848 ff_h264_pred_direct_motion(h, sl, &mb_type);
849 sl->ref_cache[0][scan8[4]] =
850 sl->ref_cache[1][scan8[4]] =
851 sl->ref_cache[0][scan8[12]] =
852 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
855 assert(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
857 sl->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
858 if(sl->sub_mb_type[i] >=4){
859 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], h->mb_x, h->mb_y);
862 sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
863 sl->sub_mb_type[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
867 for (list = 0; list < sl->list_count; list++) {
868 int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(h);
870 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
871 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
875 }else if(ref_count == 2){
876 tmp= get_bits1(&h->gb)^1;
878 tmp= get_ue_golomb_31(&h->gb);
880 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
893 dct8x8_allowed = get_dct8x8_allowed(h, sl);
895 for (list = 0; list < sl->list_count; list++) {
897 if(IS_DIRECT(sl->sub_mb_type[i])) {
898 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
901 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
902 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
904 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
905 const int sub_mb_type= sl->sub_mb_type[i];
906 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
907 for(j=0; j<sub_partition_count[i]; j++){
909 const int index= 4*i + block_width*j;
910 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
911 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
912 mx += get_se_golomb(&h->gb);
913 my += get_se_golomb(&h->gb);
914 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
916 if(IS_SUB_8X8(sub_mb_type)){
918 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
920 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
921 }else if(IS_SUB_8X4(sub_mb_type)){
922 mv_cache[ 1 ][0]= mx;
923 mv_cache[ 1 ][1]= my;
924 }else if(IS_SUB_4X8(sub_mb_type)){
925 mv_cache[ 8 ][0]= mx;
926 mv_cache[ 8 ][1]= my;
928 mv_cache[ 0 ][0]= mx;
929 mv_cache[ 0 ][1]= my;
932 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
938 }else if(IS_DIRECT(mb_type)){
939 ff_h264_pred_direct_motion(h, sl, &mb_type);
940 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
943 //FIXME we should set ref_idx_l? to 0 if we use that later ...
944 if(IS_16X16(mb_type)){
945 for (list = 0; list < sl->list_count; list++) {
947 if(IS_DIR(mb_type, 0, list)){
948 int rc = sl->ref_count[list] << MB_MBAFF(h);
951 } else if (rc == 2) {
952 val= get_bits1(&h->gb)^1;
954 val= get_ue_golomb_31(&h->gb);
956 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
960 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
963 for (list = 0; list < sl->list_count; list++) {
964 if(IS_DIR(mb_type, 0, list)){
965 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
966 mx += get_se_golomb(&h->gb);
967 my += get_se_golomb(&h->gb);
968 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
970 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
974 else if(IS_16X8(mb_type)){
975 for (list = 0; list < sl->list_count; list++) {
978 if(IS_DIR(mb_type, i, list)){
979 int rc = sl->ref_count[list] << MB_MBAFF(h);
982 } else if (rc == 2) {
983 val= get_bits1(&h->gb)^1;
985 val= get_ue_golomb_31(&h->gb);
987 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
992 val= LIST_NOT_USED&0xFF;
993 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
996 for (list = 0; list < sl->list_count; list++) {
999 if(IS_DIR(mb_type, i, list)){
1000 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1001 mx += get_se_golomb(&h->gb);
1002 my += get_se_golomb(&h->gb);
1003 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1005 val= pack16to32(mx,my);
1008 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1012 assert(IS_8X16(mb_type));
1013 for (list = 0; list < sl->list_count; list++) {
1016 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1017 int rc = sl->ref_count[list] << MB_MBAFF(h);
1020 } else if (rc == 2) {
1021 val= get_bits1(&h->gb)^1;
1023 val= get_ue_golomb_31(&h->gb);
1025 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1030 val= LIST_NOT_USED&0xFF;
1031 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1034 for (list = 0; list < sl->list_count; list++) {
1037 if(IS_DIR(mb_type, i, list)){
1038 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1039 mx += get_se_golomb(&h->gb);
1040 my += get_se_golomb(&h->gb);
1041 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1043 val= pack16to32(mx,my);
1046 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1052 if(IS_INTER(mb_type))
1053 write_back_motion(h, sl, mb_type);
1055 if(!IS_INTRA16x16(mb_type)){
1056 cbp= get_ue_golomb(&h->gb);
1060 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1063 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1064 else cbp= golomb_to_inter_cbp [cbp];
1067 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1070 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1071 else cbp= golomb_to_inter_cbp_gray[cbp];
1075 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1076 mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1079 h->cbp_table[mb_xy]= cbp;
1080 h->cur_pic.mb_type[mb_xy] = mb_type;
1082 if(cbp || IS_INTRA16x16(mb_type)){
1083 int i4x4, i8x8, chroma_idx;
1086 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1087 const uint8_t *scan, *scan8x8;
1088 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1090 if(IS_INTERLACED(mb_type)){
1091 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1092 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1094 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1095 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1098 dquant= get_se_golomb(&h->gb);
1100 sl->qscale += dquant;
1102 if (((unsigned)sl->qscale) > max_qp){
1103 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1104 else sl->qscale -= max_qp+1;
1105 if (((unsigned)sl->qscale) > max_qp){
1106 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1111 sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1112 sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
1114 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1117 h->cbp_table[mb_xy] |= ret << 12;
1119 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1122 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1125 } else if (CHROMA422(h)) {
1127 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1128 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1129 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1136 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1137 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1138 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1139 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1140 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1141 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1142 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1144 mb += 16 << pixel_shift;
1149 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1150 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1152 } else /* yuv420 */ {
1154 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1155 if( decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1161 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1162 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1163 for(i4x4=0; i4x4<4; i4x4++){
1164 const int index= 16 + 16*chroma_idx + i4x4;
1165 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1171 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1172 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1176 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1177 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1178 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1180 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1181 write_back_non_zero_count(h, sl);