2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
33 #include "h264data.h" // FIXME FIXME FIXME
34 #include "h264_mvpred.h"
36 #include "mpegutils.h"
/* Lookup from a cbp code number (0..15) to a coded block pattern value.
 * ff_h264_decode_mb_cavlc() indexes this table with the value it read via
 * get_ue_golomb() when the macroblock is not intra 4x4, in the branch that
 * uses the "_gray" tables. */
40 static const uint8_t golomb_to_inter_cbp_gray[16]={
41 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Counterpart of the table above, used when IS_INTRA4x4(mb_type) is true. */
44 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
45 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Code lengths and code words for the chroma DC coefficient-token VLCs.
 * ff_h264_decode_init_vlc() builds chroma_dc_coeff_token_vlc from the [4*5]
 * pair and chroma422_dc_coeff_token_vlc from the [4*9] pair.
 * decode_residual() splits a decoded token into total_coeff (token>>2) and
 * trailing_ones (token&3).
 * NOTE(review): the initialiser rows are not visible in this excerpt. */
48 static const uint8_t chroma_dc_coeff_token_len[4*5]={
56 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
64 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
76 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Code lengths for the four coeff_token VLCs (first index 0..3, 4*17 codes
 * each). ff_h264_decode_init_vlc() turns row i into coeff_token_vlc[i];
 * decode_residual() picks the row through coeff_token_table_index[] from
 * the predicted non-zero count. */
88 static const uint8_t coeff_token_len[4][4*17]={
91 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
92 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
93 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
94 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
98 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
99 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
100 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
101 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
105 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
106 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
107 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
108 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
112 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
113 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Code words matching coeff_token_len entry for entry (same layout). */
119 static const uint8_t coeff_token_bits[4][4*17]={
122 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
123 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
124 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
125 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
129 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
130 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
131 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
132 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
136 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
137 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
138 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
139 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
143 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
144 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
145 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
146 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Code lengths for the total_zeros VLCs, 16 codes per row.
 * ff_h264_decode_init_vlc() builds total_zeros_vlc[i] from row i, and
 * decode_residual() selects the VLC with index total_coeff - 1.
 * NOTE(review): only some of the rows are visible in this excerpt. */
150 static const uint8_t total_zeros_len[16][16]= {
151 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
152 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
153 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
154 {5,3,4,4,3,3,3,4,3,4,5,5,5},
155 {4,4,4,3,3,3,3,3,4,5,4,5},
156 {6,5,3,3,3,3,3,3,4,3,6},
157 {6,5,3,3,3,2,3,4,3,6},
/* Code words matching total_zeros_len entry for entry. */
168 static const uint8_t total_zeros_bits[16][16]= {
169 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
170 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
171 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
172 {3,7,5,4,6,5,4,3,3,2,2,1,0},
173 {5,4,3,7,6,5,4,3,2,1,1,0},
174 {1,1,7,6,5,4,3,2,1,1,0},
175 {1,1,5,4,3,3,2,1,1,0},
/* Code lengths / code words for the total_zeros VLCs of chroma DC blocks.
 * The [3][4] pair feeds chroma_dc_total_zeros_vlc[] (4 codes per row) and
 * the [7][8] pair feeds chroma422_dc_total_zeros_vlc[] (8 codes per row) in
 * ff_h264_decode_init_vlc(). decode_residual() indexes both VLC arrays with
 * total_coeff - 1 when max_coeff <= 8.
 * NOTE(review): several initialiser rows are not visible in this excerpt. */
186 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
192 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
198 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
199 { 1, 3, 3, 4, 4, 4, 5, 5 },
200 { 3, 2, 3, 3, 3, 3, 3 },
201 { 3, 3, 2, 2, 3, 3 },
208 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
209 { 1, 2, 3, 2, 3, 1, 1, 0 },
210 { 0, 1, 1, 4, 5, 6, 7 },
211 { 0, 1, 1, 2, 6, 7 },
/* Code lengths / code words for the run_before VLCs.
 * ff_h264_decode_init_vlc() builds run_vlc[i] from row i (run_vlc has six
 * entries) and run7_vlc from row 6 with 16 codes. decode_residual() reads
 * run_before through run_vlc[zeros_left - 1] or through run7_vlc.
 * NOTE(review): only one row of each table is visible in this excerpt. */
218 static const uint8_t run_len[7][16]={
225 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
228 static const uint8_t run_bits[7][16]={
235 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Statically allocated VLC objects and their backing tables.
 * ff_h264_decode_init_vlc() points each VLC's .table at the storage declared
 * here, sets .table_allocated from the matching *_size constant and then
 * builds the table with init_vlc(..., INIT_VLC_USE_NEW_STATIC). */

/* The four coeff_token tables share one packed array; the per-table entry
 * counts are listed in coeff_token_vlc_tables_size[] and sum to the array
 * length (checked by an assert in ff_h264_decode_init_vlc()). */
238 static VLC coeff_token_vlc[4];
239 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
240 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 static VLC chroma_dc_coeff_token_vlc;
243 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
244 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246 static VLC chroma422_dc_coeff_token_vlc;
247 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
248 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
/* One VLC per total_coeff value; indexed with total_coeff - 1. */
250 static VLC total_zeros_vlc[15];
251 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
252 static const int total_zeros_vlc_tables_size = 512;
254 static VLC chroma_dc_total_zeros_vlc[3];
255 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
256 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258 static VLC chroma422_dc_total_zeros_vlc[7];
259 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
260 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
/* run_before VLCs, indexed with zeros_left - 1 in decode_residual(). */
262 static VLC run_vlc[6];
263 static VLC_TYPE run_vlc_tables[6][8][2];
264 static const int run_vlc_tables_size = 8;
/* Backing storage for run7_vlc.
 * NOTE(review): the declaration of run7_vlc itself is not visible here. */
267 static VLC_TYPE run7_vlc_table[96][2];
268 static const int run7_vlc_table_size = 96;
/* Number of upcoming bitstream bits used to index cavlc_level_tab. */
270 #define LEVEL_TAB_BITS 8
/* Level lookup filled by init_cavlc_level_tab():
 * [suffix_length][next LEVEL_TAB_BITS bits][0] is either a decoded level or
 * an escape value >= 100 (prefix + 100), and [..][1] is the number of bits
 * to skip. decode_residual() reads it with show_bits()/skip_bits(). */
271 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Bit counts passed to init_vlc() when building, and to get_vlc2() when
 * reading, the corresponding VLC. */
273 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
274 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
275 #define COEFF_TOKEN_VLC_BITS 8
276 #define TOTAL_ZEROS_VLC_BITS 9
277 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
278 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
279 #define RUN_VLC_BITS 3
280 #define RUN7_VLC_BITS 6
/**
 * Predict the number of non-zero coefficients of block n from its left and
 * top neighbours in h->non_zero_count_cache.
 *
 * @param h decoder context holding non_zero_count_cache
 * @param n block index, translated to a cache position through scan8[]
 *
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this excerpt; the trace line prints i&31 as the prediction,
 * so that is presumably the returned value -- confirm.
 */
283 * Get the predicted number of non-zero coefficients.
284 * @param n block index
286 static inline int pred_non_zero_count(H264Context *h, int n){
287 const int index8= scan8[n];
/* The cache is addressed with a row stride of 8: -1 is the left
 * neighbour, -8 the neighbour above. */
288 const int left= h->non_zero_count_cache[index8 - 1];
289 const int top = h->non_zero_count_cache[index8 - 8];
/* Below 64, i is replaced by its half, rounded up. */
292 if(i<64) i= (i+1)>>1;
294 tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fill cavlc_level_tab for every suffix_length (0..6) and every possible
 * value i of the next LEVEL_TAB_BITS bits of the bitstream.
 *
 * Each entry stores, in [0], either a fully decoded signed level or an
 * escape value of prefix + 100, and, in [1], how many bits the decoder has
 * to skip. decode_residual() treats values >= 100 as escapes.
 */
299 static av_cold void init_cavlc_level_tab(void){
303 for(suffix_length=0; suffix_length<7; suffix_length++){
304 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* Number of leading zero bits in the LEVEL_TAB_BITS-wide pattern i
 * (assuming av_log2() is floor(log2) -- confirm). */
305 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
/* Case 1: prefix zeros, the terminating 1 bit and the suffix all fit
 * inside the lookup window, so the level can be stored directly. */
307 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
308 int level_code = (prefix << suffix_length) +
309 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
/* mask is all ones for odd codes: even codes become positive levels,
 * odd codes negative levels. */
310 int mask = -(level_code&1);
311 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312 cavlc_level_tab[suffix_length][i][0]= level_code;
313 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* Case 2: only the prefix (plus its terminating bit) fits; store it as
 * an escape so the decoder reads the suffix itself. */
314 }else if(prefix + 1 <= LEVEL_TAB_BITS){
315 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* Remaining case: the prefix does not end inside the window; the escape
 * value LEVEL_TAB_BITS + 100 tells decode_residual() to continue counting
 * with get_level_prefix(). */
318 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * Build every static lookup structure used by the CAVLC residual decoder.
 *
 * For each VLC the function points .table at its statically allocated
 * storage, records the storage size in .table_allocated and calls
 * init_vlc() with INIT_VLC_USE_NEW_STATIC on the matching *_len / *_bits
 * arrays. It finishes by calling init_cavlc_level_tab().
 *
 * NOTE(review): the loop headers over i, the declaration of offset and any
 * run-once guard are not visible in this excerpt.
 */
325 av_cold void ff_h264_decode_init_vlc(void){
/* Chroma DC coefficient tokens (4*5 codes). */
333 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
334 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
335 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
336 &chroma_dc_coeff_token_len [0], 1, 1,
337 &chroma_dc_coeff_token_bits[0], 1, 1,
338 INIT_VLC_USE_NEW_STATIC);
/* Chroma DC coefficient tokens, 4:2:2 variant (4*9 codes). */
340 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
341 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
342 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
343 &chroma422_dc_coeff_token_len [0], 1, 1,
344 &chroma422_dc_coeff_token_bits[0], 1, 1,
345 INIT_VLC_USE_NEW_STATIC);
/* coeff_token tables: each one gets a slice of the packed
 * coeff_token_vlc_tables array starting at the running offset. */
349 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
350 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
351 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
352 &coeff_token_len [i][0], 1, 1,
353 &coeff_token_bits[i][0], 1, 1,
354 INIT_VLC_USE_NEW_STATIC);
355 offset += coeff_token_vlc_tables_size[i];
358 * This is a one time safety check to make sure that
359 * the packed static coeff_token_vlc table sizes
360 * were initialized correctly.
362 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* total_zeros tables for chroma DC blocks (4 codes per row). */
365 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
366 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
367 init_vlc(&chroma_dc_total_zeros_vlc[i],
368 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
369 &chroma_dc_total_zeros_len [i][0], 1, 1,
370 &chroma_dc_total_zeros_bits[i][0], 1, 1,
371 INIT_VLC_USE_NEW_STATIC);
/* total_zeros tables for 4:2:2 chroma DC blocks (8 codes per row). */
375 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
376 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
377 init_vlc(&chroma422_dc_total_zeros_vlc[i],
378 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
379 &chroma422_dc_total_zeros_len [i][0], 1, 1,
380 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
381 INIT_VLC_USE_NEW_STATIC);
/* total_zeros tables for all other blocks (16 codes per row). */
385 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
386 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
387 init_vlc(&total_zeros_vlc[i],
388 TOTAL_ZEROS_VLC_BITS, 16,
389 &total_zeros_len [i][0], 1, 1,
390 &total_zeros_bits[i][0], 1, 1,
391 INIT_VLC_USE_NEW_STATIC);
/* run_before tables taken from rows i of run_len / run_bits. */
395 run_vlc[i].table = run_vlc_tables[i];
396 run_vlc[i].table_allocated = run_vlc_tables_size;
397 init_vlc(&run_vlc[i],
399 &run_len [i][0], 1, 1,
400 &run_bits[i][0], 1, 1,
401 INIT_VLC_USE_NEW_STATIC);
/* run_before table built from row 6. Note that the first assignment ends
 * in a comma, so it forms a single comma expression with the assignment on
 * the next line; the effect is the same as two separate statements. */
403 run7_vlc.table = run7_vlc_table,
404 run7_vlc.table_allocated = run7_vlc_table_size;
405 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
406 &run_len [6][0], 1, 1,
407 &run_bits[6][0], 1, 1,
408 INIT_VLC_USE_NEW_STATIC);
/* Finally precompute the level lookup used by decode_residual(). */
410 init_cavlc_level_tab();
/**
 * Read a level prefix from gb by locating the highest set bit in the
 * reader's cache and consuming everything up to and including it.
 *
 * @param gb bit reader to consume from
 *
 * NOTE(review): the local declarations, OPEN_READER and the return
 * statement are not visible in this excerpt. The debug output prints
 * log-1 next to "lpr", so the function presumably returns log-1 -- confirm.
 */
417 static inline int get_level_prefix(GetBitContext *gb){
422 UPDATE_CACHE(re, gb);
423 buf=GET_CACHE(re, gb);
/* Position of the first 1 bit counted from the top, plus one
 * (assuming a 32-bit cache word and av_log2() == floor(log2)). */
425 log= 32 - av_log2(buf);
427 print_bin(buf>>(32-log), log);
428 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* Consume the zeros together with the terminating 1 bit. */
431 LAST_SKIP_BITS(re, gb, log);
432 CLOSE_READER(re, gb);
/**
 * Decode one CAVLC-coded residual block from gb and store its coefficients
 * into block.
 *
 * Visible stages: read coeff_token (total_coeff in the upper bits,
 * trailing_ones in the low two bits), read the trailing-one signs and the
 * remaining levels, read total_zeros, then write the levels into block[]
 * from the last scan position backwards while reading run_before values.
 *
 * @param h         decoder context (non_zero_count_cache, avctx, mb_x/mb_y,
 *                  pixel_shift are used here)
 * @param gb        bit reader to consume from
 * @param block     destination coefficients, addressed through scantable
 * @param n         block index; values >= LUMA_DC_BLOCK_INDEX are stored
 *                  without dequantisation
 * @param scantable scan order mapping coefficient number to block position
 * @param qmul      dequantisation multipliers indexed by block position;
 *                  callers in this file pass NULL for DC blocks
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred (AVERROR_INVALIDDATA when zeros_left
 *         becomes negative)
 *
 * NOTE(review): many lines of this function (branch conditions, else
 * branches, the final STORE_BLOCK invocations and the success return) are
 * not visible in this excerpt.
 */
438 * Decode a residual block.
439 * @param n block index
440 * @param scantable scantable
441 * @param max_coeff number of coefficients in the block
442 * @return <0 if an error occurred
444 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
/* Maps a predicted non-zero count (0..16) to one of the four coeff_token
 * VLCs. */
445 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449 //FIXME put trailing_ones into the context
/* Chroma DC blocks use their own token VLCs (the selecting conditions are
 * not visible here). */
453 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
456 total_coeff= coeff_token>>2;
/* Other blocks pick the token VLC from the predicted non-zero count;
 * total_coeff temporarily holds that prediction before being overwritten
 * with the decoded value. */
458 if(n >= LUMA_DC_BLOCK_INDEX){
459 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
460 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
461 total_coeff= coeff_token>>2;
463 total_coeff= pred_non_zero_count(h, n);
464 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
465 total_coeff= coeff_token>>2;
/* Remember the count so neighbouring blocks can predict from it. */
468 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470 //FIXME set last_non_zero?
/* The cast makes this an unsigned comparison. */
474 if(total_coeff > (unsigned)max_coeff) {
475 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
479 trailing_ones= coeff_token&3;
480 tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
481 assert(total_coeff<=16);
/* Peek three sign bits but consume only trailing_ones of them; each
 * level[k] becomes +1 for a 0 bit and -1 for a 1 bit. */
483 i = show_bits(gb, 3);
484 skip_bits(gb, trailing_ones);
485 level[0] = 1-((i&4)>>1);
486 level[1] = 1-((i&2) );
487 level[2] = 1-((i&1)<<1);
489 if(trailing_ones<total_coeff) {
/* Relational operators bind tighter than &, so this is
 * (total_coeff > 10) & (trailing_ones < 3), i.e. 0 or 1. */
491 int suffix_length = total_coeff > 10 & trailing_ones < 3;
492 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
493 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Table values >= 100 are escapes carrying only the prefix. */
496 if(level_code >= 100){
497 prefix= level_code - 100;
/* The prefix did not end inside the lookup window: keep counting. */
498 if(prefix == LEVEL_TAB_BITS)
499 prefix += get_level_prefix(gb);
501 //first coefficient has suffix_length equal to 0 or 1
502 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504 level_code= (prefix<<1) + get_bits1(gb); //part
506 level_code= prefix; //part
507 }else if(prefix==14){
509 level_code= (prefix<<1) + get_bits1(gb); //part
511 level_code= prefix + get_bits(gb, 4); //part
513 level_code= 30 + get_bits(gb, prefix-3); //part
516 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519 level_code += (1<<(prefix-3))-4096;
523 if(trailing_ones < 3) level_code += 2;
/* Even codes map to positive levels, odd codes to negative ones. */
526 mask= -(level_code&1);
527 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
/* Level came straight from the table: when trailing_ones < 3, add +1 or
 * -1 according to the sign of level_code. */
529 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* Unsigned compare: suffix_length becomes 2 when |level_code| > 3,
 * otherwise 1. */
531 suffix_length = 1 + (level_code + 3U > 6U);
532 level[trailing_ones]= level_code;
535 //remaining coefficients have suffix_length > 0
536 for(i=trailing_ones+1;i<total_coeff;i++) {
537 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
538 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
539 level_code= cavlc_level_tab[suffix_length][bitsi][0];
541 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
542 if(level_code >= 100){
543 prefix= level_code - 100;
544 if(prefix == LEVEL_TAB_BITS){
545 prefix += get_level_prefix(gb);
548 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
550 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
552 level_code += (1<<(prefix-3))-4096;
554 mask= -(level_code&1);
555 level_code= (((2+level_code)>>1) ^ mask) - mask;
557 level[i]= level_code;
/* Unsigned compare: grows suffix_length by one when the magnitude of
 * level_code exceeds suffix_limit[suffix_length]. */
558 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* total_zeros: the VLC depends on the block size (max_coeff) and on
 * total_coeff. */
562 if(total_coeff == max_coeff)
565 if (max_coeff <= 8) {
567 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
568 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
570 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
571 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
573 zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK(type): write level[] into block (viewed as type*) starting at
 * scan position zeros_left + total_coeff - 1 and moving backwards. While
 * zeros remain, a run_before value is read for each coefficient and
 * skipped. For n >= LUMA_DC_BLOCK_INDEX the raw level is stored; the
 * other stores apply (level * qmul[pos] + 32) >> 6. */
577 #define STORE_BLOCK(type) \
578 scantable += zeros_left + total_coeff - 1; \
579 if(n >= LUMA_DC_BLOCK_INDEX){ \
580 ((type*)block)[*scantable] = level[0]; \
581 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
583 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
585 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
586 zeros_left -= run_before; \
587 scantable -= 1 + run_before; \
588 ((type*)block)[*scantable]= level[i]; \
590 for(;i<total_coeff;i++) { \
592 ((type*)block)[*scantable]= level[i]; \
595 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
596 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
598 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
600 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
601 zeros_left -= run_before; \
602 scantable -= 1 + run_before; \
603 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
605 for(;i<total_coeff;i++) { \
607 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
611 if (zeros_left < 0) {
612 av_log(h->avctx, AV_LOG_ERROR,
613 "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
614 return AVERROR_INVALIDDATA;
617 if (h->pixel_shift) {
/**
 * Decode the residual of one 16x16 plane of the current macroblock.
 *
 * @param h           decoder context
 * @param sl          slice context supplying qscale / chroma_qp
 * @param gb          bit reader used for non-intra16x16 blocks; the
 *                    intra 16x16 path reads from h->intra_gb_ptr instead
 * @param scan        4x4 scan table
 * @param scan8x8     8x8 scan table, consumed in four slices of 16 entries
 * @param pixel_shift shift applied to coefficient offsets into h->mb
 * @param mb_type     macroblock type flags
 * @param cbp         coded block pattern
 * @param p           plane index; 0 uses sl->qscale, other values use
 *                    sl->chroma_qp[p - 1]
 *
 * NOTE(review): the return statements and several branch conditions are
 * not visible in this excerpt. The caller treats a negative result as an
 * error and ORs (result << 12) into cbp_table, so the success value is
 * presumably new_cbp -- confirm.
 */
626 static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContext *sl, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
629 if(IS_INTRA16x16(mb_type)){
/* Clear the DC buffer of this plane with four AV_ZERO128 stores, then
 * decode the 16 DC coefficients without dequantisation (qmul is NULL). */
630 AV_ZERO128(h->mb_luma_dc[p]+0);
631 AV_ZERO128(h->mb_luma_dc[p]+8);
632 AV_ZERO128(h->mb_luma_dc[p]+16);
633 AV_ZERO128(h->mb_luma_dc[p]+24);
634 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
635 return -1; //FIXME continue if partitioned and other return -1 too
638 assert((cbp&15) == 0 || (cbp&15) == 15);
/* AC part: 15 coefficients per 4x4 block, scan advanced past the DC
 * position. */
641 for(i8x8=0; i8x8<4; i8x8++){
642 for(i4x4=0; i4x4<4; i4x4++){
643 const int index= i4x4 + 4*i8x8 + p*16;
644 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
645 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* Mark all 4x4 blocks of this plane as having no coefficients. */
652 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Dequant matrix index: 0..2 for intra, 3..5 otherwise, offset by p. */
656 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
657 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
659 for(i8x8=0; i8x8<4; i8x8++){
661 if(IS_8x8DCT(mb_type)){
/* + binds tighter than <<, so the whole offset is shifted. */
662 int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
/* An 8x8 block is coded as four interleaved groups of 16 coefficients
 * that all land in the same buffer. */
664 for(i4x4=0; i4x4<4; i4x4++){
665 const int index= i4x4 + 4*i8x8 + p*16;
666 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
667 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Sum the four 4x4 counts into the first cache entry of the 8x8 block. */
670 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
671 nnz[0] += nnz[1] + nnz[8] + nnz[9];
672 new_cbp |= !!nnz[0] << i8x8;
674 for(i4x4=0; i4x4<4; i4x4++){
675 const int index= i4x4 + 4*i8x8 + p*16;
676 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
677 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
680 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
/* No residual for this 8x8 block: zero its four cache entries. */
684 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
685 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
692 int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
696 unsigned int mb_type, cbp;
697 int dct8x8_allowed= h->pps.transform_8x8_mode;
698 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
699 const int pixel_shift = h->pixel_shift;
701 mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
703 tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
704 cbp = 0; /* avoid warning. FIXME: find a solution without slowing down the code */
706 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
707 if(h->mb_skip_run==-1)
708 h->mb_skip_run= get_ue_golomb(&h->gb);
710 if (h->mb_skip_run--) {
711 if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
712 if(h->mb_skip_run==0)
713 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
715 decode_mb_skip(h, sl);
719 if (FRAME_MBAFF(h)) {
720 if( (h->mb_y&1) == 0 )
721 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
724 sl->prev_mb_skipped = 0;
726 mb_type= get_ue_golomb(&h->gb);
727 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
729 partition_count= b_mb_type_info[mb_type].partition_count;
730 mb_type= b_mb_type_info[mb_type].type;
733 goto decode_intra_mb;
735 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
737 partition_count= p_mb_type_info[mb_type].partition_count;
738 mb_type= p_mb_type_info[mb_type].type;
741 goto decode_intra_mb;
744 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
745 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
749 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
753 cbp= i_mb_type_info[mb_type].cbp;
754 sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
755 mb_type= i_mb_type_info[mb_type].type;
759 mb_type |= MB_TYPE_INTERLACED;
761 h->slice_table[ mb_xy ]= h->slice_num;
763 if(IS_INTRA_PCM(mb_type)){
764 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
765 h->sps.bit_depth_luma;
767 // We assume these blocks are very rare so we do not optimize it.
768 h->intra_pcm_ptr = align_get_bits(&h->gb);
769 if (get_bits_left(&h->gb) < mb_size) {
770 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
771 return AVERROR_INVALIDDATA;
773 skip_bits_long(&h->gb, mb_size);
775 // In deblocking, the quantizer is 0
776 h->cur_pic.qscale_table[mb_xy] = 0;
777 // All coeffs are present
778 memset(h->non_zero_count[mb_xy], 16, 48);
780 h->cur_pic.mb_type[mb_xy] = mb_type;
784 fill_decode_neighbors(h, mb_type);
785 fill_decode_caches(h, mb_type);
788 if(IS_INTRA(mb_type)){
790 // init_top_left_availability(h);
791 if(IS_INTRA4x4(mb_type)){
794 if(dct8x8_allowed && get_bits1(&h->gb)){
795 mb_type |= MB_TYPE_8x8DCT;
799 // fill_intra4x4_pred_table(h);
800 for(i=0; i<16; i+=di){
801 int mode= pred_intra_mode(h, i);
803 if(!get_bits1(&h->gb)){
804 const int rem_mode= get_bits(&h->gb, 3);
805 mode = rem_mode + (rem_mode >= mode);
809 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
811 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
813 write_back_intra_pred_mode(h);
814 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
817 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl->intra16x16_pred_mode, 0);
818 if (sl->intra16x16_pred_mode < 0)
822 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
825 sl->chroma_pred_mode = pred_mode;
827 sl->chroma_pred_mode = DC_128_PRED8x8;
829 }else if(partition_count==4){
830 int i, j, sub_partition_count[4], list, ref[2][4];
832 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
834 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
835 if(h->sub_mb_type[i] >=13){
836 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
839 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
840 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
842 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
843 ff_h264_pred_direct_motion(h, &mb_type);
844 h->ref_cache[0][scan8[4]] =
845 h->ref_cache[1][scan8[4]] =
846 h->ref_cache[0][scan8[12]] =
847 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
850 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
852 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
853 if(h->sub_mb_type[i] >=4){
854 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
857 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
858 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
862 for(list=0; list<h->list_count; list++){
863 int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
865 if(IS_DIRECT(h->sub_mb_type[i])) continue;
866 if(IS_DIR(h->sub_mb_type[i], 0, list)){
870 }else if(ref_count == 2){
871 tmp= get_bits1(&h->gb)^1;
873 tmp= get_ue_golomb_31(&h->gb);
875 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
888 dct8x8_allowed = get_dct8x8_allowed(h);
890 for(list=0; list<h->list_count; list++){
892 if(IS_DIRECT(h->sub_mb_type[i])) {
893 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
896 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
897 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
899 if(IS_DIR(h->sub_mb_type[i], 0, list)){
900 const int sub_mb_type= h->sub_mb_type[i];
901 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
902 for(j=0; j<sub_partition_count[i]; j++){
904 const int index= 4*i + block_width*j;
905 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
906 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
907 mx += get_se_golomb(&h->gb);
908 my += get_se_golomb(&h->gb);
909 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
911 if(IS_SUB_8X8(sub_mb_type)){
913 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
915 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
916 }else if(IS_SUB_8X4(sub_mb_type)){
917 mv_cache[ 1 ][0]= mx;
918 mv_cache[ 1 ][1]= my;
919 }else if(IS_SUB_4X8(sub_mb_type)){
920 mv_cache[ 8 ][0]= mx;
921 mv_cache[ 8 ][1]= my;
923 mv_cache[ 0 ][0]= mx;
924 mv_cache[ 0 ][1]= my;
927 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
933 }else if(IS_DIRECT(mb_type)){
934 ff_h264_pred_direct_motion(h, &mb_type);
935 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
938 //FIXME we should set ref_idx_l? to 0 if we use that later ...
939 if(IS_16X16(mb_type)){
940 for(list=0; list<h->list_count; list++){
942 if(IS_DIR(mb_type, 0, list)){
943 int rc = h->ref_count[list] << MB_MBAFF(h);
946 } else if (rc == 2) {
947 val= get_bits1(&h->gb)^1;
949 val= get_ue_golomb_31(&h->gb);
951 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
955 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
958 for(list=0; list<h->list_count; list++){
959 if(IS_DIR(mb_type, 0, list)){
960 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
961 mx += get_se_golomb(&h->gb);
962 my += get_se_golomb(&h->gb);
963 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
965 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
969 else if(IS_16X8(mb_type)){
970 for(list=0; list<h->list_count; list++){
973 if(IS_DIR(mb_type, i, list)){
974 int rc = h->ref_count[list] << MB_MBAFF(h);
977 } else if (rc == 2) {
978 val= get_bits1(&h->gb)^1;
980 val= get_ue_golomb_31(&h->gb);
982 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
987 val= LIST_NOT_USED&0xFF;
988 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
991 for(list=0; list<h->list_count; list++){
994 if(IS_DIR(mb_type, i, list)){
995 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
996 mx += get_se_golomb(&h->gb);
997 my += get_se_golomb(&h->gb);
998 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1000 val= pack16to32(mx,my);
1003 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1007 assert(IS_8X16(mb_type));
1008 for(list=0; list<h->list_count; list++){
1011 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1012 int rc = h->ref_count[list] << MB_MBAFF(h);
1015 } else if (rc == 2) {
1016 val= get_bits1(&h->gb)^1;
1018 val= get_ue_golomb_31(&h->gb);
1020 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1025 val= LIST_NOT_USED&0xFF;
1026 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1029 for(list=0; list<h->list_count; list++){
1032 if(IS_DIR(mb_type, i, list)){
1033 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1034 mx += get_se_golomb(&h->gb);
1035 my += get_se_golomb(&h->gb);
1036 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1038 val= pack16to32(mx,my);
1041 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1047 if(IS_INTER(mb_type))
1048 write_back_motion(h, mb_type);
1050 if(!IS_INTRA16x16(mb_type)){
1051 cbp= get_ue_golomb(&h->gb);
1055 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1058 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1059 else cbp= golomb_to_inter_cbp [cbp];
1062 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1065 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1066 else cbp= golomb_to_inter_cbp_gray[cbp];
1070 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1071 mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1074 h->cbp_table[mb_xy]= cbp;
1075 h->cur_pic.mb_type[mb_xy] = mb_type;
1077 if(cbp || IS_INTRA16x16(mb_type)){
1078 int i4x4, i8x8, chroma_idx;
1081 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1082 const uint8_t *scan, *scan8x8;
1083 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1085 if(IS_INTERLACED(mb_type)){
1086 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1087 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1089 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1090 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1093 dquant= get_se_golomb(&h->gb);
1095 sl->qscale += dquant;
1097 if (((unsigned)sl->qscale) > max_qp){
1098 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1099 else sl->qscale -= max_qp+1;
1100 if (((unsigned)sl->qscale) > max_qp){
1101 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1106 sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1107 sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
1109 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1112 h->cbp_table[mb_xy] |= ret << 12;
1114 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1117 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1120 } else if (CHROMA422(h)) {
1122 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1123 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1124 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1131 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1132 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1133 int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1134 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1135 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1136 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1137 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1139 mb += 16 << pixel_shift;
1144 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1145 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1147 } else /* yuv420 */ {
1149 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1150 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1156 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1157 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1158 for(i4x4=0; i4x4<4; i4x4++){
1159 const int index= 16 + 16*chroma_idx + i4x4;
1160 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1166 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1167 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1171 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1172 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1173 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1175 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1176 write_back_non_zero_count(h);