2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
/* CBP lookup tables indexed by the value that ff_h264_decode_mb_cavlc()
 * reads with get_ue_golomb(). Every entry is in 0..15.
 * This one is used for every macroblock type that is not intra 4x4.
 * NOTE(review): the condition selecting the "_gray" tables is not visible
 * in this excerpt; presumably it is the no-separate-chroma case -- confirm. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Same kind of mapping, used when IS_INTRA4x4(mb_type) is true. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* "_len" / "_bits" table pairs for the coeff_token VLCs. Each pair is handed
 * to init_vlc() in ff_h264_decode_init_vlc(). decode_residual() splits the
 * value it gets back from get_vlc2() as
 *     total_coeff   = coeff_token >> 2
 *     trailing_ones = coeff_token &  3
 * NOTE(review): "_len" is presumably the code length in bits and "_bits" the
 * code word itself -- confirm against the init_vlc() documentation. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Four tables of 4*17 entries; decode_residual() picks the table through
 * coeff_token_table_index[] using the predicted coefficient count. */
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
/* In the rows visible for the fourth table every non-zero length is 6. */
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Companion of coeff_token_len: same [4][4*17] layout. */
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* "_len" / "_bits" pairs for the total_zeros VLCs. The row is selected by
 * total_coeff: decode_residual() indexes the VLC array as
 * (total_zeros_vlc-1)[total_coeff], so row 0 serves total_coeff == 1.
 * Rows get shorter as total_coeff grows; omitted trailing entries are
 * zero-initialised by the compiler. */
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
/* total_zeros tables for chroma DC blocks: the [3][4] pair feeds
 * chroma_dc_total_zeros_vlc[], the [7][8] pair feeds
 * chroma422_dc_total_zeros_vlc[] (see ff_h264_decode_init_vlc()). */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
/* run_before tables. Rows 0..5 initialise run_vlc[0..5]; row 6 initialises
 * the separate run7_vlc (see ff_h264_decode_init_vlc()). Only row 6 is
 * visible in this excerpt. */
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Static storage for all CAVLC VLC decoders. Each VLC object is paired with
 * a statically sized VLC_TYPE[][2] backing array and a "_size" constant that
 * ff_h264_decode_init_vlc() copies into table_allocated before calling
 * init_vlc() with INIT_VLC_USE_NEW_STATIC. */
/* The four coeff_token tables share one packed array; the per-table slice
 * sizes are listed in coeff_token_vlc_tables_size[] and must add up to the
 * array length (checked with av_assert0 during init). */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
/* Backing store for run7_vlc. NOTE(review): the declaration of run7_vlc
 * itself is not visible in this excerpt. */
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
/* Lookup table for level decoding, indexed [suffix_length][next
 * LEVEL_TAB_BITS bits of the stream]; element [0] is the level (or an escape
 * value >= 100) and element [1] the number of bits to skip. It is filled by
 * init_cavlc_level_tab(). */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Bit widths passed both to init_vlc() and to get_vlc2() for each VLC. For
 * most tables the backing array above has exactly 1 << BITS entries; the
 * coeff_token slices (520/332/280/256 vs 1<<8), and run7 (96 vs 1<<6) are
 * sized differently. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
/* Reads the cached coefficient counts of the left (index8 - 1) and top
 * (index8 - 8) neighbours of block n from sl->non_zero_count_cache and
 * combines them into the prediction i. When i < 64 it is replaced by its
 * rounded-up half, (i + 1) >> 1. The trace output prints i & 31.
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this excerpt; presumably i starts as left + top and the
 * function returns i & 31 -- confirm. */
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
287 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
289 const int index8= scan8[n];
290 const int left = sl->non_zero_count_cache[index8 - 1];
291 const int top = sl->non_zero_count_cache[index8 - 8];
294 if(i<64) i= (i+1)>>1;
296 ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fills cavlc_level_tab for every suffix_length in 0..6 and every possible
 * LEVEL_TAB_BITS-bit window i of the bitstream.
 *   [..][i][0]  decoded signed level, or an escape value (prefix + 100)
 *   [..][i][1]  number of bits the decoder must skip
 * decode_residual() treats any [0] value >= 100 as "prefix = value - 100,
 * finish decoding by hand". */
301 static av_cold void init_cavlc_level_tab(void){
305 for(suffix_length=0; suffix_length<7; suffix_length++){
306 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix is derived from the position of the highest set bit of i. */
307 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
/* Case 1: prefix, stop bit and suffix all fit in the window, so the final
 * level can be stored directly. */
309 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
310 int level_code = (prefix << suffix_length) +
311 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
/* Odd level_code maps to a negative level, even to a positive one. */
312 int mask = -(level_code&1);
313 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314 cavlc_level_tab[suffix_length][i][0]= level_code;
315 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* Case 2: only the prefix (plus stop bit) fits; store the escape value and
 * consume just the prefix. */
316 }else if(prefix + 1 <= LEVEL_TAB_BITS){
317 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* Remaining case: the prefix is longer than the window. Store
 * LEVEL_TAB_BITS + 100 and consume the whole window; the decoder then
 * continues with get_level_prefix(). */
320 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* Builds every static CAVLC VLC table used by this file and then the level
 * lookup table. For each VLC the statically allocated backing array and its
 * size are assigned first, then init_vlc() is called with
 * INIT_VLC_USE_NEW_STATIC on the matching "_len"/"_bits" table pair.
 * NOTE(review): the loop headers and any run-once guard are not visible in
 * this excerpt. */
327 av_cold void ff_h264_decode_init_vlc(void){
/* coeff_token for chroma DC blocks: 4*5 codes. */
335 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338 &chroma_dc_coeff_token_len [0], 1, 1,
339 &chroma_dc_coeff_token_bits[0], 1, 1,
340 INIT_VLC_USE_NEW_STATIC);
/* coeff_token for 4:2:2 chroma DC blocks: 4*9 codes. */
342 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345 &chroma422_dc_coeff_token_len [0], 1, 1,
346 &chroma422_dc_coeff_token_bits[0], 1, 1,
347 INIT_VLC_USE_NEW_STATIC);
/* The coeff_token tables are carved out of one packed array; offset
 * advances by each table's declared size. */
351 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354 &coeff_token_len [i][0], 1, 1,
355 &coeff_token_bits[i][0], 1, 1,
356 INIT_VLC_USE_NEW_STATIC);
357 offset += coeff_token_vlc_tables_size[i];
360 * This is a one time safety check to make sure that
361 * the packed static coeff_token_vlc table sizes
362 * were initialized correctly.
364 av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* total_zeros for chroma DC blocks: 4 codes per table. */
367 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369 init_vlc(&chroma_dc_total_zeros_vlc[i],
370 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371 &chroma_dc_total_zeros_len [i][0], 1, 1,
372 &chroma_dc_total_zeros_bits[i][0], 1, 1,
373 INIT_VLC_USE_NEW_STATIC);
/* total_zeros for 4:2:2 chroma DC blocks: 8 codes per table. */
377 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379 init_vlc(&chroma422_dc_total_zeros_vlc[i],
380 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381 &chroma422_dc_total_zeros_len [i][0], 1, 1,
382 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383 INIT_VLC_USE_NEW_STATIC);
/* total_zeros for all other blocks: 16 codes per table. */
387 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389 init_vlc(&total_zeros_vlc[i],
390 TOTAL_ZEROS_VLC_BITS, 16,
391 &total_zeros_len [i][0], 1, 1,
392 &total_zeros_bits[i][0], 1, 1,
393 INIT_VLC_USE_NEW_STATIC);
/* run_before tables built from rows run_len[i] / run_bits[i]. */
397 run_vlc[i].table = run_vlc_tables[i];
398 run_vlc[i].table_allocated = run_vlc_tables_size;
399 init_vlc(&run_vlc[i],
401 &run_len [i][0], 1, 1,
402 &run_bits[i][0], 1, 1,
403 INIT_VLC_USE_NEW_STATIC);
/* run7_vlc is built from row 6 of run_len / run_bits.
 * NOTE(review): the next statement ends with ',' rather than ';'. The comma
 * operator makes it behave the same as two separate statements, but it
 * looks like a typo. */
405 run7_vlc.table = run7_vlc_table,
406 run7_vlc.table_allocated = run7_vlc_table_size;
407 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408 &run_len [6][0], 1, 1,
409 &run_bits[6][0], 1, 1,
410 INIT_VLC_USE_NEW_STATIC);
/* Finally build the level lookup table used by decode_residual(). */
412 init_cavlc_level_tab();
/* Reads the bit cache of gb, computes log = 32 - av_log2(buf) and consumes
 * log bits from the reader. decode_residual() adds the result to a prefix
 * that already equals LEVEL_TAB_BITS.
 * NOTE(review): the return statement is not visible in this excerpt; the
 * debug print labels log-1 as "lpr", so the function presumably returns
 * log-1 -- confirm. */
416 static inline int get_level_prefix(GetBitContext *gb){
421 UPDATE_CACHE(re, gb);
422 buf=GET_CACHE(re, gb);
424 log= 32 - av_log2(buf);
/* Debug-only trace of the bits just examined. NOTE(review): the
 * preprocessor guard around it is not visible here. */
426 print_bin(buf>>(32-log), log);
427 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 LAST_SKIP_BITS(re, gb, log);
431 CLOSE_READER(re, gb);
/* Overview of decode_residual(), in the order the code runs:
 *   1. read coeff_token and split it into total_coeff / trailing_ones;
 *   2. read the signs of up to three trailing +-1 coefficients;
 *   3. read the remaining levels through cavlc_level_tab, falling back to
 *      manual prefix/suffix decoding for escape values;
 *   4. read total_zeros (zeros_left), unless the block is full;
 *   5. STORE_BLOCK: walk the scan table backwards, reading run_before and
 *      writing each level into block. For n >= LUMA_DC_BLOCK_INDEX levels
 *      are stored as-is; otherwise they are scaled as
 *      (level * qmul[pos] + 32) >> 6.
 * Visible error paths log a message; two of them are followed by a visible
 * return of AVERROR_INVALIDDATA. NOTE(review): several branch lines and the
 * final return are not visible in this excerpt. */
437 * Decode a residual block.
438 * @param n block index
439 * @param scantable scantable
440 * @param max_coeff number of coefficients in the block
441 * @return <0 if an error occurred
443 static int decode_residual(const H264Context *h, H264SliceContext *sl,
444 GetBitContext *gb, int16_t *block, int n,
445 const uint8_t *scantable, const uint32_t *qmul,
/* Maps a predicted coefficient count (0..16) to one of the four
 * coeff_token VLC tables. */
448 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
450 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
452 //FIXME put trailing_onex into the context
/* Chroma DC blocks use their own coeff_token tables (4:2:0 and 4:2:2
 * variants); the selecting conditions are not visible here. */
456 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
458 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
459 total_coeff= coeff_token>>2;
/* Luma DC blocks predict the count from the first block of the same plane,
 * index (n - LUMA_DC_BLOCK_INDEX)*16. total_coeff temporarily holds the
 * prediction and is then overwritten with the decoded count. */
461 if(n >= LUMA_DC_BLOCK_INDEX){
462 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
463 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
464 total_coeff= coeff_token>>2;
466 total_coeff= pred_non_zero_count(h, sl, n);
467 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
468 total_coeff= coeff_token>>2;
/* Record the count so that later blocks can predict from it. */
471 sl->non_zero_count_cache[scan8[n]] = total_coeff;
473 //FIXME set last_non_zero?
/* The cast makes this an unsigned comparison, so a negative total_coeff
 * is also rejected here. */
477 if(total_coeff > (unsigned)max_coeff) {
478 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
482 trailing_ones= coeff_token&3;
483 ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
484 av_assert2(total_coeff<=16);
/* Peek three sign bits but consume only trailing_ones of them. A set sign
 * bit gives level -1, a clear one gives +1. */
486 i = show_bits(gb, 3);
487 skip_bits(gb, trailing_ones);
488 level[0] = 1-((i&4)>>1);
489 level[1] = 1-((i&2) );
490 level[2] = 1-((i&1)<<1);
492 if(trailing_ones<total_coeff) {
/* Relational operators bind tighter than '&', so this is
 * (total_coeff > 10) & (trailing_ones < 3): either 0 or 1. */
494 int suffix_length = total_coeff > 10 & trailing_ones < 3;
495 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
496 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
498 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Table values >= 100 are escapes carrying only the prefix. */
499 if(level_code >= 100){
500 prefix= level_code - 100;
501 if(prefix == LEVEL_TAB_BITS)
502 prefix += get_level_prefix(gb);
504 //first coefficient has suffix_length equal to 0 or 1
505 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
507 level_code= (prefix<<1) + get_bits1(gb); //part
509 level_code= prefix; //part
510 }else if(prefix==14){
512 level_code= (prefix<<1) + get_bits1(gb); //part
514 level_code= prefix + get_bits(gb, 4); //part
519 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
522 level_code += (1<<(prefix-3))-4096;
524 level_code += get_bits(gb, prefix-3); //part
/* With fewer than three trailing ones the first non-trailing level is
 * offset by 2 in the code-number domain. */
527 if(trailing_ones < 3) level_code += 2;
/* Code number to signed level: odd -> negative, even -> positive. */
530 mask= -(level_code&1);
531 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
/* Table-hit path: level_code is already a signed level. When
 * trailing_ones < 3 this adds +1 to a non-negative value and -1 to a
 * negative one. */
533 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* suffix_length becomes 2 when |level_code| > 3, else 1 (unsigned
 * range-check trick). */
535 suffix_length = 1 + (level_code + 3U > 6U);
536 level[trailing_ones]= level_code;
539 //remaining coefficients have suffix_length > 0
540 for(i=trailing_ones+1;i<total_coeff;i++) {
541 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
542 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
543 level_code= cavlc_level_tab[suffix_length][bitsi][0];
545 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
546 if(level_code >= 100){
547 prefix= level_code - 100;
548 if(prefix == LEVEL_TAB_BITS){
549 prefix += get_level_prefix(gb);
552 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
554 level_code = 15<<suffix_length;
557 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
558 return AVERROR_INVALIDDATA;
560 level_code += (1<<(prefix-3))-4096;
562 level_code += get_bits(gb, prefix-3);
564 mask= -(level_code&1);
565 level_code= (((2+level_code)>>1) ^ mask) - mask;
567 level[i]= level_code;
/* Increments suffix_length when |level_code| exceeds
 * suffix_limit[suffix_length] (same unsigned range-check trick). */
568 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* A full block has no room for zeros. NOTE(review): the statement executed
 * for this case is not visible; presumably zeros_left = 0 -- confirm. */
572 if(total_coeff == max_coeff)
/* total_zeros: the VLC is chosen by total_coeff, with separate table sets
 * for small (max_coeff <= 8) chroma DC blocks. */
575 if (max_coeff <= 8) {
577 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
578 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
580 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
581 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
583 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK(type): scantable is first moved to the slot of the last
 * coefficient (zeros_left + total_coeff - 1). Levels are then written from
 * there backwards, stepping back 1 + run_before each time while zeros
 * remain; the trailing loop writes the remaining levels once zeros_left has
 * run out. `type` is the element type used to address block. */
587 #define STORE_BLOCK(type) \
588 scantable += zeros_left + total_coeff - 1; \
589 if(n >= LUMA_DC_BLOCK_INDEX){ \
590 ((type*)block)[*scantable] = level[0]; \
591 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
593 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
595 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
596 zeros_left -= run_before; \
597 scantable -= 1 + run_before; \
598 ((type*)block)[*scantable]= level[i]; \
600 for(;i<total_coeff;i++) { \
602 ((type*)block)[*scantable]= level[i]; \
605 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
606 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
608 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
610 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
611 zeros_left -= run_before; \
612 scantable -= 1 + run_before; \
613 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
615 for(;i<total_coeff;i++) { \
617 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
/* The element type handed to STORE_BLOCK depends on h->pixel_shift
 * (instantiations not visible in this excerpt). */
621 if (h->pixel_shift) {
/* NOTE(review): the guarding condition is not visible; per the message it
 * fires when zeros_left has gone negative. */
628 av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
/* Decodes the 16 4x4 blocks (or four 8x8 blocks) of plane p of the current
 * macroblock by calling decode_residual() for each coded block. Plane 0 uses
 * sl->qscale; planes 1 and 2 use sl->chroma_qp[p - 1]. While decoding, a
 * per-8x8 "has coefficients" mask is collected in new_cbp.
 * NOTE(review): the return statements are not fully visible; the one caller
 * that keeps the result ORs it into h->cbp_table shifted left by 12, so the
 * function presumably returns new_cbp on success and a negative value on
 * error -- confirm. */
635 static av_always_inline
636 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
637 GetBitContext *gb, const uint8_t *scan,
638 const uint8_t *scan8x8, int pixel_shift,
639 int mb_type, int cbp, int p)
642 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
/* Intra 16x16: clear the DC buffer, then decode one 16-coefficient DC block
 * without dequantisation (qmul == NULL). */
643 if(IS_INTRA16x16(mb_type)){
644 AV_ZERO128(sl->mb_luma_dc[p]+0);
645 AV_ZERO128(sl->mb_luma_dc[p]+8);
646 AV_ZERO128(sl->mb_luma_dc[p]+16);
647 AV_ZERO128(sl->mb_luma_dc[p]+24);
648 if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
649 return -1; //FIXME continue if partitioned and other return -1 too
652 av_assert2((cbp&15) == 0 || (cbp&15) == 15);
/* AC part: 15 coefficients per 4x4 block, scan table advanced past the DC
 * position. */
655 for(i8x8=0; i8x8<4; i8x8++){
656 for(i4x4=0; i4x4<4; i4x4++){
657 const int index= i4x4 + 4*i8x8 + p*16;
658 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
659 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* Zeroes this plane's 4x4 area of the count cache. NOTE(review): the
 * condition for this path is not visible; presumably no AC is coded. */
666 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Not intra 16x16: the quant-matrix index is p for intra and 3 + p for
 * inter macroblocks. */
670 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
671 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
673 for(i8x8=0; i8x8<4; i8x8++){
/* 8x8 transform: four decode_residual() calls write into the same 8x8
 * buffer, each with a different 16-entry slice of scan8x8. */
675 if(IS_8x8DCT(mb_type)){
676 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
678 for(i4x4=0; i4x4<4; i4x4++){
679 const int index= i4x4 + 4*i8x8 + p*16;
680 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
681 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Sum the four counts into nnz[0] and set bit i8x8 of new_cbp when the sum
 * is non-zero. */
684 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
685 nnz[0] += nnz[1] + nnz[8] + nnz[9];
686 new_cbp |= !!nnz[0] << i8x8;
/* 4x4 transform: four independent 16-coefficient blocks. */
688 for(i4x4=0; i4x4<4; i4x4++){
689 const int index= i4x4 + 4*i8x8 + p*16;
690 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
691 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
694 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
/* Clears the four count-cache entries of this 8x8 block. NOTE(review): the
 * condition is not visible; presumably the 8x8 block is not coded in cbp. */
698 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
699 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Decodes one CAVLC-coded macroblock from sl->gb at position
 * (sl->mb_x, sl->mb_y). Steps, in order:
 *   - skip-run handling for non-I slices;
 *   - mb_type, mapped through the B/P/I type tables;
 *   - intra PCM samples, or intra prediction modes, or sub-macroblock types /
 *     reference indices / motion vectors;
 *   - coded block pattern, optional 8x8-transform flag, dquant;
 *   - luma and chroma residuals.
 * Results are written to the slice caches and to the per-macroblock tables
 * in h (slice_table, cbp_table, cur_pic.mb_type, cur_pic.qscale_table,
 * non_zero_count). Visible error paths return AVERROR_INVALIDDATA.
 * NOTE(review): many branch and return lines are not visible in this
 * excerpt. */
706 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
710 unsigned int mb_type, cbp;
711 int dct8x8_allowed= h->pps.transform_8x8_mode;
/* Separate chroma residuals are decoded only for chroma_format_idc 1 or 2. */
712 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
713 const int pixel_shift = h->pixel_shift;
715 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
717 ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
718 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* Skip run: -1 means "not read yet". A non-zero run (tested before the
 * decrement) means this macroblock is skipped. */
720 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
721 if (sl->mb_skip_run == -1)
722 sl->mb_skip_run = get_ue_golomb_long(&sl->gb);
724 if (sl->mb_skip_run--) {
/* In MBAFF frames, on even rows, the field flag is read when the run has
 * just reached zero. */
725 if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
726 if (sl->mb_skip_run == 0)
727 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
729 decode_mb_skip(h, sl);
/* Non-skipped macroblock: in MBAFF frames the field flag is read on even
 * rows. */
733 if (FRAME_MBAFF(h)) {
734 if ((sl->mb_y & 1) == 0)
735 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
738 sl->prev_mb_skipped = 0;
/* mb_type is first a raw code number, then replaced by the flag word from
 * the slice-type-specific table. */
740 mb_type= get_ue_golomb(&sl->gb);
741 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
743 partition_count= b_mb_type_info[mb_type].partition_count;
744 mb_type= b_mb_type_info[mb_type].type;
747 goto decode_intra_mb;
749 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
751 partition_count= p_mb_type_info[mb_type].partition_count;
752 mb_type= p_mb_type_info[mb_type].type;
755 goto decode_intra_mb;
758 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
759 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
763 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
/* Intra types: the table also supplies cbp and the 16x16 prediction mode. */
767 cbp= i_mb_type_info[mb_type].cbp;
768 sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
769 mb_type= i_mb_type_info[mb_type].type;
773 mb_type |= MB_TYPE_INTERLACED;
775 h->slice_table[mb_xy] = sl->slice_num;
/* Intra PCM: raw samples follow byte-aligned in the stream. mb_size is the
 * sample payload in bits; the payload is only located and skipped here. */
777 if(IS_INTRA_PCM(mb_type)){
778 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
779 h->sps.bit_depth_luma;
781 // We assume these blocks are very rare so we do not optimize it.
782 sl->intra_pcm_ptr = align_get_bits(&sl->gb);
783 if (get_bits_left(&sl->gb) < mb_size) {
784 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
785 return AVERROR_INVALIDDATA;
787 skip_bits_long(&sl->gb, mb_size);
789 // In deblocking, the quantizer is 0
790 h->cur_pic.qscale_table[mb_xy] = 0;
791 // All coeffs are present
792 memset(h->non_zero_count[mb_xy], 16, 48);
794 h->cur_pic.mb_type[mb_xy] = mb_type;
798 fill_decode_neighbors(h, sl, mb_type);
799 fill_decode_caches(h, sl, mb_type);
802 if(IS_INTRA(mb_type)){
804 // init_top_left_availability(h);
805 if(IS_INTRA4x4(mb_type)){
/* Optional 8x8-transform flag, read only when the PPS allows it. */
808 if(dct8x8_allowed && get_bits1(&sl->gb)){
809 mb_type |= MB_TYPE_8x8DCT;
813 // fill_intra4x4_pred_table(h);
/* Per-block prediction mode: a 1 bit keeps the predicted mode; otherwise a
 * 3-bit rem_mode is read and incremented when it is >= the predicted mode. */
814 for(i=0; i<16; i+=di){
815 int mode = pred_intra_mode(h, sl, i);
817 if(!get_bits1(&sl->gb)){
818 const int rem_mode= get_bits(&sl->gb, 3);
819 mode = rem_mode + (rem_mode >= mode);
823 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
825 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
827 write_back_intra_pred_mode(h, sl);
828 if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
831 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
832 if (sl->intra16x16_pred_mode < 0)
/* Chroma prediction mode is read from the stream and validated; the
 * alternative path uses DC_128_PRED8x8 (condition not visible). */
836 pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&sl->gb), 1);
839 sl->chroma_pred_mode = pred_mode;
841 sl->chroma_pred_mode = DC_128_PRED8x8;
/* Four 8x8 partitions: each has its own sub-macroblock type. */
843 }else if(partition_count==4){
844 int i, j, sub_partition_count[4], list, ref[2][4];
846 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
848 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
849 if(sl->sub_mb_type[i] >=13){
850 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
853 sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
854 sl->sub_mb_type[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
856 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
857 ff_h264_pred_direct_motion(h, sl, &mb_type);
858 sl->ref_cache[0][scan8[4]] =
859 sl->ref_cache[1][scan8[4]] =
860 sl->ref_cache[0][scan8[12]] =
861 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
864 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
866 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
867 if(sl->sub_mb_type[i] >=4){
868 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
871 sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
872 sl->sub_mb_type[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
/* Reference indices per list and 8x8 partition. With exactly two candidate
 * references a single inverted bit is read; otherwise a ue(v) code. */
876 for (list = 0; list < sl->list_count; list++) {
877 int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
879 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
880 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
884 }else if(ref_count == 2){
885 tmp= get_bits1(&sl->gb)^1;
887 tmp= get_ue_golomb_31(&sl->gb);
889 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
902 dct8x8_allowed = get_dct8x8_allowed(h, sl);
/* Motion vectors per list, partition and sub-partition: predicted vector
 * plus the signed difference read from the stream. */
904 for (list = 0; list < sl->list_count; list++) {
906 if(IS_DIRECT(sl->sub_mb_type[i])) {
907 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
910 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
911 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
913 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
914 const int sub_mb_type= sl->sub_mb_type[i];
915 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
916 for(j=0; j<sub_partition_count[i]; j++){
918 const int index= 4*i + block_width*j;
919 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
920 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
921 mx += get_se_golomb(&sl->gb);
922 my += get_se_golomb(&sl->gb);
923 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the vector over every 4x4 cell the sub-partition covers. */
925 if(IS_SUB_8X8(sub_mb_type)){
927 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
929 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
930 }else if(IS_SUB_8X4(sub_mb_type)){
931 mv_cache[ 1 ][0]= mx;
932 mv_cache[ 1 ][1]= my;
933 }else if(IS_SUB_4X8(sub_mb_type)){
934 mv_cache[ 8 ][0]= mx;
935 mv_cache[ 8 ][1]= my;
937 mv_cache[ 0 ][0]= mx;
938 mv_cache[ 0 ][1]= my;
941 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
/* Direct macroblock: motion is derived, and the 8x8 transform is further
 * gated by the SPS direct_8x8_inference_flag. */
947 }else if(IS_DIRECT(mb_type)){
948 ff_h264_pred_direct_motion(h, sl, &mb_type);
949 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
952 //FIXME we should set ref_idx_l? to 0 if we use that later ...
/* 16x16 partition: one reference and one vector per used list. */
953 if(IS_16X16(mb_type)){
954 for (list = 0; list < sl->list_count; list++) {
956 if(IS_DIR(mb_type, 0, list)){
957 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
960 } else if (rc == 2) {
961 val= get_bits1(&sl->gb)^1;
963 val= get_ue_golomb_31(&sl->gb);
965 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
969 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
972 for (list = 0; list < sl->list_count; list++) {
973 if(IS_DIR(mb_type, 0, list)){
974 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
975 mx += get_se_golomb(&sl->gb);
976 my += get_se_golomb(&sl->gb);
977 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
979 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
/* 16x8 partitions: i selects the half; cache offset 16*i, rectangles are
 * 4 wide and 2 high. */
983 else if(IS_16X8(mb_type)){
984 for (list = 0; list < sl->list_count; list++) {
987 if(IS_DIR(mb_type, i, list)){
988 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
991 } else if (rc == 2) {
992 val= get_bits1(&sl->gb)^1;
994 val= get_ue_golomb_31(&sl->gb);
996 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1001 val= LIST_NOT_USED&0xFF;
1002 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1005 for (list = 0; list < sl->list_count; list++) {
1008 if(IS_DIR(mb_type, i, list)){
1009 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1010 mx += get_se_golomb(&sl->gb);
1011 my += get_se_golomb(&sl->gb);
1012 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1014 val= pack16to32(mx,my);
1017 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
/* 8x16 partitions: i selects the half; cache offset 2*i, rectangles are
 * 2 wide and 4 high. */
1021 av_assert2(IS_8X16(mb_type));
1022 for (list = 0; list < sl->list_count; list++) {
1025 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1026 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
1029 } else if (rc == 2) {
1030 val= get_bits1(&sl->gb)^1;
1032 val= get_ue_golomb_31(&sl->gb);
1034 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1039 val= LIST_NOT_USED&0xFF;
1040 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1043 for (list = 0; list < sl->list_count; list++) {
1046 if(IS_DIR(mb_type, i, list)){
1047 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1048 mx += get_se_golomb(&sl->gb);
1049 my += get_se_golomb(&sl->gb);
1050 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1052 val= pack16to32(mx,my);
1055 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1061 if(IS_INTER(mb_type))
1062 write_back_motion(h, sl, mb_type);
/* Coded block pattern: intra 16x16 already got cbp from its type table;
 * every other type reads a code number and maps it through an
 * intra4x4 / inter table. The "_gray" tables are the second set
 * (selecting condition not visible). */
1064 if(!IS_INTRA16x16(mb_type)){
1065 cbp= get_ue_golomb(&sl->gb);
1069 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1072 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1073 else cbp= golomb_to_inter_cbp [cbp];
1076 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1079 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1080 else cbp= golomb_to_inter_cbp_gray[cbp];
/* Reject chroma bits in cbp (values above 15) when chroma residuals are not
 * decoded separately. */
1083 if (!decode_chroma && cbp>15) {
1084 av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1085 return AVERROR_INVALIDDATA;
/* For non-intra macroblocks with coded luma, one bit selects the 8x8
 * transform when it is allowed. */
1089 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1090 mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1093 h->cbp_table[mb_xy]= cbp;
1094 h->cur_pic.mb_type[mb_xy] = mb_type;
/* Residual data is present when any cbp bit is set or for intra 16x16. */
1096 if(cbp || IS_INTRA16x16(mb_type)){
1097 int i4x4, i8x8, chroma_idx;
1100 GetBitContext *gb = &sl->gb;
1101 const uint8_t *scan, *scan8x8;
1102 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
/* Scan tables: field vs zigzag by interlacing, and a "_q0" variant when
 * sl->qscale is 0. */
1104 if(IS_INTERLACED(mb_type)){
1105 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1106 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1108 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1109 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
/* Apply the quantiser delta and wrap once into [0, max_qp]; a value still
 * out of range is reported as an error. */
1112 dquant= get_se_golomb(&sl->gb);
1114 sl->qscale += dquant;
1116 if (((unsigned)sl->qscale) > max_qp){
1117 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1118 else sl->qscale -= max_qp+1;
1119 if (((unsigned)sl->qscale) > max_qp){
1120 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1125 sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1126 sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
/* Plane 0 residual; its result is OR-ed into cbp_table shifted left by 12. */
1128 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1131 h->cbp_table[mb_xy] |= ret << 12;
/* Planes 1 and 2 decoded with the luma routine. NOTE(review): the guarding
 * condition is not visible; presumably the 4:4:4 case -- confirm. */
1133 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1136 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
/* Separate chroma: num_c8x8 is chroma_format_idc, so a chroma DC block has
 * 4*num_c8x8 coefficients. */
1140 const int num_c8x8 = h->sps.chroma_format_idc;
/* Chroma DC, one block per chroma plane, no dequantisation here. */
1143 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1144 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1145 CHROMA_DC_BLOCK_INDEX+chroma_idx,
1146 CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1147 NULL, 4*num_c8x8) < 0) {
/* Chroma AC: 15 coefficients per 4x4 block, dequantised with the chroma
 * matrix for intra (index 1/2) or inter (index 4/5) macroblocks. */
1153 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1154 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1155 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1156 for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1157 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1158 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1159 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1161 mb += 16 << pixel_shift;
/* Paths without coded coefficients zero the matching count-cache areas
 * (conditions not visible). */
1166 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1167 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1171 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1172 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1173 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
/* Publish the final quantiser and coefficient counts for this macroblock. */
1175 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1176 write_back_non_zero_count(h, sl);