2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
34 #include "h264_mvpred.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
// Lookup from a decoded cbp code (0..15) to a coded block pattern value for non-intra4x4 macroblocks.
// Used by ff_h264_decode_mb_cavlc next to the ff_h264_golomb_to_*_cbp tables.
// presumably the gray suffix means the variant used when chroma is not decoded - TODO confirm
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// Same lookup, applied when the macroblock is intra 4x4.
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Code length and code bit tables for the chroma DC coeff_token VLCs.
// ff_h264_decode_init_vlc passes them to init_vlc with 4*5 codes and, for the chroma422 variant, 4*9 codes.
// NOTE(review): the initializer rows of these four tables are not visible in this extract.
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
// Code lengths for the four coeff_token VLCs, one row of 4*17 entries per VLC.
// decode_residual splits the decoded symbol as total_coeff = symbol>>2 and trailing_ones = symbol&3.
// NOTE(review): only 64 of the 68 entries per row are visible here; the first four of each row appear to be missing from this extract.
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Code bit patterns matching coeff_token_len entry for entry.
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths for the total_zeros VLCs, one row per VLC.
// ff_h264_decode_init_vlc hands each row to init_vlc with 16 codes; decode_residual picks the VLC at index total_coeff - 1.
// NOTE(review): only the first seven rows are visible in this extract.
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
// Code bit patterns matching total_zeros_len entry for entry.
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
// Total_zeros code tables for the chroma DC blocks: 3 VLCs of 4 codes, and 7 VLCs of 8 codes for the chroma422 variant.
// NOTE(review): the rows of the two chroma_dc tables and the later rows of the chroma422 tables are not visible in this extract.
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
// Code tables for run_before. Rows 0..5 feed run_vlc[0..5]; row 6 feeds run7_vlc with 16 codes.
// NOTE(review): only the last row of each table is visible in this extract.
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Static storage for the VLC decoders that ff_h264_decode_init_vlc builds.
// Each *_size constant below is copied into the table_allocated field of the matching VLC there.
239 static VLC coeff_token_vlc[4];
// One packed array shared by the four coeff_token VLCs.
// The per-VLC sizes must sum to its element count; this is asserted after initialization.
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
// NOTE(review): the declaration of run7_vlc itself is not visible in this extract.
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
// Number of bitstream bits used to index cavlc_level_tab.
271 #define LEVEL_TAB_BITS 8
// Indexed as [suffix_length][next LEVEL_TAB_BITS bits].
// Element [0] holds either a decoded level or a marker of 100 plus the prefix length; element [1] holds the number of bits to skip.
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Lookup widths passed to init_vlc and get_vlc2 for each VLC.
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
// Predicts the coefficient count of block n from the cached counts of its left (index8 - 1) and top (index8 - 8) neighbours.
// NOTE(review): the declaration of i and the return statement are not visible in this extract.
// Presumably i starts as left + top and the logged value i&31 is returned - confirm.
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
287 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
289 const int index8= scan8[n];
290 const int left = sl->non_zero_count_cache[index8 - 1];
291 const int top = sl->non_zero_count_cache[index8 - 8];
// Values below 64 are halved, rounding up; larger values are left unchanged by this line.
// presumably 64 or more flags an unavailable neighbour - TODO confirm against the cache fill code
294 if(i<64) i= (i+1)>>1;
296 ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fills cavlc_level_tab for every suffix_length 0..6 and every LEVEL_TAB_BITS-bit pattern i.
// Each entry stores a value in [0] and a bit count in [1]; decode_residual skips that many bits after the lookup.
301 static av_cold void init_cavlc_level_tab(void){
305 for(suffix_length=0; suffix_length<7; suffix_length++){
306 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// assuming av_log2 returns the index of the highest set bit, prefix is the number of leading zero bits of i within LEVEL_TAB_BITS bits - TODO confirm
307 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix zeros, the terminating one bit and the suffix all fit inside the table index.
// Store the fully decoded signed level and the total code length.
309 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
310 int level_code = (prefix << suffix_length) +
311 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// mask is all ones for an odd level_code: odd codes become negative levels, even codes positive ones.
312 int mask = -(level_code&1);
313 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314 cavlc_level_tab[suffix_length][i][0]= level_code;
315 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix and its terminating bit fit.
// Store 100 plus the prefix as a marker; the caller reads the suffix itself.
316 }else if(prefix + 1 <= LEVEL_TAB_BITS){
317 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case (the else line is not visible in this extract): the prefix runs past the table width.
// The marker LEVEL_TAB_BITS+100 makes decode_residual continue counting with get_level_prefix.
320 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds every static CAVLC table in this file.
// For each VLC it points .table at the static storage, records the storage size in .table_allocated and calls init_vlc with INIT_VLC_USE_NEW_STATIC.
// It finishes by filling cavlc_level_tab.
// NOTE(review): the loop headers, local declarations and any run-once guard are not visible in this extract.
// NOTE(review): the two 1, 1 arguments after each table pointer are presumably wrap and element size in bytes - confirm against the init_vlc prototype.
327 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token VLC with 4*5 codes.
335 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338 &chroma_dc_coeff_token_len [0], 1, 1,
339 &chroma_dc_coeff_token_bits[0], 1, 1,
340 INIT_VLC_USE_NEW_STATIC);
// Chroma422 DC coeff_token VLC with 4*9 codes.
342 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345 &chroma422_dc_coeff_token_len [0], 1, 1,
346 &chroma422_dc_coeff_token_bits[0], 1, 1,
347 INIT_VLC_USE_NEW_STATIC);
// The coeff_token VLCs share one packed array; offset advances by the size of each VLC.
351 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354 &coeff_token_len [i][0], 1, 1,
355 &coeff_token_bits[i][0], 1, 1,
356 INIT_VLC_USE_NEW_STATIC);
357 offset += coeff_token_vlc_tables_size[i];
360 * This is a one time safety check to make sure that
361 * the packed static coeff_token_vlc table sizes
362 * were initialized correctly.
364 av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros VLCs with 4 codes each.
367 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369 init_vlc(&chroma_dc_total_zeros_vlc[i],
370 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371 &chroma_dc_total_zeros_len [i][0], 1, 1,
372 &chroma_dc_total_zeros_bits[i][0], 1, 1,
373 INIT_VLC_USE_NEW_STATIC);
// Chroma422 DC total_zeros VLCs with 8 codes each.
377 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379 init_vlc(&chroma422_dc_total_zeros_vlc[i],
380 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381 &chroma422_dc_total_zeros_len [i][0], 1, 1,
382 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383 INIT_VLC_USE_NEW_STATIC);
// total_zeros VLCs with 16 codes each.
387 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389 init_vlc(&total_zeros_vlc[i],
390 TOTAL_ZEROS_VLC_BITS, 16,
391 &total_zeros_len [i][0], 1, 1,
392 &total_zeros_bits[i][0], 1, 1,
393 INIT_VLC_USE_NEW_STATIC);
// run_before VLCs, built from rows 0..5 of run_len and run_bits.
// NOTE(review): the argument line carrying the bit width and code count of this call is not visible in this extract.
397 run_vlc[i].table = run_vlc_tables[i];
398 run_vlc[i].table_allocated = run_vlc_tables_size;
399 init_vlc(&run_vlc[i],
401 &run_len [i][0], 1, 1,
402 &run_bits[i][0], 1, 1,
403 INIT_VLC_USE_NEW_STATIC);
// The next line ends with a comma, not a semicolon, so the two assignments form one comma expression.
// The effect is the same as two separate statements.
405 run7_vlc.table = run7_vlc_table,
406 run7_vlc.table_allocated = run7_vlc_table_size;
407 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408 &run_len [6][0], 1, 1,
409 &run_bits[6][0], 1, 1,
410 INIT_VLC_USE_NEW_STATIC);
412 init_cavlc_level_tab();
// Reads a level prefix straight from the bit reader cache and consumes it.
// decode_residual adds the result to a prefix that already reached LEVEL_TAB_BITS.
// NOTE(review): the reader setup, the local declarations and the return statement are not visible in this extract.
416 static inline int get_level_prefix(GetBitContext *gb){
421 UPDATE_CACHE(re, gb);
422 buf=GET_CACHE(re, gb);
// assuming buf holds 32 cached bits and av_log2 returns the index of the highest set bit, log is the count of leading zeros plus one - TODO confirm
424 log= 32 - av_log2(buf);
// Consume the zeros together with the terminating one bit.
426 LAST_SKIP_BITS(re, gb, log);
427 CLOSE_READER(re, gb);
// Decodes one CAVLC residual block in four steps: coeff_token, trailing ones, the remaining levels, then total_zeros and run_before to place the levels.
// NOTE(review): several branch headers, local declarations and return statements are not visible in this extract; comments below hedge where they depend on them.
433 * Decode a residual block.
434 * @param n block index
435 * @param scantable scantable
436 * @param max_coeff number of coefficients in the block
437 * @return <0 if an error occurred
439 static int decode_residual(const H264Context *h, H264SliceContext *sl,
440 GetBitContext *gb, int16_t *block, int n,
441 const uint8_t *scantable, const uint32_t *qmul,
// Maps a predicted coefficient count (0..16) to one of the four coeff_token VLCs.
444 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
446 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
448 //FIXME put trailing_ones into the context
// Chroma DC blocks use their own coeff_token VLC, one per chroma variant.
// NOTE(review): the conditions selecting between the next two reads are not visible here.
452 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
454 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
455 total_coeff= coeff_token>>2;
// Other blocks first predict the count from neighbours, then use the prediction to choose the VLC.
// The real count is the decoded symbol shifted right by two.
457 if(n >= LUMA_DC_BLOCK_INDEX){
458 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
459 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
460 total_coeff= coeff_token>>2;
462 total_coeff= pred_non_zero_count(h, sl, n);
463 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
464 total_coeff= coeff_token>>2;
// Record the count so later blocks can predict from it.
467 sl->non_zero_count_cache[scan8[n]] = total_coeff;
469 //FIXME set last_non_zero?
// The comparison is done in unsigned arithmetic, so a negative total_coeff is rejected as well.
473 if(total_coeff > (unsigned)max_coeff) {
474 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
// The low two bits of the symbol give the number of trailing coefficients of magnitude one.
478 trailing_ones= coeff_token&3;
479 ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
480 av_assert2(total_coeff<=16);
// Peek three sign bits but consume only trailing_ones of them.
// Each level becomes 1 when its bit is clear and -1 when it is set.
482 i = show_bits(gb, 3);
483 skip_bits(gb, trailing_ones);
484 level[0] = 1-((i&4)>>1);
485 level[1] = 1-((i&2) );
486 level[2] = 1-((i&1)<<1);
488 if(trailing_ones<total_coeff) {
// Bitwise and of two comparison results: suffix_length starts at 1 only when both hold, otherwise 0.
490 int suffix_length = total_coeff > 10 & trailing_ones < 3;
491 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
492 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
494 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values of 100 or more are markers carrying only the prefix length; the level is then finished by hand.
495 if(level_code >= 100){
496 prefix= level_code - 100;
// The prefix ran past the table width: keep counting zeros from the bitstream.
497 if(prefix == LEVEL_TAB_BITS)
498 prefix += get_level_prefix(gb);
500 //first coefficient has suffix_length equal to 0 or 1
501 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
503 level_code= (prefix<<1) + get_bits1(gb); //part
505 level_code= prefix; //part
506 }else if(prefix==14){
508 level_code= (prefix<<1) + get_bits1(gb); //part
510 level_code= prefix + get_bits(gb, 4); //part
515 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518 level_code += (1<<(prefix-3))-4096;
520 level_code += get_bits(gb, prefix-3); //part
523 if(trailing_ones < 3) level_code += 2;
// Odd level_code maps to a negative level, even to a positive one.
526 mask= -(level_code&1);
527 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: when trailing_ones is below 3, push the magnitude one step further from zero, keeping the sign.
529 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// Unsigned trick: the comparison is true when level_code is above 3 or below -3, giving suffix_length 2, otherwise 1.
531 suffix_length = 1 + (level_code + 3U > 6U);
532 level[trailing_ones]= level_code;
535 //remaining coefficients have suffix_length > 0
536 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude thresholds, indexed by the current suffix_length, above which suffix_length grows.
537 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
538 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
539 level_code= cavlc_level_tab[suffix_length][bitsi][0];
541 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
542 if(level_code >= 100){
543 prefix= level_code - 100;
544 if(prefix == LEVEL_TAB_BITS){
545 prefix += get_level_prefix(gb);
548 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
550 level_code = 15<<suffix_length;
553 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
554 return AVERROR_INVALIDDATA;
556 level_code += (1<<(prefix-3))-4096;
558 level_code += get_bits(gb, prefix-3);
560 mask= -(level_code&1);
561 level_code= (((2+level_code)>>1) ^ mask) - mask;
563 level[i]= level_code;
// Unsigned trick again: true when level_code lies outside the range from -limit to +limit.
564 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// A full block has no room for zeros, so total_zeros is read only in the other case.
// NOTE(review): the statement under this condition is not visible here.
568 if(total_coeff == max_coeff)
// The VLC arrays are indexed through (array-1)[total_coeff], which selects entry total_coeff - 1.
571 if (max_coeff <= 8) {
573 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
574 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
576 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
577 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
579 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK writes the levels into block, walking scantable backwards from the last coefficient position.
// While zeros remain, each step reads a run_before value and moves back by one plus that run.
// Blocks with n at or above LUMA_DC_BLOCK_INDEX store raw levels; the other branch scales by qmul, adds 32 and shifts right by 6.
// NOTE(review): the macro lines choosing between run_vlc and run7_vlc are not visible in this extract.
583 #define STORE_BLOCK(type) \
584 scantable += zeros_left + total_coeff - 1; \
585 if(n >= LUMA_DC_BLOCK_INDEX){ \
586 ((type*)block)[*scantable] = level[0]; \
587 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
589 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
591 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
592 zeros_left -= run_before; \
593 scantable -= 1 + run_before; \
594 ((type*)block)[*scantable]= level[i]; \
596 for(;i<total_coeff;i++) { \
598 ((type*)block)[*scantable]= level[i]; \
601 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
602 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
604 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
606 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
607 zeros_left -= run_before; \
608 scantable -= 1 + run_before; \
609 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
611 for(;i<total_coeff;i++) { \
613 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
617 if (h->pixel_shift) {
// presumably pixel_shift selects the element type handed to STORE_BLOCK - TODO confirm, the invocations are not visible here
// Reported when the run lengths consumed more zeros than total_zeros announced.
624 av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
// Decodes the residual blocks of plane p of one macroblock through decode_residual.
// Plane 0 uses sl->qscale and the other planes use sl->chroma_qp[p - 1].
// The caller treats a negative return as an error and ors the non-negative result of plane 0, shifted left by 12, into cbp_table.
// NOTE(review): several branch headers and the return statements are not visible in this extract.
631 static av_always_inline
632 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
633 GetBitContext *gb, const uint8_t *scan,
634 const uint8_t *scan8x8, int pixel_shift,
635 int mb_type, int cbp, int p)
638 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
639 if(IS_INTRA16x16(mb_type)){
// Clear the DC buffer of this plane, then decode the DC block: 16 coefficients, no dequantization table.
640 AV_ZERO128(sl->mb_luma_dc[p]+0);
641 AV_ZERO128(sl->mb_luma_dc[p]+8);
642 AV_ZERO128(sl->mb_luma_dc[p]+16);
643 AV_ZERO128(sl->mb_luma_dc[p]+24);
644 if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
645 return -1; //FIXME continue if partitioned and other return -1 too
648 av_assert2((cbp&15) == 0 || (cbp&15) == 15);
// AC part of the sixteen 4x4 blocks: the scan starts one entry later and at most 15 coefficients are read.
651 for(i8x8=0; i8x8<4; i8x8++){
652 for(i4x4=0; i4x4<4; i4x4++){
653 const int index= i4x4 + 4*i8x8 + p*16;
654 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
655 index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
// Zero the 4x4 count cache of this plane.
// NOTE(review): the condition guarding this line is not visible here.
662 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Dequantization matrix index: 0 for intra and 3 for inter macroblocks, plus the plane number.
666 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
667 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
669 for(i8x8=0; i8x8<4; i8x8++){
671 if(IS_8x8DCT(mb_type)){
// With the 8x8 transform the four calls all write into one buffer, each with its own 16-entry slice of scan8x8.
672 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
674 for(i4x4=0; i4x4<4; i4x4++){
675 const int index= i4x4 + 4*i8x8 + p*16;
676 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
677 h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
// Fold the four 4x4 counts into the first cache entry and flag this 8x8 block when any coefficient was coded.
680 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
681 nnz[0] += nnz[1] + nnz[8] + nnz[9];
682 new_cbp |= !!nnz[0] << i8x8;
684 for(i4x4=0; i4x4<4; i4x4++){
685 const int index= i4x4 + 4*i8x8 + p*16;
686 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
687 scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
690 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
// Clears the four cached counts of this 8x8 block.
// presumably this is the branch for an 8x8 block whose cbp bit is clear - TODO confirm, the condition is not visible here
694 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
695 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Decodes one CAVLC macroblock from sl->gb in this order: skip run, macroblock type, intra PCM or prediction data, coded block pattern, quantizer delta, residual blocks.
// Results are written to the slice context caches and to per-macroblock tables reached through h.
// NOTE(review): many branch headers, local declarations and return statements are not visible in this extract; comments below hedge where they depend on them.
702 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
706 unsigned int mb_type, cbp;
707 int dct8x8_allowed= h->ps.pps->transform_8x8_mode;
// Chroma residual is decoded only for chroma_format_idc 1 or 2.
708 int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
709 const int pixel_shift = h->pixel_shift;
// Linear macroblock index into the per-picture tables.
711 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
713 ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
714 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Outside I slices a skip run is read when none is pending (mb_skip_run is -1).
// While the run is non-zero the macroblock is decoded as skipped.
716 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
717 if (sl->mb_skip_run == -1)
718 sl->mb_skip_run = get_ue_golomb_long(&sl->gb);
720 if (sl->mb_skip_run--) {
// In MBAFF frames, on even macroblock rows, the field decoding flag is read once the run reaches zero.
721 if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
722 if (sl->mb_skip_run == 0)
723 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
725 decode_mb_skip(h, sl);
729 if (FRAME_MBAFF(h)) {
730 if ((sl->mb_y & 1) == 0)
731 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
734 sl->prev_mb_skipped = 0;
// The raw mb_type code is translated through the table that matches the slice type.
// NOTE(review): the range checks that send larger codes to decode_intra_mb are not visible here.
736 mb_type= get_ue_golomb(&sl->gb);
737 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
739 partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
740 mb_type = ff_h264_b_mb_type_info[mb_type].type;
743 goto decode_intra_mb;
745 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
747 partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
748 mb_type = ff_h264_p_mb_type_info[mb_type].type;
751 goto decode_intra_mb;
754 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
755 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
759 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
// Intra types carry their cbp and 16x16 prediction mode in the type table.
763 cbp = ff_h264_i_mb_type_info[mb_type].cbp;
764 sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
765 mb_type = ff_h264_i_mb_type_info[mb_type].type;
769 mb_type |= MB_TYPE_INTERLACED;
771 h->slice_table[mb_xy] = sl->slice_num;
// Intra PCM: raw samples follow at the next aligned position.
// Only a pointer to them is kept and the bits are skipped after checking that enough data remains.
773 if(IS_INTRA_PCM(mb_type)){
774 const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
775 h->ps.sps->bit_depth_luma;
777 // We assume these blocks are very rare so we do not optimize it.
778 sl->intra_pcm_ptr = align_get_bits(&sl->gb);
779 if (get_bits_left(&sl->gb) < mb_size) {
780 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
781 return AVERROR_INVALIDDATA;
783 skip_bits_long(&sl->gb, mb_size);
785 // In deblocking, the quantizer is 0
786 h->cur_pic.qscale_table[mb_xy] = 0;
787 // All coeffs are present
788 memset(h->non_zero_count[mb_xy], 16, 48);
790 h->cur_pic.mb_type[mb_xy] = mb_type;
// Load neighbour information and the prediction caches for this macroblock.
794 fill_decode_neighbors(h, sl, mb_type);
795 fill_decode_caches(h, sl, mb_type);
798 if(IS_INTRA(mb_type)){
800 // init_top_left_availability(h);
801 if(IS_INTRA4x4(mb_type)){
// One flag bit selects the 8x8 transform when the PPS allows it.
804 if(dct8x8_allowed && get_bits1(&sl->gb)){
805 mb_type |= MB_TYPE_8x8DCT;
809 // fill_intra4x4_pred_table(h);
810 for(i=0; i<16; i+=di){
811 int mode = pred_intra_mode(h, sl, i);
// A zero flag bit means the predicted mode is not used.
// The 3 bit remainder then indexes the other modes, skipping over the predicted one.
813 if(!get_bits1(&sl->gb)){
814 const int rem_mode= get_bits(&sl->gb, 3);
815 mode = rem_mode + (rem_mode >= mode);
819 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
821 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
823 write_back_intra_pred_mode(h, sl);
824 if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
825 sl->top_samples_available, sl->left_samples_available) < 0)
828 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
829 sl->left_samples_available, sl->intra16x16_pred_mode, 0);
830 if (sl->intra16x16_pred_mode < 0)
// The chroma prediction mode is read and validated here; another path stores DC_128_PRED8x8 instead.
// NOTE(review): the condition choosing between the two is not visible in this extract.
834 pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
835 sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
838 sl->chroma_pred_mode = pred_mode;
840 sl->chroma_pred_mode = DC_128_PRED8x8;
// Four 8x8 partitions: read sub types first, then reference indices, then motion vectors.
842 }else if(partition_count==4){
843 int i, j, sub_partition_count[4], list, ref[2][4];
845 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
847 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
848 if(sl->sub_mb_type[i] >=13){
849 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
852 sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
853 sl->sub_mb_type[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
// If any of the four sub types is direct, run direct prediction and mark four ref_cache entries as not available.
855 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
856 ff_h264_pred_direct_motion(h, sl, &mb_type);
857 sl->ref_cache[0][scan8[4]] =
858 sl->ref_cache[1][scan8[4]] =
859 sl->ref_cache[0][scan8[12]] =
860 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
863 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
865 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
866 if(sl->sub_mb_type[i] >=4){
867 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
870 sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
871 sl->sub_mb_type[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
// Reference indices per list. The usable count is 1 for REF0 types, otherwise ref_count shifted left by MB_MBAFF.
// With exactly two references the index is a single inverted bit; otherwise it is an ue golomb code.
875 for (list = 0; list < sl->list_count; list++) {
876 int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
878 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
879 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
883 }else if(ref_count == 2){
884 tmp= get_bits1(&sl->gb)^1;
886 tmp= get_ue_golomb_31(&sl->gb);
888 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
901 dct8x8_allowed = get_dct8x8_allowed(h, sl);
// Motion vectors: predict, add the two signed golomb deltas, then write the vector to the cache entries the sub partition covers.
903 for (list = 0; list < sl->list_count; list++) {
905 if(IS_DIRECT(sl->sub_mb_type[i])) {
906 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
909 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
910 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
912 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
913 const int sub_mb_type= sl->sub_mb_type[i];
914 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
915 for(j=0; j<sub_partition_count[i]; j++){
917 const int index= 4*i + block_width*j;
918 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
919 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
920 mx += get_se_golomb(&sl->gb);
921 my += get_se_golomb(&sl->gb);
922 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
924 if(IS_SUB_8X8(sub_mb_type)){
926 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
928 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
929 }else if(IS_SUB_8X4(sub_mb_type)){
930 mv_cache[ 1 ][0]= mx;
931 mv_cache[ 1 ][1]= my;
932 }else if(IS_SUB_4X8(sub_mb_type)){
933 mv_cache[ 8 ][0]= mx;
934 mv_cache[ 8 ][1]= my;
936 mv_cache[ 0 ][0]= mx;
937 mv_cache[ 0 ][1]= my;
940 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
// Direct macroblock: motion comes from direct prediction, and the 8x8 transform additionally requires direct_8x8_inference_flag.
946 }else if(IS_DIRECT(mb_type)){
947 ff_h264_pred_direct_motion(h, sl, &mb_type);
948 dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
951 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16 partition: one reference index and one motion vector per list, each filling the whole 4x4 cache area.
952 if(IS_16X16(mb_type)){
953 for (list = 0; list < sl->list_count; list++) {
955 if(IS_DIR(mb_type, 0, list)){
956 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
959 } else if (rc == 2) {
960 val= get_bits1(&sl->gb)^1;
962 val= get_ue_golomb_31(&sl->gb);
964 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
968 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
971 for (list = 0; list < sl->list_count; list++) {
972 if(IS_DIR(mb_type, 0, list)){
973 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
974 mx += get_se_golomb(&sl->gb);
975 my += get_se_golomb(&sl->gb);
976 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
978 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
// 16x8 partitions: two halves stacked vertically, each filling a 4 wide and 2 high cache area at offset 16*i.
982 else if(IS_16X8(mb_type)){
983 for (list = 0; list < sl->list_count; list++) {
986 if(IS_DIR(mb_type, i, list)){
987 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
990 } else if (rc == 2) {
991 val= get_bits1(&sl->gb)^1;
993 val= get_ue_golomb_31(&sl->gb);
995 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1000 val= LIST_NOT_USED&0xFF;
1001 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1004 for (list = 0; list < sl->list_count; list++) {
1007 if(IS_DIR(mb_type, i, list)){
1008 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1009 mx += get_se_golomb(&sl->gb);
1010 my += get_se_golomb(&sl->gb);
1011 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1013 val= pack16to32(mx,my);
1016 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16 partitions: two halves side by side, each filling a 2 wide and 4 high cache area at offset 2*i.
1020 av_assert2(IS_8X16(mb_type));
1021 for (list = 0; list < sl->list_count; list++) {
1024 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1025 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
1028 } else if (rc == 2) {
1029 val= get_bits1(&sl->gb)^1;
1031 val= get_ue_golomb_31(&sl->gb);
1033 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1038 val= LIST_NOT_USED&0xFF;
1039 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1042 for (list = 0; list < sl->list_count; list++) {
1045 if(IS_DIR(mb_type, i, list)){
1046 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1047 mx += get_se_golomb(&sl->gb);
1048 my += get_se_golomb(&sl->gb);
1049 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1051 val= pack16to32(mx,my);
1054 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1060 if(IS_INTER(mb_type))
1061 write_back_motion(h, sl, mb_type);
// Intra 16x16 already took its cbp from the type table; every other type reads a cbp code and maps it through a lookup table.
// NOTE(review): the range checks and the condition choosing between the regular and gray tables are not visible here.
1063 if(!IS_INTRA16x16(mb_type)){
1064 cbp= get_ue_golomb(&sl->gb);
1068 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1071 if (IS_INTRA4x4(mb_type))
1072 cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1074 cbp = ff_h264_golomb_to_inter_cbp[cbp];
1077 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1080 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1081 else cbp= golomb_to_inter_cbp_gray[cbp];
// A cbp above 15 is rejected when chroma is not decoded.
1084 if (!decode_chroma && cbp>15) {
1085 av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1086 return AVERROR_INVALIDDATA;
// Non-intra macroblocks with any of the low four cbp bits set read one bit that selects the 8x8 transform.
1090 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1091 mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1094 h->cbp_table[mb_xy]= cbp;
1095 h->cur_pic.mb_type[mb_xy] = mb_type;
// Residual data is present when cbp is non-zero or the macroblock is intra 16x16.
1097 if(cbp || IS_INTRA16x16(mb_type)){
1098 int i4x4, i8x8, chroma_idx;
1101 GetBitContext *gb = &sl->gb;
1102 const uint8_t *scan, *scan8x8;
// The largest valid quantizer grows by 6 for each bit of luma depth above 8.
1103 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
// Scan tables: field or zigzag order, with separate variants when qscale is zero.
1105 if(IS_INTERLACED(mb_type)){
1106 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1107 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1109 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1110 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
// Apply the signed quantizer delta and wrap once around the range 0..max_qp; a value still out of range is an error.
1113 dquant= get_se_golomb(&sl->gb);
1115 sl->qscale += dquant;
1117 if (((unsigned)sl->qscale) > max_qp){
1118 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1119 else sl->qscale -= max_qp+1;
1120 if (((unsigned)sl->qscale) > max_qp){
1121 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1126 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1127 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
// Plane 0 residual; its non-negative result is stored in cbp_table shifted left by 12.
1129 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1132 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 go through the same routine.
// presumably only when chroma is coded like luma (4:4:4) - TODO confirm, the guarding condition is not visible here
1134 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1137 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
// Subsampled chroma: num_c8x8 equals chroma_format_idc, so a DC block holds 4 * num_c8x8 coefficients.
1141 const int num_c8x8 = h->ps.sps->chroma_format_idc;
1144 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1145 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1146 CHROMA_DC_BLOCK_INDEX + chroma_idx,
1147 CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
1148 NULL, 4 * num_c8x8) < 0) {
// Chroma AC blocks: up to 15 coefficients each, dequantized with the intra or inter matrix of that chroma plane.
1154 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1155 const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1156 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1157 for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1158 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1159 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1160 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1162 mb += 16 << pixel_shift;
// Zero the cached coefficient counts of planes that carry no residual.
// NOTE(review): the conditions guarding the two groups of calls below are not visible here.
1167 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1168 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1172 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1173 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1174 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Store the final quantizer and write the coefficient counts back for later macroblocks.
1176 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1177 write_back_non_zero_count(h, sl);