2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
/* Maps the value read with get_ue_golomb for the coded block pattern to a cbp
 * value; this table is used when IS_INTRA4x4(mb_type) is false (see the cbp
 * lookup in ff_h264_decode_mb_cavlc).
 * NOTE(review): the condition that selects the _gray tables instead of the
 * non-gray ones is not visible in this listing - confirm against the caller. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Same mapping, used when IS_INTRA4x4(mb_type) is true. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Input arrays for chroma_dc_coeff_token_vlc: ff_h264_decode_init_vlc passes
 * both to init_vlc with 4*5 codes (the _len array first, the _bits array
 * second).
 * NOTE(review): the initializer rows of these four tables are not visible in
 * this listing. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/* Input arrays for chroma422_dc_coeff_token_vlc, built with 4*9 codes. */
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* First init_vlc input array for the four coeff_token VLCs: row i is passed
 * (together with coeff_token_bits[i]) to init_vlc for coeff_token_vlc[i] with
 * 4*17 codes. decode_residual splits the value returned by get_vlc2 as
 * total_coeff = value >> 2 and trailing_ones = value & 3.
 * Presumably these are the code lengths (per the _len name and the init_vlc
 * argument order) - confirm against the VLC API.
 * NOTE(review): only 64 of the 4*17 entries of each row are visible here. */
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
/* Every visible entry of the last row is either 0 or 6. */
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Second init_vlc input array for the four coeff_token VLCs, laid out exactly
 * like coeff_token_len: row i is passed to init_vlc for coeff_token_vlc[i].
 * Presumably these are the code words matching the lengths in
 * coeff_token_len - confirm against the VLC API.
 * NOTE(review): only 64 of the 4*17 entries of each row are visible here. */
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* init_vlc input arrays for total_zeros_vlc: row i of both tables is passed to
 * init_vlc for total_zeros_vlc[i] with 16 codes. decode_residual indexes the
 * VLC array as (total_zeros_vlc-1)[total_coeff], i.e. row total_coeff - 1.
 * Each visible row has one initializer fewer than the row before it; entries
 * without an initializer are zero.
 * NOTE(review): only the first seven rows are visible in this listing. */
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
/* Companion table, same row layout as total_zeros_len. */
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
/* init_vlc input arrays for chroma_dc_total_zeros_vlc: row i is used for
 * chroma_dc_total_zeros_vlc[i] with 4 codes.
 * NOTE(review): the initializer rows of these two tables are not visible. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* init_vlc input arrays for chroma422_dc_total_zeros_vlc: row i is used for
 * chroma422_dc_total_zeros_vlc[i] with 8 codes. decode_residual looks the VLC
 * up as (chroma422_dc_total_zeros_vlc-1)[total_coeff].
 * NOTE(review): only the first three of seven rows are visible. */
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
/* init_vlc input arrays for the run_before VLCs: row i feeds run_vlc[i], and
 * row 6 feeds run7_vlc (built with 16 codes in ff_h264_decode_init_vlc).
 * NOTE(review): only one row of each table is visible in this listing;
 * presumably it is row 6 - confirm against the full file. */
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Static VLC objects and their backing storage. ff_h264_decode_init_vlc points
 * each VLC.table at the matching array below and sets table_allocated from the
 * matching *_size constant before calling init_vlc. */

/* The four coeff_token tables share one packed array; table i starts at the
 * running sum of the preceding coeff_token_vlc_tables_size entries, and the
 * final sum is asserted to equal the array length. */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
/* 15 VLCs, selected in decode_residual by total_coeff via (total_zeros_vlc-1). */
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
/* Selected in the STORE_BLOCK macro via (run_vlc-1)[zeros_left]. */
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
/* Storage for run7_vlc.
 * NOTE(review): the declaration of run7_vlc itself is not visible here. */
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
/* cavlc_level_tab[suffix_length][bits] is indexed with the next LEVEL_TAB_BITS
 * bits of the stream. Entry [0] is either a decoded level or, when >= 100, an
 * escape value carrying prefix + 100; entry [1] is the number of bits that
 * decode_residual skips after the lookup. Filled by init_cavlc_level_tab. */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Lookup widths: each value is passed as the bit-count argument both to
 * init_vlc when the table is built and to get_vlc2 when it is read. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
/* Reads the cached coefficient counts of the left (index8 - 1) and top
 * (index8 - 8) neighbours of block n from sl->non_zero_count_cache.
 * NOTE(review): the declaration of i and the return statement are not visible
 * in this listing; presumably i starts as left + top and the function returns
 * i & 31 (the value the trace line prints as P) - confirm against the full
 * file. */
287 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
289 const int index8= scan8[n];
290 const int left = sl->non_zero_count_cache[index8 - 1];
291 const int top = sl->non_zero_count_cache[index8 - 8];
// Values below 64 are halved with rounding up; values of 64 or more are left unchanged here.
294 if(i<64) i= (i+1)>>1;
296 tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fills cavlc_level_tab for every suffix_length in 0..6 and every
 * LEVEL_TAB_BITS-bit pattern i. Each entry stores a value in [0] and a bit
 * count in [1]; three cases are distinguished below.
 * NOTE(review): the local declarations and the final else line are not
 * visible in this listing. */
301 static av_cold void init_cavlc_level_tab(void){
305 for(suffix_length=0; suffix_length<7; suffix_length++){
306 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Assuming av_log2 is floor(log2) - TODO confirm - prefix is the number of
// leading zero bits of i seen as a LEVEL_TAB_BITS-bit pattern (for i > 0).
307 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix zeros, the terminating one bit and the suffix all fit in the
// lookup window, so the signed level can be stored directly.
309 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
310 int level_code = (prefix << suffix_length) +
311 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// Map the code to a signed level: even codes give (code + 2) / 2, odd codes
// give the negated value of (code + 2) >> 1.
312 int mask = -(level_code&1);
313 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314 cavlc_level_tab[suffix_length][i][0]= level_code;
315 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix and its terminating bit fit; store prefix + 100 as
// an escape so the caller reads the suffix itself.
316 }else if(prefix + 1 <= LEVEL_TAB_BITS){
317 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Case 3 (presumably the remaining else branch): the prefix does not end
// inside the window; store LEVEL_TAB_BITS + 100 and consume the whole window.
320 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* Builds all static CAVLC lookup tables of this file. For every VLC the
 * pattern is the same: point .table at the static storage, record its size in
 * .table_allocated, then call init_vlc with the length and code arrays (one
 * byte per entry) and INIT_VLC_USE_NEW_STATIC. Finally the level table is
 * filled by init_cavlc_level_tab.
 * NOTE(review): the loop headers around the indexed stanzas and any
 * run-once guard are not visible in this listing. */
327 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token, 4*5 codes.
335 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338 &chroma_dc_coeff_token_len [0], 1, 1,
339 &chroma_dc_coeff_token_bits[0], 1, 1,
340 INIT_VLC_USE_NEW_STATIC);
// chroma422 DC coeff_token, 4*9 codes.
342 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345 &chroma422_dc_coeff_token_len [0], 1, 1,
346 &chroma422_dc_coeff_token_bits[0], 1, 1,
347 INIT_VLC_USE_NEW_STATIC);
// coeff_token tables: table i lives at the running offset inside the packed
// coeff_token_vlc_tables array.
351 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354 &coeff_token_len [i][0], 1, 1,
355 &coeff_token_bits[i][0], 1, 1,
356 INIT_VLC_USE_NEW_STATIC);
357 offset += coeff_token_vlc_tables_size[i];
360 * This is a one time safety check to make sure that
361 * the packed static coeff_token_vlc table sizes
362 * were initialized correctly.
364 av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros, 4 codes per table.
367 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369 init_vlc(&chroma_dc_total_zeros_vlc[i],
370 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371 &chroma_dc_total_zeros_len [i][0], 1, 1,
372 &chroma_dc_total_zeros_bits[i][0], 1, 1,
373 INIT_VLC_USE_NEW_STATIC);
// chroma422 DC total_zeros, 8 codes per table.
377 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379 init_vlc(&chroma422_dc_total_zeros_vlc[i],
380 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381 &chroma422_dc_total_zeros_len [i][0], 1, 1,
382 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383 INIT_VLC_USE_NEW_STATIC);
// total_zeros, 16 codes per table.
387 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389 init_vlc(&total_zeros_vlc[i],
390 TOTAL_ZEROS_VLC_BITS, 16,
391 &total_zeros_len [i][0], 1, 1,
392 &total_zeros_bits[i][0], 1, 1,
393 INIT_VLC_USE_NEW_STATIC);
// run_before tables.
// NOTE(review): the line carrying the bit width and code count of this
// init_vlc call is not visible in this listing.
397 run_vlc[i].table = run_vlc_tables[i];
398 run_vlc[i].table_allocated = run_vlc_tables_size;
399 init_vlc(&run_vlc[i],
401 &run_len [i][0], 1, 1,
402 &run_bits[i][0], 1, 1,
403 INIT_VLC_USE_NEW_STATIC);
// run7_vlc is built from row 6 of run_len / run_bits.
// NOTE: the first assignment ends with a comma, so it forms a single comma
// expression with the next assignment; both assignments still take effect.
405 run7_vlc.table = run7_vlc_table,
406 run7_vlc.table_allocated = run7_vlc_table_size;
407 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408 &run_len [6][0], 1, 1,
409 &run_bits[6][0], 1, 1,
410 INIT_VLC_USE_NEW_STATIC);
412 init_cavlc_level_tab();
/* Reads a level prefix directly from the bit reader cache: log is computed
 * from the position of the highest set bit of the cached word, and log bits
 * are consumed.
 * NOTE(review): the reader-opening line, the local declarations and the
 * return statement are not visible in this listing; presumably the function
 * returns log - 1 (the value the debug line prints as lpr) - confirm against
 * the full file. */
419 static inline int get_level_prefix(GetBitContext *gb){
424 UPDATE_CACHE(re, gb);
425 buf=GET_CACHE(re, gb);
// Assuming av_log2 is floor(log2) - TODO confirm - log is the count of leading
// zero bits in the 32-bit cache word plus one for the terminating one bit.
427 log= 32 - av_log2(buf);
// Debug trace; presumably guarded by a preprocessor conditional that is not
// visible here.
429 print_bin(buf>>(32-log), log);
430 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
433 LAST_SKIP_BITS(re, gb, log);
434 CLOSE_READER(re, gb);
440 * Decode a residual block.
441 * @param n block index
442 * @param scantable scantable
443 * @param max_coeff number of coefficients in the block
444 * @return <0 if an error occurred
/* Parses one CAVLC residual block from gb into block. Steps visible below:
 *  1. read coeff_token, which yields total_coeff (>> 2) and trailing_ones (& 3);
 *  2. read the signs of the trailing ones;
 *  3. read the remaining levels through cavlc_level_tab, with escape handling
 *     for values >= 100;
 *  4. read the number of zeros when the block is not full;
 *  5. place the levels along scantable, starting at the last coefficient and
 *     walking backwards by the run_before codes (STORE_BLOCK).
 * In STORE_BLOCK, levels are stored unscaled when n >= LUMA_DC_BLOCK_INDEX;
 * the other visible store path scales them as (level * qmul + 32) >> 6.
 * NOTE(review): many short lines (braces, else lines, some declarations and
 * returns) are not visible in this listing, so branch conditions are only
 * described where a surviving line establishes them. */
446 static int decode_residual(const H264Context *h, H264SliceContext *sl,
447 GetBitContext *gb, int16_t *block, int n,
448 const uint8_t *scantable, const uint32_t *qmul,
// Maps a predicted coefficient count (0..16) to one of the four coeff_token VLCs.
451 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
453 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
455 //FIXME put trailing_ones into the context
// Chroma DC blocks use their own coeff_token tables; the condition choosing
// between the two variants is not visible here.
459 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
461 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
462 total_coeff= coeff_token>>2;
// Luma DC blocks take the prediction from block (n - LUMA_DC_BLOCK_INDEX)*16;
// all other blocks take it from block n. total_coeff first holds the
// prediction and is then overwritten with the decoded count.
464 if(n >= LUMA_DC_BLOCK_INDEX){
465 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
466 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467 total_coeff= coeff_token>>2;
469 total_coeff= pred_non_zero_count(h, sl, n);
470 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
471 total_coeff= coeff_token>>2;
// Record the decoded count so that later blocks can predict from it.
474 sl->non_zero_count_cache[scan8[n]] = total_coeff;
476 //FIXME set last_non_zero?
// Reject counts larger than the block can hold.
480 if(total_coeff > (unsigned)max_coeff) {
481 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
485 trailing_ones= coeff_token&3;
486 tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
487 av_assert2(total_coeff<=16);
// Peek three bits but consume only trailing_ones of them; a set bit yields
// level -1, a clear bit yields +1.
489 i = show_bits(gb, 3);
490 skip_bits(gb, trailing_ones);
491 level[0] = 1-((i&4)>>1);
492 level[1] = 1-((i&2) );
493 level[2] = 1-((i&1)<<1);
// First level after the trailing ones.
495 if(trailing_ones<total_coeff) {
// suffix_length starts at 1 only when total_coeff > 10 and trailing_ones < 3
// (bitwise & of the two comparison results), otherwise 0.
497 int suffix_length = total_coeff > 10 & trailing_ones < 3;
498 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
499 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
501 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values >= 100 are escapes carrying the prefix; a prefix equal to
// LEVEL_TAB_BITS means the prefix continues past the lookup window.
502 if(level_code >= 100){
503 prefix= level_code - 100;
504 if(prefix == LEVEL_TAB_BITS)
505 prefix += get_level_prefix(gb);
507 //first coefficient has suffix_length equal to 0 or 1
508 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
510 level_code= (prefix<<1) + get_bits1(gb); //part
512 level_code= prefix; //part
513 }else if(prefix==14){
515 level_code= (prefix<<1) + get_bits1(gb); //part
517 level_code= prefix + get_bits(gb, 4); //part
522 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
525 level_code += (1<<(prefix-3))-4096;
527 level_code += get_bits(gb, prefix-3); //part
530 if(trailing_ones < 3) level_code += 2;
// Map the code to a signed level: odd codes become negative.
533 mask= -(level_code&1);
534 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: with fewer than three trailing ones the magnitude is increased by
// one, keeping the sign.
536 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// suffix_length becomes 2 when level_code lies outside [-3, 3], else 1.
538 suffix_length = 1 + (level_code + 3U > 6U);
539 level[trailing_ones]= level_code;
542 //remaining coefficients have suffix_length > 0
543 for(i=trailing_ones+1;i<total_coeff;i++) {
544 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
545 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
546 level_code= cavlc_level_tab[suffix_length][bitsi][0];
548 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
549 if(level_code >= 100){
550 prefix= level_code - 100;
551 if(prefix == LEVEL_TAB_BITS){
552 prefix += get_level_prefix(gb);
555 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
557 level_code = 15<<suffix_length;
560 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
561 return AVERROR_INVALIDDATA;
563 level_code += (1<<(prefix-3))-4096;
565 level_code += get_bits(gb, prefix-3);
567 mask= -(level_code&1);
568 level_code= (((2+level_code)>>1) ^ mask) - mask;
570 level[i]= level_code;
// Unsigned comparison: increments suffix_length when level_code is greater than
// the current limit or smaller than its negation.
571 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// A full block has no zeros to signal; otherwise read the zero count from the
// VLC selected by total_coeff (the chroma DC variants apply for max_coeff <= 8).
575 if(total_coeff == max_coeff)
578 if (max_coeff <= 8) {
580 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
581 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
583 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
584 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
586 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK(type): writes level[] into block, viewed as an array of type.
 * scantable is first advanced to the position of the last coefficient
 * (zeros_left + total_coeff - 1); each further level moves it back by
 * 1 + run_before while zeros remain, with run_before read from run_vlc or
 * run7_vlc. The first half stores raw levels (n >= LUMA_DC_BLOCK_INDEX); the
 * second half stores (level * qmul[pos] + 32) >> 6.
 * No comments are placed inside the macro because every line is joined by a
 * trailing backslash. */
590 #define STORE_BLOCK(type) \
591 scantable += zeros_left + total_coeff - 1; \
592 if(n >= LUMA_DC_BLOCK_INDEX){ \
593 ((type*)block)[*scantable] = level[0]; \
594 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
596 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
598 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
599 zeros_left -= run_before; \
600 scantable -= 1 + run_before; \
601 ((type*)block)[*scantable]= level[i]; \
603 for(;i<total_coeff;i++) { \
605 ((type*)block)[*scantable]= level[i]; \
608 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
609 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
611 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
613 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
614 zeros_left -= run_before; \
615 scantable -= 1 + run_before; \
616 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
618 for(;i<total_coeff;i++) { \
620 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
// h->pixel_shift selects the element type used by STORE_BLOCK; the two
// instantiations are not visible in this listing.
624 if (h->pixel_shift) {
// Reported after storing; presumably guarded by a zeros_left < 0 check that is
// not visible here.
631 av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
/* Decodes the residual blocks of plane p (block indices offset by p*16) of
 * the current macroblock by calling decode_residual per block.
 * The quantiser is sl->qscale for p == 0 and sl->chroma_qp[p - 1] otherwise.
 * Three visible paths: intra 16x16 (DC block plus 15-coefficient AC blocks),
 * 8x8 transform (four 16-coefficient calls into one shared buffer per 8x8
 * block), and plain 4x4 blocks with 16 coefficients each.
 * NOTE(review): the return statements and several branch lines are not
 * visible in this listing. The caller treats a negative result as an error
 * and ORs a non-negative result shifted left by 12 into cbp_table, so the
 * function presumably returns new_cbp on success - confirm against the full
 * file. */
638 static av_always_inline
639 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
640 GetBitContext *gb, const uint8_t *scan,
641 const uint8_t *scan8x8, int pixel_shift,
642 int mb_type, int cbp, int p)
645 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
646 if(IS_INTRA16x16(mb_type)){
// Clear the DC buffer of this plane (four AV_ZERO128 calls at offsets 0, 8, 16
// and 24), then decode the DC block without a dequantisation table (qmul is
// NULL).
647 AV_ZERO128(sl->mb_luma_dc[p]+0);
648 AV_ZERO128(sl->mb_luma_dc[p]+8);
649 AV_ZERO128(sl->mb_luma_dc[p]+16);
650 AV_ZERO128(sl->mb_luma_dc[p]+24);
651 if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
652 return -1; //FIXME continue if partitioned and other return -1 too
// For intra 16x16 the low four cbp bits are expected to be all clear or all set.
655 av_assert2((cbp&15) == 0 || (cbp&15) == 15);
// AC blocks: the scan starts at its second entry (scan + 1) and holds 15
// coefficients.
658 for(i8x8=0; i8x8<4; i8x8++){
659 for(i4x4=0; i4x4<4; i4x4++){
660 const int index= i4x4 + 4*i8x8 + p*16;
661 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
662 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Presumably the branch without coded AC blocks: clear the 4x4 count cache of
// this plane.
669 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Non-intra16x16 path: dequantisation tables 0..2 for intra, 3..5 otherwise.
673 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
674 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
676 for(i8x8=0; i8x8<4; i8x8++){
678 if(IS_8x8DCT(mb_type)){
// All four calls write into the same 8x8 buffer, each with its own 16-entry
// slice of scan8x8.
679 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
681 for(i4x4=0; i4x4<4; i4x4++){
682 const int index= i4x4 + 4*i8x8 + p*16;
683 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
684 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Sum the four counts of this 8x8 block into its first cache entry and set the
// matching bit of new_cbp when the sum is non-zero.
687 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
688 nnz[0] += nnz[1] + nnz[8] + nnz[9];
689 new_cbp |= !!nnz[0] << i8x8;
691 for(i4x4=0; i4x4<4; i4x4++){
692 const int index= i4x4 + 4*i8x8 + p*16;
693 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
694 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
// Note: the raw count (not a 0/1 flag) is shifted by i8x8 here.
697 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
// Presumably the branch for an 8x8 block that is not coded: clear its four
// count cache entries.
701 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
702 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
709 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
713 unsigned int mb_type, cbp;
714 int dct8x8_allowed= h->pps.transform_8x8_mode;
715 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
716 const int pixel_shift = h->pixel_shift;
717 unsigned local_ref_count[2];
719 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
721 tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
722 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
724 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
725 if (sl->mb_skip_run == -1)
726 sl->mb_skip_run = get_ue_golomb_long(&sl->gb);
728 if (sl->mb_skip_run--) {
729 if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
730 if (sl->mb_skip_run == 0)
731 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
733 decode_mb_skip(h, sl);
737 if (FRAME_MBAFF(h)) {
738 if ((sl->mb_y & 1) == 0)
739 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
742 sl->prev_mb_skipped = 0;
744 mb_type= get_ue_golomb(&sl->gb);
745 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
747 partition_count= b_mb_type_info[mb_type].partition_count;
748 mb_type= b_mb_type_info[mb_type].type;
751 goto decode_intra_mb;
753 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
755 partition_count= p_mb_type_info[mb_type].partition_count;
756 mb_type= p_mb_type_info[mb_type].type;
759 goto decode_intra_mb;
762 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
763 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
767 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
771 cbp= i_mb_type_info[mb_type].cbp;
772 sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
773 mb_type= i_mb_type_info[mb_type].type;
777 mb_type |= MB_TYPE_INTERLACED;
779 h->slice_table[mb_xy] = sl->slice_num;
781 if(IS_INTRA_PCM(mb_type)){
782 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
783 h->sps.bit_depth_luma;
785 // We assume these blocks are very rare so we do not optimize it.
786 sl->intra_pcm_ptr = align_get_bits(&sl->gb);
787 if (get_bits_left(&sl->gb) < mb_size) {
788 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
789 return AVERROR_INVALIDDATA;
791 skip_bits_long(&sl->gb, mb_size);
793 // In deblocking, the quantizer is 0
794 h->cur_pic.qscale_table[mb_xy] = 0;
795 // All coeffs are present
796 memset(h->non_zero_count[mb_xy], 16, 48);
798 h->cur_pic.mb_type[mb_xy] = mb_type;
802 local_ref_count[0] = sl->ref_count[0] << MB_MBAFF(sl);
803 local_ref_count[1] = sl->ref_count[1] << MB_MBAFF(sl);
805 fill_decode_neighbors(h, sl, mb_type);
806 fill_decode_caches(h, sl, mb_type);
809 if(IS_INTRA(mb_type)){
811 // init_top_left_availability(h);
812 if(IS_INTRA4x4(mb_type)){
815 if(dct8x8_allowed && get_bits1(&sl->gb)){
816 mb_type |= MB_TYPE_8x8DCT;
820 // fill_intra4x4_pred_table(h);
821 for(i=0; i<16; i+=di){
822 int mode = pred_intra_mode(h, sl, i);
824 if(!get_bits1(&sl->gb)){
825 const int rem_mode= get_bits(&sl->gb, 3);
826 mode = rem_mode + (rem_mode >= mode);
830 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
832 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
834 write_back_intra_pred_mode(h, sl);
835 if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
838 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
839 if (sl->intra16x16_pred_mode < 0)
843 pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&sl->gb), 1);
846 sl->chroma_pred_mode = pred_mode;
848 sl->chroma_pred_mode = DC_128_PRED8x8;
850 }else if(partition_count==4){
851 int i, j, sub_partition_count[4], list, ref[2][4];
853 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
855 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
856 if(sl->sub_mb_type[i] >=13){
857 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
860 sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
861 sl->sub_mb_type[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
863 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
864 ff_h264_pred_direct_motion(h, sl, &mb_type);
865 sl->ref_cache[0][scan8[4]] =
866 sl->ref_cache[1][scan8[4]] =
867 sl->ref_cache[0][scan8[12]] =
868 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
871 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
873 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
874 if(sl->sub_mb_type[i] >=4){
875 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
878 sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
879 sl->sub_mb_type[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
883 for (list = 0; list < sl->list_count; list++) {
884 int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
886 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
887 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
891 }else if(ref_count == 2){
892 tmp= get_bits1(&sl->gb)^1;
894 tmp= get_ue_golomb_31(&sl->gb);
896 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
909 dct8x8_allowed = get_dct8x8_allowed(h, sl);
911 for (list = 0; list < sl->list_count; list++) {
913 if(IS_DIRECT(sl->sub_mb_type[i])) {
914 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
917 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
918 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
920 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
921 const int sub_mb_type= sl->sub_mb_type[i];
922 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
923 for(j=0; j<sub_partition_count[i]; j++){
925 const int index= 4*i + block_width*j;
926 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
927 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
928 mx += get_se_golomb(&sl->gb);
929 my += get_se_golomb(&sl->gb);
930 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
932 if(IS_SUB_8X8(sub_mb_type)){
934 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
936 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
937 }else if(IS_SUB_8X4(sub_mb_type)){
938 mv_cache[ 1 ][0]= mx;
939 mv_cache[ 1 ][1]= my;
940 }else if(IS_SUB_4X8(sub_mb_type)){
941 mv_cache[ 8 ][0]= mx;
942 mv_cache[ 8 ][1]= my;
944 mv_cache[ 0 ][0]= mx;
945 mv_cache[ 0 ][1]= my;
948 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
954 }else if(IS_DIRECT(mb_type)){
955 ff_h264_pred_direct_motion(h, sl, &mb_type);
956 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
959 //FIXME we should set ref_idx_l? to 0 if we use that later ...
960 if(IS_16X16(mb_type)){
961 for (list = 0; list < sl->list_count; list++) {
963 if(IS_DIR(mb_type, 0, list)){
964 if(local_ref_count[list]==1){
966 } else if(local_ref_count[list]==2){
967 val= get_bits1(&sl->gb)^1;
969 val= get_ue_golomb_31(&sl->gb);
970 if (val >= local_ref_count[list]){
971 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
975 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
978 for (list = 0; list < sl->list_count; list++) {
979 if(IS_DIR(mb_type, 0, list)){
980 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
981 mx += get_se_golomb(&sl->gb);
982 my += get_se_golomb(&sl->gb);
983 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
985 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
989 else if(IS_16X8(mb_type)){
990 for (list = 0; list < sl->list_count; list++) {
993 if(IS_DIR(mb_type, i, list)){
994 if(local_ref_count[list] == 1) {
996 } else if(local_ref_count[list] == 2) {
997 val= get_bits1(&sl->gb)^1;
999 val= get_ue_golomb_31(&sl->gb);
1000 if (val >= local_ref_count[list]){
1001 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1006 val= LIST_NOT_USED&0xFF;
1007 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1010 for (list = 0; list < sl->list_count; list++) {
1013 if(IS_DIR(mb_type, i, list)){
1014 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1015 mx += get_se_golomb(&sl->gb);
1016 my += get_se_golomb(&sl->gb);
1017 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1019 val= pack16to32(mx,my);
1022 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1026 av_assert2(IS_8X16(mb_type));
1027 for (list = 0; list < sl->list_count; list++) {
1030 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1031 if(local_ref_count[list]==1){
1033 } else if(local_ref_count[list]==2){
1034 val= get_bits1(&sl->gb)^1;
1036 val= get_ue_golomb_31(&sl->gb);
1037 if (val >= local_ref_count[list]){
1038 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1043 val= LIST_NOT_USED&0xFF;
1044 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1047 for (list = 0; list < sl->list_count; list++) {
1050 if(IS_DIR(mb_type, i, list)){
1051 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1052 mx += get_se_golomb(&sl->gb);
1053 my += get_se_golomb(&sl->gb);
1054 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1056 val= pack16to32(mx,my);
1059 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1065 if(IS_INTER(mb_type))
1066 write_back_motion(h, sl, mb_type);
1068 if(!IS_INTRA16x16(mb_type)){
1069 cbp= get_ue_golomb(&sl->gb);
1073 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1076 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1077 else cbp= golomb_to_inter_cbp [cbp];
1080 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1083 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1084 else cbp= golomb_to_inter_cbp_gray[cbp];
1087 if (!decode_chroma && cbp>15) {
1088 av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1089 return AVERROR_INVALIDDATA;
1093 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1094 mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1097 h->cbp_table[mb_xy]= cbp;
1098 h->cur_pic.mb_type[mb_xy] = mb_type;
1100 if(cbp || IS_INTRA16x16(mb_type)){
1101 int i4x4, i8x8, chroma_idx;
1104 GetBitContext *gb = &sl->gb;
1105 const uint8_t *scan, *scan8x8;
1106 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1108 if(IS_INTERLACED(mb_type)){
1109 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1110 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1112 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1113 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1116 dquant= get_se_golomb(&sl->gb);
1118 sl->qscale += dquant;
1120 if (((unsigned)sl->qscale) > max_qp){
1121 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1122 else sl->qscale -= max_qp+1;
1123 if (((unsigned)sl->qscale) > max_qp){
1124 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1129 sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
1130 sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);
1132 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1135 h->cbp_table[mb_xy] |= ret << 12;
1137 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1140 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1144 const int num_c8x8 = h->sps.chroma_format_idc;
1147 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1148 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1149 CHROMA_DC_BLOCK_INDEX+chroma_idx,
1150 CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1151 NULL, 4*num_c8x8) < 0) {
1157 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1158 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1159 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1160 for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1161 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1162 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1163 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1165 mb += 16 << pixel_shift;
1170 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1171 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1175 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1176 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1177 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1179 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1180 write_back_non_zero_count(h, sl);