2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/* Map from ue(v) Golomb code index to coded_block_pattern for inter MBs,
 * "_gray" variant (presumably used when no chroma is coded — name-based,
 * confirm against the caller at the golomb_to_*_cbp_gray use site). */
40 static const uint8_t golomb_to_inter_cbp_gray[16]={
41  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Same mapping for intra 4x4 macroblocks ("_gray" variant). */
44 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
45 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* coeff_token VLC for 4:2:0 chroma DC blocks: code lengths and codewords.
 * 4*5 entries — presumably indexed [trailing_ones + 4*total_coeff], matching
 * the coeff_token&3 / coeff_token>>2 unpacking in decode_residual().
 * NOTE(review): the table bodies are missing from this extract. */
48 static const uint8_t chroma_dc_coeff_token_len[4*5]={
56 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/* coeff_token VLC for 4:2:2 chroma DC blocks (up to 8 coeffs → 4*9 entries). */
64 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
76 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* coeff_token VLC code lengths: 4 tables selected by the predicted number of
 * non-zero coefficients (see coeff_token_table_index in decode_residual()),
 * each with 4*17 entries. The last table is all-6-bit, i.e. fixed-length codes.
 * NOTE(review): the row-grouping braces between tables are missing from this
 * extract; only the numeric rows survive. */
88 static const uint8_t coeff_token_len[4][4*17]={
91      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
92     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
93     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
94     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
98      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
99      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
100     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
101     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
105      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
106      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
107      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
108     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
112      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
113      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
114      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
/* coeff_token VLC codewords matching coeff_token_len above; the last table
 * is the sequential 0..63 fixed-length code set.
 * NOTE(review): inter-table braces are missing from this extract. */
119 static const uint8_t coeff_token_bits[4][4*17]={
122      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
123      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
124     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
125     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
129     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
130      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
131     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
132     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
136     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
137     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
138     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
139     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
143      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
144     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
145     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
146     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
/* total_zeros VLC for 4x4 luma/chroma AC blocks: one sub-table per
 * total_coeff value (rows get shorter as total_coeff grows).
 * NOTE(review): rows 8..16 of each table are missing from this extract. */
150 static const uint8_t total_zeros_len[16][16]= {
151     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
152     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
153     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
154     {5,3,4,4,3,3,3,4,3,4,5,5,5},
155     {4,4,4,3,3,3,3,3,4,5,4,5},
156     {6,5,3,3,3,3,3,3,4,3,6},
157     {6,5,3,3,3,2,3,4,3,6},
/* Codewords matching total_zeros_len. */
168 static const uint8_t total_zeros_bits[16][16]= {
169     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
170     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
171     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
172     {3,7,5,4,6,5,4,3,3,2,2,1,0},
173     {5,4,3,7,6,5,4,3,2,1,1,0},
174     {1,1,7,6,5,4,3,2,1,1,0},
175     {1,1,5,4,3,3,2,1,1,0},
/* total_zeros VLC for 4:2:0 chroma DC (max 4 coeffs).
 * NOTE(review): table bodies are missing from this extract. */
186 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
192 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* total_zeros VLC for 4:2:2 chroma DC (max 8 coeffs); some trailing rows
 * are missing from this extract. */
198 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
199     { 1, 3, 3, 4, 4, 4, 5, 5 },
200     { 3, 2, 3, 3, 3, 3, 3 },
201     { 3, 3, 2, 2, 3, 3 },
208 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
209     { 1, 2, 3, 2, 3, 1, 1, 0 },
210     { 0, 1, 1, 4, 5, 6, 7 },
211     { 0, 1, 1, 2, 6, 7 },
/* run_before VLC: one sub-table per zeros_left value (the 7th table, used
 * via run7_vlc, covers zeros_left > 6 and is the only row visible here). */
218 static const uint8_t run_len[7][16]={
225     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
228 static const uint8_t run_bits[7][16]={
235     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Statically allocated VLC decode tables, filled once by
 * ff_h264_decode_init_vlc() with INIT_VLC_USE_NEW_STATIC. Each *_size
 * constant must match the corresponding array dimension. */
238 static VLC coeff_token_vlc[4];
/* The four coeff_token tables are packed back-to-back in one array. */
239 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
240 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 static VLC chroma_dc_coeff_token_vlc;
243 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
244 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246 static VLC chroma422_dc_coeff_token_vlc;
247 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
248 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250 static VLC total_zeros_vlc[15];
251 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
252 static const int total_zeros_vlc_tables_size = 512;
254 static VLC chroma_dc_total_zeros_vlc[3];
255 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
256 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258 static VLC chroma422_dc_total_zeros_vlc[7];
259 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
260 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262 static VLC run_vlc[6];
263 static VLC_TYPE run_vlc_tables[6][8][2];
264 static const int run_vlc_tables_size = 8;
/* NOTE(review): the 'static VLC run7_vlc;' declaration is missing from this
 * extract, but run7_vlc is referenced below. */
267 static VLC_TYPE run7_vlc_table[96][2];
268 static const int run7_vlc_table_size = 96;
/* Width of the fast level-decode lookup: cavlc_level_tab maps the next
 * LEVEL_TAB_BITS bits to {decoded level, bits consumed}; values >= 100 in
 * slot [0] mark escapes handled bit-by-bit in decode_residual(). */
270 #define LEVEL_TAB_BITS 8
271 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Bits consumed per lookup step by get_vlc2() for each VLC (table depth). */
273 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
274 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
275 #define COEFF_TOKEN_VLC_BITS 8
276 #define TOTAL_ZEROS_VLC_BITS 9
277 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
278 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
279 #define RUN_VLC_BITS 3
280 #define RUN7_VLC_BITS 6
283 * Get the predicted number of non-zero coefficients.
284 * @param n block index
/**
 * Predict the number of non-zero coefficients for block n from the cached
 * counts of the left and top neighbour blocks.
 * NOTE(review): lines are missing from this extract — the declaration of 'i'
 * (presumably i = left + top) and the return statement are not visible.
 */
286 static inline int pred_non_zero_count(H264Context *h, int n){
287 const int index8= scan8[n];
288 const int left= h->non_zero_count_cache[index8 - 1];
289 const int top = h->non_zero_count_cache[index8 - 8];
/* Average the two neighbour counts, rounding up, when both are available. */
292 if(i<64) i= (i+1)>>1;
294 tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Build cavlc_level_tab: for every suffix_length (0..6) and every possible
 * LEVEL_TAB_BITS-bit window, precompute the decoded level and the number of
 * bits consumed. Entries that cannot be resolved inside the window store
 * prefix+100 (or LEVEL_TAB_BITS+100) as an escape marker, which
 * decode_residual() detects via level_code >= 100.
 * NOTE(review): loop-variable declarations, the final else, and closing
 * braces are missing from this extract.
 */
299 static av_cold void init_cavlc_level_tab(void){
303 for(suffix_length=0; suffix_length<7; suffix_length++){
304 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* Length of the unary prefix (leading zeros before the first 1 bit). */
305 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
/* Prefix and suffix both fit: decode the full level now. */
308 int level_code = (prefix << suffix_length) +
309 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
/* Branch-free mapping of the unsigned code to a signed level. */
310 int mask = -(level_code&1);
311 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312 cavlc_level_tab[suffix_length][i][0]= level_code;
313 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
314 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* Prefix fits but suffix does not: escape with the known prefix. */
315 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* All-zero window: escape, caller must keep counting prefix bits. */
318 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all CAVLC VLC decode tables from the static
 * len/bits arrays above, using preallocated static storage
 * (INIT_VLC_USE_NEW_STATIC), then build the level lookup table.
 * NOTE(review): the 'static int done' guard, loop headers and several
 * closing braces are missing from this extract.
 */
325 av_cold void ff_h264_decode_init_vlc(void){
333 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
334 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
335 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
336 &chroma_dc_coeff_token_len [0], 1, 1,
337 &chroma_dc_coeff_token_bits[0], 1, 1,
338 INIT_VLC_USE_NEW_STATIC);
340 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
341 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
342 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
343 &chroma422_dc_coeff_token_len [0], 1, 1,
344 &chroma422_dc_coeff_token_bits[0], 1, 1,
345 INIT_VLC_USE_NEW_STATIC);
/* The four coeff_token VLCs share one packed table; 'offset' walks it. */
349 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
350 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
351 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
352 &coeff_token_len [i][0], 1, 1,
353 &coeff_token_bits[i][0], 1, 1,
354 INIT_VLC_USE_NEW_STATIC);
355 offset += coeff_token_vlc_tables_size[i];
/*
358 * This is a one time safety check to make sure that
359 * the packed static coeff_token_vlc table sizes
360 * were initialized correctly.
*/
362 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
365 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
366 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
367 init_vlc(&chroma_dc_total_zeros_vlc[i],
368 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
369 &chroma_dc_total_zeros_len [i][0], 1, 1,
370 &chroma_dc_total_zeros_bits[i][0], 1, 1,
371 INIT_VLC_USE_NEW_STATIC);
375 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
376 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
377 init_vlc(&chroma422_dc_total_zeros_vlc[i],
378 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
379 &chroma422_dc_total_zeros_len [i][0], 1, 1,
380 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
381 INIT_VLC_USE_NEW_STATIC);
385 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
386 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
387 init_vlc(&total_zeros_vlc[i],
388 TOTAL_ZEROS_VLC_BITS, 16,
389 &total_zeros_len [i][0], 1, 1,
390 &total_zeros_bits[i][0], 1, 1,
391 INIT_VLC_USE_NEW_STATIC);
395 run_vlc[i].table = run_vlc_tables[i];
396 run_vlc[i].table_allocated = run_vlc_tables_size;
397 init_vlc(&run_vlc[i],
399 &run_len [i][0], 1, 1,
400 &run_bits[i][0], 1, 1,
401 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): comma operator below instead of ';' — harmless, but
 * unusual; consider changing to a semicolon. */
403 run7_vlc.table = run7_vlc_table,
404 run7_vlc.table_allocated = run7_vlc_table_size;
405 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
406 &run_len [6][0], 1, 1,
407 &run_bits[6][0], 1, 1,
408 INIT_VLC_USE_NEW_STATIC);
410 init_cavlc_level_tab();
/**
 * Read the unary level_prefix: count the zero bits preceding the first 1
 * in the bitstream window and consume them together with the 1 bit.
 * NOTE(review): the declarations of 'buf'/'log', OPEN_READER and the
 * 'return log-1;' are missing from this extract.
 */
417 static inline int get_level_prefix(GetBitContext *gb){
422 UPDATE_CACHE(re, gb);
423 buf=GET_CACHE(re, gb);
/* Position of the first set bit from the MSB side, plus one. */
425 log= 32 - av_log2(buf);
427 print_bin(buf>>(32-log), log);
428 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
431 LAST_SKIP_BITS(re, gb, log);
432 CLOSE_READER(re, gb);
438 * Decode a residual block.
439 * @param n block index
440 * @param scantable scantable
441 * @param max_coeff number of coefficients in the block
442 * @return <0 if an error occurred
/**
 * Decode one CAVLC residual block: coeff_token (total_coeff/trailing_ones),
 * the trailing-one signs, the level values, total_zeros and the run_before
 * codes, then scatter the levels into 'block' via 'scantable', optionally
 * dequantizing with 'qmul'.
 * NOTE(review): many lines (branch conditions, else branches, closing
 * braces, early returns) are missing from this extract.
 */
444 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
/* Selects one of the 4 coeff_token VLCs from the predicted nnz. */
445 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449 //FIXME put trailing_onex into the context
/* Chroma DC blocks use dedicated coeff_token VLCs (4:2:0 vs 4:2:2). */
453 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
456 total_coeff= coeff_token>>2;
/* Luma: pick the VLC table from the neighbour-predicted nnz, then decode. */
458 if(n >= LUMA_DC_BLOCK_INDEX){
459 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
460 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
461 total_coeff= coeff_token>>2;
463 total_coeff= pred_non_zero_count(h, n);
464 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
465 total_coeff= coeff_token>>2;
/* Record the decoded count for later prediction and deblocking. */
468 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470 //FIXME set last_non_zero?
474 if(total_coeff > (unsigned)max_coeff) {
475 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
/* coeff_token packs trailing_ones in the low two bits. */
479 trailing_ones= coeff_token&3;
480 tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
481 assert(total_coeff<=16);
/* Peek up to 3 sign bits for the trailing +/-1 levels; only
 * 'trailing_ones' of them are actually consumed. */
483 i = show_bits(gb, 3);
484 skip_bits(gb, trailing_ones);
485 level[0] = 1-((i&4)>>1);
486 level[1] = 1-((i&2) );
487 level[2] = 1-((i&1)<<1);
489 if(trailing_ones<total_coeff) {
/* Initial suffix_length per the spec: 1 iff total_coeff > 10 and
 * trailing_ones < 3 (bitwise '&' is intentional here — both operands
 * are 0/1). */
491 int suffix_length = total_coeff > 10 & trailing_ones < 3;
/* Fast path: look the level up in the precomputed table. */
492 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
493 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Escape marker from init_cavlc_level_tab(): decode bit-by-bit. */
496 if(level_code >= 100){
497 prefix= level_code - 100;
498 if(prefix == LEVEL_TAB_BITS)
499 prefix += get_level_prefix(gb);
501 //first coefficient has suffix_length equal to 0 or 1
502 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504 level_code= (prefix<<1) + get_bits1(gb); //part
506 level_code= prefix; //part
507 }else if(prefix==14){
509 level_code= (prefix<<1) + get_bits1(gb); //part
511 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: long escape, (prefix-3)-bit suffix. */
513 level_code= 30 + get_bits(gb, prefix-3); //part
516 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519 level_code += (1<<(prefix-3))-4096;
/* When fewer than 3 trailing ones, |level| of the first coded
 * coefficient is incremented by one (spec 9.2.2.1). */
523 if(trailing_ones < 3) level_code += 2;
/* Branch-free unsigned-code -> signed-level mapping. */
526 mask= -(level_code&1);
527 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
529 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
531 suffix_length = 1 + (level_code + 3U > 6U);
532 level[trailing_ones]= level_code;
535 //remaining coefficients have suffix_length > 0
536 for(i=trailing_ones+1;i<total_coeff;i++) {
/* suffix_length grows once |level| exceeds these thresholds. */
537 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
538 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
539 level_code= cavlc_level_tab[suffix_length][bitsi][0];
541 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
542 if(level_code >= 100){
543 prefix= level_code - 100;
544 if(prefix == LEVEL_TAB_BITS){
545 prefix += get_level_prefix(gb);
548 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
550 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
552 level_code += (1<<(prefix-3))-4096;
554 mask= -(level_code&1);
555 level_code= (((2+level_code)>>1) ^ mask) - mask;
557 level[i]= level_code;
558 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* total_zeros: skipped entirely when the block is already full. */
562 if(total_coeff == max_coeff)
565 if (max_coeff <= 8) {
567 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
568 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
570 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
571 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
573 zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scatter the levels from highest scan position downwards, reading a
 * run_before code between coefficients while zeros remain; the DC path
 * (n >= LUMA_DC_BLOCK_INDEX) stores raw levels, the other path
 * dequantizes with qmul. Instantiated for int16_t and int32_t depending
 * on pixel_shift. (No comments inside: backslash continuations.) */
577 #define STORE_BLOCK(type) \
578 scantable += zeros_left + total_coeff - 1; \
579 if(n >= LUMA_DC_BLOCK_INDEX){ \
580 ((type*)block)[*scantable] = level[0]; \
581 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
583 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
585 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
586 zeros_left -= run_before; \
587 scantable -= 1 + run_before; \
588 ((type*)block)[*scantable]= level[i]; \
590 for(;i<total_coeff;i++) { \
592 ((type*)block)[*scantable]= level[i]; \
595 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
596 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
598 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
600 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
601 zeros_left -= run_before; \
602 scantable -= 1 + run_before; \
603 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
605 for(;i<total_coeff;i++) { \
607 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
/* A corrupt stream can drive zeros_left negative via run_before. */
611 if (zeros_left < 0) {
612 av_log(h->avctx, AV_LOG_ERROR,
613 "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
614 return AVERROR_INVALIDDATA;
617 if (h->pixel_shift) {
/**
 * Decode the luma residual of one macroblock for plane p (0 = luma,
 * 1/2 = extra planes in 4:4:4): the intra16x16 DC block plus 15-coeff AC
 * blocks, or 16-coeff 4x4 / 8x8-DCT-grouped blocks, updating the per-8x8
 * CBP used for deblocking. Returns <0 on decode error.
 * NOTE(review): several lines (cbp tests, else branches, returns, closing
 * braces) are missing from this extract.
 */
626 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
/* Extra planes reuse the chroma QP for dequantization. */
628 int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
629 if(IS_INTRA16x16(mb_type)){
/* Clear the 16-entry DC buffer before decoding into it. */
630 AV_ZERO128(h->mb_luma_dc[p]+0);
631 AV_ZERO128(h->mb_luma_dc[p]+8);
632 AV_ZERO128(h->mb_luma_dc[p]+16);
633 AV_ZERO128(h->mb_luma_dc[p]+24);
634 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
635 return -1; //FIXME continue if partitioned and other return -1 too
/* Intra16x16 codes luma AC either for all 4 8x8 groups or none. */
638 assert((cbp&15) == 0 || (cbp&15) == 15);
/* AC blocks: 15 coefficients each, scan skips the DC position. */
641 for(i8x8=0; i8x8<4; i8x8++){
642 for(i4x4=0; i4x4<4; i4x4++){
643 const int index= i4x4 + 4*i8x8 + p*16;
644 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
645 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* cbp luma bits all zero: no AC coefficients for this plane. */
652 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Dequant matrix set: intra vs inter, offset by plane. */
656 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
657 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
659 for(i8x8=0; i8x8<4; i8x8++){
661 if(IS_8x8DCT(mb_type)){
/* 8x8 transform: the four 4x4 decodes interleave into one buffer. */
662 int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
664 for(i4x4=0; i4x4<4; i4x4++){
665 const int index= i4x4 + 4*i8x8 + p*16;
666 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
667 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Collapse the four sub-block nnz counts into the 8x8 CBP bit. */
670 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
671 nnz[0] += nnz[1] + nnz[8] + nnz[9];
672 new_cbp |= !!nnz[0] << i8x8;
674 for(i4x4=0; i4x4<4; i4x4++){
675 const int index= i4x4 + 4*i8x8 + p*16;
676 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
677 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
680 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
/* 8x8 group not coded: zero its nnz cache entries. */
684 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
685 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
692 int ff_h264_decode_mb_cavlc(H264Context *h){
695 unsigned int mb_type, cbp;
696 int dct8x8_allowed= h->pps.transform_8x8_mode;
697 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
698 const int pixel_shift = h->pixel_shift;
700 mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
702 tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
703 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
705 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
706 if(h->mb_skip_run==-1)
707 h->mb_skip_run= get_ue_golomb(&h->gb);
709 if (h->mb_skip_run--) {
710 if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
711 if(h->mb_skip_run==0)
712 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
718 if (FRAME_MBAFF(h)) {
719 if( (h->mb_y&1) == 0 )
720 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
723 h->prev_mb_skipped= 0;
725 mb_type= get_ue_golomb(&h->gb);
726 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
728 partition_count= b_mb_type_info[mb_type].partition_count;
729 mb_type= b_mb_type_info[mb_type].type;
732 goto decode_intra_mb;
734 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
736 partition_count= p_mb_type_info[mb_type].partition_count;
737 mb_type= p_mb_type_info[mb_type].type;
740 goto decode_intra_mb;
743 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
744 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
748 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
752 cbp= i_mb_type_info[mb_type].cbp;
753 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
754 mb_type= i_mb_type_info[mb_type].type;
758 mb_type |= MB_TYPE_INTERLACED;
760 h->slice_table[ mb_xy ]= h->slice_num;
762 if(IS_INTRA_PCM(mb_type)){
763 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
764 h->sps.bit_depth_luma;
766 // We assume these blocks are very rare so we do not optimize it.
767 h->intra_pcm_ptr = align_get_bits(&h->gb);
768 if (get_bits_left(&h->gb) < mb_size) {
769 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
770 return AVERROR_INVALIDDATA;
772 skip_bits_long(&h->gb, mb_size);
774 // In deblocking, the quantizer is 0
775 h->cur_pic.qscale_table[mb_xy] = 0;
776 // All coeffs are present
777 memset(h->non_zero_count[mb_xy], 16, 48);
779 h->cur_pic.mb_type[mb_xy] = mb_type;
783 fill_decode_neighbors(h, mb_type);
784 fill_decode_caches(h, mb_type);
787 if(IS_INTRA(mb_type)){
789 // init_top_left_availability(h);
790 if(IS_INTRA4x4(mb_type)){
793 if(dct8x8_allowed && get_bits1(&h->gb)){
794 mb_type |= MB_TYPE_8x8DCT;
798 // fill_intra4x4_pred_table(h);
799 for(i=0; i<16; i+=di){
800 int mode= pred_intra_mode(h, i);
802 if(!get_bits1(&h->gb)){
803 const int rem_mode= get_bits(&h->gb, 3);
804 mode = rem_mode + (rem_mode >= mode);
808 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
810 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
812 write_back_intra_pred_mode(h);
813 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
816 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
817 if(h->intra16x16_pred_mode < 0)
821 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
824 h->chroma_pred_mode= pred_mode;
826 h->chroma_pred_mode = DC_128_PRED8x8;
828 }else if(partition_count==4){
829 int i, j, sub_partition_count[4], list, ref[2][4];
831 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
833 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
834 if(h->sub_mb_type[i] >=13){
835 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
838 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
839 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
841 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
842 ff_h264_pred_direct_motion(h, &mb_type);
843 h->ref_cache[0][scan8[4]] =
844 h->ref_cache[1][scan8[4]] =
845 h->ref_cache[0][scan8[12]] =
846 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
849 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
851 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
852 if(h->sub_mb_type[i] >=4){
853 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
856 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
857 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
861 for(list=0; list<h->list_count; list++){
862 int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
864 if(IS_DIRECT(h->sub_mb_type[i])) continue;
865 if(IS_DIR(h->sub_mb_type[i], 0, list)){
869 }else if(ref_count == 2){
870 tmp= get_bits1(&h->gb)^1;
872 tmp= get_ue_golomb_31(&h->gb);
874 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
887 dct8x8_allowed = get_dct8x8_allowed(h);
889 for(list=0; list<h->list_count; list++){
891 if(IS_DIRECT(h->sub_mb_type[i])) {
892 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
895 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
896 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
898 if(IS_DIR(h->sub_mb_type[i], 0, list)){
899 const int sub_mb_type= h->sub_mb_type[i];
900 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
901 for(j=0; j<sub_partition_count[i]; j++){
903 const int index= 4*i + block_width*j;
904 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
905 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
906 mx += get_se_golomb(&h->gb);
907 my += get_se_golomb(&h->gb);
908 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
910 if(IS_SUB_8X8(sub_mb_type)){
912 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
914 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
915 }else if(IS_SUB_8X4(sub_mb_type)){
916 mv_cache[ 1 ][0]= mx;
917 mv_cache[ 1 ][1]= my;
918 }else if(IS_SUB_4X8(sub_mb_type)){
919 mv_cache[ 8 ][0]= mx;
920 mv_cache[ 8 ][1]= my;
922 mv_cache[ 0 ][0]= mx;
923 mv_cache[ 0 ][1]= my;
926 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
932 }else if(IS_DIRECT(mb_type)){
933 ff_h264_pred_direct_motion(h, &mb_type);
934 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
937 //FIXME we should set ref_idx_l? to 0 if we use that later ...
938 if(IS_16X16(mb_type)){
939 for(list=0; list<h->list_count; list++){
941 if(IS_DIR(mb_type, 0, list)){
942 int rc = h->ref_count[list] << MB_MBAFF(h);
945 } else if (rc == 2) {
946 val= get_bits1(&h->gb)^1;
948 val= get_ue_golomb_31(&h->gb);
950 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
954 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
957 for(list=0; list<h->list_count; list++){
958 if(IS_DIR(mb_type, 0, list)){
959 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
960 mx += get_se_golomb(&h->gb);
961 my += get_se_golomb(&h->gb);
962 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
964 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
968 else if(IS_16X8(mb_type)){
969 for(list=0; list<h->list_count; list++){
972 if(IS_DIR(mb_type, i, list)){
973 int rc = h->ref_count[list] << MB_MBAFF(h);
976 } else if (rc == 2) {
977 val= get_bits1(&h->gb)^1;
979 val= get_ue_golomb_31(&h->gb);
981 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
986 val= LIST_NOT_USED&0xFF;
987 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
990 for(list=0; list<h->list_count; list++){
993 if(IS_DIR(mb_type, i, list)){
994 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
995 mx += get_se_golomb(&h->gb);
996 my += get_se_golomb(&h->gb);
997 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
999 val= pack16to32(mx,my);
1002 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1006 assert(IS_8X16(mb_type));
1007 for(list=0; list<h->list_count; list++){
1010 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1011 int rc = h->ref_count[list] << MB_MBAFF(h);
1014 } else if (rc == 2) {
1015 val= get_bits1(&h->gb)^1;
1017 val= get_ue_golomb_31(&h->gb);
1019 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1024 val= LIST_NOT_USED&0xFF;
1025 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1028 for(list=0; list<h->list_count; list++){
1031 if(IS_DIR(mb_type, i, list)){
1032 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1033 mx += get_se_golomb(&h->gb);
1034 my += get_se_golomb(&h->gb);
1035 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1037 val= pack16to32(mx,my);
1040 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1046 if(IS_INTER(mb_type))
1047 write_back_motion(h, mb_type);
1049 if(!IS_INTRA16x16(mb_type)){
1050 cbp= get_ue_golomb(&h->gb);
1054 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1057 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1058 else cbp= golomb_to_inter_cbp [cbp];
1061 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1064 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1065 else cbp= golomb_to_inter_cbp_gray[cbp];
1069 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1070 mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1073 h->cbp_table[mb_xy]= cbp;
1074 h->cur_pic.mb_type[mb_xy] = mb_type;
1076 if(cbp || IS_INTRA16x16(mb_type)){
1077 int i4x4, i8x8, chroma_idx;
1080 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1081 const uint8_t *scan, *scan8x8;
1082 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1084 if(IS_INTERLACED(mb_type)){
1085 scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1086 scan= h->qscale ? h->field_scan : h->field_scan_q0;
1088 scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1089 scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1092 dquant= get_se_golomb(&h->gb);
1094 h->qscale += dquant;
1096 if(((unsigned)h->qscale) > max_qp){
1097 if(h->qscale<0) h->qscale+= max_qp+1;
1098 else h->qscale-= max_qp+1;
1099 if(((unsigned)h->qscale) > max_qp){
1100 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1105 h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1106 h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1108 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1111 h->cbp_table[mb_xy] |= ret << 12;
1113 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1116 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1119 } else if (CHROMA422(h)) {
1121 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1122 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1123 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1130 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1131 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1132 int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1133 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1134 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1135 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1136 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1138 mb += 16 << pixel_shift;
1143 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1144 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1146 } else /* yuv420 */ {
1148 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1149 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1155 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1156 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1157 for(i4x4=0; i4x4<4; i4x4++){
1158 const int index= 16 + 16*chroma_idx + i4x4;
1159 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1165 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1166 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1170 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1171 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1172 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1174 h->cur_pic.qscale_table[mb_xy] = h->qscale;
1175 write_back_non_zero_count(h);