2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
33 #include "mpegvideo.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
38 #include "libavutil/avassert.h"
// Maps the cbp code number read with get_ue_golomb() in
// ff_h264_decode_mb_cavlc() to a coded block pattern for macroblocks that
// are not intra 4x4. Every entry is below 16, so only the low four cbp bits
// can be set through this table.
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// Maps the cbp code number read with get_ue_golomb() in
// ff_h264_decode_mb_cavlc() to a coded block pattern for intra 4x4
// macroblocks. Every entry is below 16, so only the low four cbp bits can be
// set through this table; code number 0 yields 15.
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Code length and code bit tables for the two chroma DC coeff_token VLCs.
// ff_h264_decode_init_vlc() hands them to init_vlc(): the 4*5 entry pair
// builds chroma_dc_coeff_token_vlc and the 4*9 entry pair builds
// chroma422_dc_coeff_token_vlc.
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
// Code lengths (in bits) for the four coeff_token VLCs. Row i is passed to
// init_vlc() together with coeff_token_bits[i] to build coeff_token_vlc[i].
// decode_residual() splits the decoded value into total_coeff (value >> 2)
// and trailing_ones (value & 3).
// NOTE(review): entries of 0 presumably mark code indices that have no
// codeword - confirm against init_vlc().
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Codeword bit patterns for the four coeff_token VLCs, parallel to
// coeff_token_len: entry [i][k] is the code value whose length is
// coeff_token_len[i][k]. Row i is passed to init_vlc() to build
// coeff_token_vlc[i].
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths for the total_zeros VLCs. Row i is passed to init_vlc() with
// total_zeros_bits[i] to build total_zeros_vlc[i]. decode_residual() selects
// the table as (total_zeros_vlc-1)[total_coeff], so row 0 is the table used
// when total_coeff is 1. Rows get shorter as the row index grows.
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
// Codeword bit patterns for the total_zeros VLCs, parallel to
// total_zeros_len: entry [i][k] is the code value whose length is
// total_zeros_len[i][k]. Row i is passed to init_vlc() to build
// total_zeros_vlc[i].
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
// Code length and code bit tables for the chroma DC total_zeros VLCs.
// Row i of each is passed to init_vlc() (4 codes per row) to build
// chroma_dc_total_zeros_vlc[i]; decode_residual() selects the table as
// (chroma_dc_total_zeros_vlc-1)[total_coeff].
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
// Code lengths for the chroma 4:2:2 DC total_zeros VLCs. Row i is passed to
// init_vlc() (8 codes per row) with chroma422_dc_total_zeros_bits[i] to build
// chroma422_dc_total_zeros_vlc[i]; decode_residual() selects the table as
// (chroma422_dc_total_zeros_vlc-1)[total_coeff].
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
// Codeword bit patterns for the chroma 4:2:2 DC total_zeros VLCs, parallel
// to chroma422_dc_total_zeros_len. Row i is passed to init_vlc() to build
// chroma422_dc_total_zeros_vlc[i].
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
// Code lengths for the run_before VLCs. ff_h264_decode_init_vlc() passes
// row i with run_bits[i] to init_vlc() to build run_vlc[i], and passes
// row 6 (16 codes) to build run7_vlc. decode_residual() selects
// (run_vlc-1)[zeros_left] or run7_vlc when reading run_before.
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
// Codeword bit patterns for the run_before VLCs, parallel to run_len.
// Row i builds run_vlc[i]; row 6 builds run7_vlc.
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Static storage for every VLC used by the CAVLC residual decoder.
// ff_h264_decode_init_vlc() points each VLC.table at the matching array
// below, sets table_allocated to the matching *_size constant and then
// calls init_vlc() with INIT_VLC_USE_NEW_STATIC.

// The four coeff_token tables are packed back to back in one array; the
// per-table entry counts are listed in coeff_token_vlc_tables_size and must
// sum to the array length (checked with av_assert0 during init).
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
// Backing table for run7_vlc, which is built from row 6 of run_len/run_bits.
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
// Level lookup indexed by [suffix_length][next LEVEL_TAB_BITS bits].
// Element [0] is either a decoded level or an escape value of 100 or more;
// element [1] is the number of bits to skip. Filled by init_cavlc_level_tab().
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Bit widths passed to init_vlc() when building each table and to get_vlc2()
// when reading from it.
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
// Predicts the coefficient count of block n from the counts cached for its
// left neighbour (index8 - 1) and its top neighbour (index8 - 8) in
// h->non_zero_count_cache, where index8 is scan8[n].
// NOTE(review): the declaration of i and the return statement are not
// visible in this view; the trace line prints i&31 as the prediction (P).
287 static inline int pred_non_zero_count(H264Context *h, int n){
288 const int index8= scan8[n];
289 const int left= h->non_zero_count_cache[index8 - 1];
290 const int top = h->non_zero_count_cache[index8 - 8];
// Below 64 the value is halved, rounding up.
// NOTE(review): presumably 64 or more flags an unavailable neighbour - confirm.
293 if(i<64) i= (i+1)>>1;
295 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fills cavlc_level_tab for every suffix_length in 0..6 and every
// LEVEL_TAB_BITS-bit pattern i, so decode_residual() can resolve most level
// codes with a single table lookup.
// Each entry stores [0] = decoded level or an escape value (prefix + 100)
// and [1] = number of bits the caller must skip.
300 static av_cold void init_cavlc_level_tab(void){
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// NOTE(review): assuming av_log2() is floor(log2), prefix is the number of
// leading zero bits of i within the LEVEL_TAB_BITS-bit window.
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix, the terminating bit and the whole suffix fit in the window,
// so the level can be fully decoded here.
308 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309 int level_code = (prefix << suffix_length) +
310 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// Even level_code maps to a positive level, odd level_code to a negative one.
311 int mask = -(level_code&1);
312 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313 cavlc_level_tab[suffix_length][i][0]= level_code;
314 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix and its terminating bit fit; store prefix + 100 as
// an escape and let the caller read the suffix from the bitstream.
315 }else if(prefix + 1 <= LEVEL_TAB_BITS){
316 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case: the window holds no terminating bit; store
// LEVEL_TAB_BITS + 100 so the caller continues counting the prefix with
// get_level_prefix().
319 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds every static VLC table used by the CAVLC decoder and then fills the
// level lookup table. For each VLC the pattern is the same: point .table at
// the static backing array, record its capacity in .table_allocated, then
// call init_vlc() with the code length and code bit tables and
// INIT_VLC_USE_NEW_STATIC.
// NOTE(review): the loop headers over i are not visible in this view.
326 av_cold void ff_h264_decode_init_vlc(void){
// coeff_token for chroma DC blocks (4*5 codes).
334 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337 &chroma_dc_coeff_token_len [0], 1, 1,
338 &chroma_dc_coeff_token_bits[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC);
// coeff_token for chroma 4:2:2 DC blocks (4*9 codes).
341 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344 &chroma422_dc_coeff_token_len [0], 1, 1,
345 &chroma422_dc_coeff_token_bits[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC);
// The general coeff_token tables share one packed array; offset tracks where
// table i starts and advances by that table size after each init.
350 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353 &coeff_token_len [i][0], 1, 1,
354 &coeff_token_bits[i][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC);
356 offset += coeff_token_vlc_tables_size[i];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
363 av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// total_zeros for chroma DC blocks (4 codes per table).
366 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368 init_vlc(&chroma_dc_total_zeros_vlc[i],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370 &chroma_dc_total_zeros_len [i][0], 1, 1,
371 &chroma_dc_total_zeros_bits[i][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC);
// total_zeros for chroma 4:2:2 DC blocks (8 codes per table).
376 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378 init_vlc(&chroma422_dc_total_zeros_vlc[i],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380 &chroma422_dc_total_zeros_len [i][0], 1, 1,
381 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC);
// General total_zeros tables (16 codes per table).
386 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388 init_vlc(&total_zeros_vlc[i],
389 TOTAL_ZEROS_VLC_BITS, 16,
390 &total_zeros_len [i][0], 1, 1,
391 &total_zeros_bits[i][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC);
// run_before tables built from rows of run_len/run_bits.
396 run_vlc[i].table = run_vlc_tables[i];
397 run_vlc[i].table_allocated = run_vlc_tables_size;
398 init_vlc(&run_vlc[i],
400 &run_len [i][0], 1, 1,
401 &run_bits[i][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC);
// run7_vlc uses row 6 of run_len/run_bits with 16 codes.
// NOTE(review): the next statement ends with a comma rather than a semicolon,
// so it forms a single comma expression with the assignment after it. The
// effect is the same, but a semicolon was probably intended.
404 run7_vlc.table = run7_vlc_table,
405 run7_vlc.table_allocated = run7_vlc_table_size;
406 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407 &run_len [6][0], 1, 1,
408 &run_bits[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC);
// Finally precompute the level lookup table used by decode_residual().
411 init_cavlc_level_tab();
// Reads a level prefix directly from the bitstream reader cache of gb.
// decode_residual() calls this when the table lookup reported that the
// prefix extends past LEVEL_TAB_BITS bits and adds the result to the prefix.
// NOTE(review): the return statement is not visible in this view; the debug
// line prints log-1 under the label lpr.
418 static inline int get_level_prefix(GetBitContext *gb){
423 UPDATE_CACHE(re, gb);
424 buf=GET_CACHE(re, gb);
// NOTE(review): assuming av_log2() is floor(log2) on a 32-bit cache word,
// log is the number of leading zero bits plus the terminating one bit.
426 log= 32 - av_log2(buf);
428 print_bin(buf>>(32-log), log);
429 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the log bits that were just examined.
432 LAST_SKIP_BITS(re, gb, log);
433 CLOSE_READER(re, gb);
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scantable
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
// Decodes one CAVLC residual block from gb into block.
// Steps visible here: read coeff_token (total_coeff and trailing_ones), read
// the trailing-one signs, read the remaining levels, read total_zeros, then
// read run_before values while storing levels through STORE_BLOCK.
// h         decoder context; non_zero_count_cache[scan8[n]] is updated
// gb        bitstream reader to consume
// block     destination coefficient array
// n         block index
// scantable maps coefficient order to positions in block
// qmul      dequantisation multipliers indexed by position; callers in this
//           file pass NULL for DC blocks
// max_coeff number of coefficients in the block
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446 MpegEncContext * const s = &h->s;
// Chooses one of the four coeff_token tables from the predicted count.
447 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
449 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
451 //FIXME put trailing_ones into the context
// Chroma DC blocks use their own coeff_token tables (4:2:0 or 4:2:2).
455 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
457 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458 total_coeff= coeff_token>>2;
// Luma DC blocks predict the count from block (n - LUMA_DC_BLOCK_INDEX)*16;
// total_coeff briefly holds the prediction before the real value replaces it.
460 if(n >= LUMA_DC_BLOCK_INDEX){
461 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463 total_coeff= coeff_token>>2;
// Other blocks predict from block n itself and record the decoded count.
465 total_coeff= pred_non_zero_count(h, n);
466 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467 total_coeff= coeff_token>>2;
470 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
472 //FIXME set last_non_zero?
// max_coeff is converted to unsigned, so the comparison is unsigned and a
// negative total_coeff is rejected as well.
476 if(total_coeff > (unsigned)max_coeff) {
477 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
481 trailing_ones= coeff_token&3;
482 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483 av_assert2(total_coeff<=16);
// Peek three sign bits but consume only trailing_ones of them. All three
// level slots are written: a set bit gives -1, a clear bit gives +1.
485 i = show_bits(gb, 3);
486 skip_bits(gb, trailing_ones);
487 level[0] = 1-((i&4)>>1);
488 level[1] = 1-((i&2) );
489 level[2] = 1-((i&1)<<1);
491 if(trailing_ones<total_coeff) {
// Relational operators bind tighter than bitwise &, so this is
// (total_coeff > 10) & (trailing_ones < 3), giving 0 or 1.
493 int suffix_length = total_coeff > 10 & trailing_ones < 3;
494 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
497 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values of 100 or more are escapes carrying prefix + 100.
498 if(level_code >= 100){
499 prefix= level_code - 100;
// The prefix did not end inside the lookup window; keep counting.
500 if(prefix == LEVEL_TAB_BITS)
501 prefix += get_level_prefix(gb);
503 //first coefficient has suffix_length equal to 0 or 1
504 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
506 level_code= (prefix<<1) + get_bits1(gb); //part
508 level_code= prefix; //part
509 }else if(prefix==14){
511 level_code= (prefix<<1) + get_bits1(gb); //part
513 level_code= prefix + get_bits(gb, 4); //part
518 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
521 level_code += (1<<(prefix-3))-4096;
523 level_code += get_bits(gb, prefix-3); //part
526 if(trailing_ones < 3) level_code += 2;
// Even level_code maps to a positive level, odd level_code to a negative one.
529 mask= -(level_code&1);
530 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: when trailing_ones < 3, move the level one step away from zero.
532 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// suffix_length becomes 2 when level_code lies outside -3..3, otherwise 1.
534 suffix_length = 1 + (level_code + 3U > 6U);
535 level[trailing_ones]= level_code;
538 //remaining coefficients have suffix_length > 0
539 for(i=trailing_ones+1;i<total_coeff;i++) {
540 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
541 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
542 level_code= cavlc_level_tab[suffix_length][bitsi][0];
544 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
545 if(level_code >= 100){
546 prefix= level_code - 100;
547 if(prefix == LEVEL_TAB_BITS){
548 prefix += get_level_prefix(gb);
551 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
553 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
555 level_code += (1<<(prefix-3))-4096;
557 mask= -(level_code&1);
558 level_code= (((2+level_code)>>1) ^ mask) - mask;
560 level[i]= level_code;
// Unsigned range test: suffix_length grows by one when level_code lies
// outside [-limit, limit] for the current suffix_limit entry.
561 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
565 if(total_coeff == max_coeff)
// Blocks with max_coeff <= 8 read total_zeros from the chroma DC tables,
// other blocks from total_zeros_vlc. The -1 offset on the table array makes
// total_coeff == 1 select table 0.
568 if (max_coeff <= 8) {
570 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
571 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
573 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
574 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
576 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK(type) writes the decoded levels into block, viewed as an array
// of type. scantable starts at the last coded position (zeros_left +
// total_coeff - 1) and moves backwards by 1 + run_before for each level.
// While zeros remain, run_before is read from (run_vlc-1)[zeros_left] or from
// run7_vlc; the trailing loops place the levels left over after that.
// The first half (n >= LUMA_DC_BLOCK_INDEX) stores levels unscaled; the
// second half stores (level * qmul[position] + 32) >> 6.
580 #define STORE_BLOCK(type) \
581 scantable += zeros_left + total_coeff - 1; \
582 if(n >= LUMA_DC_BLOCK_INDEX){ \
583 ((type*)block)[*scantable] = level[0]; \
584 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
586 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
588 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
589 zeros_left -= run_before; \
590 scantable -= 1 + run_before; \
591 ((type*)block)[*scantable]= level[i]; \
593 for(;i<total_coeff;i++) { \
595 ((type*)block)[*scantable]= level[i]; \
598 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
599 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
601 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
603 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
604 zeros_left -= run_before; \
605 scantable -= 1 + run_before; \
606 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
608 for(;i<total_coeff;i++) { \
610 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
614 if (h->pixel_shift) {
621 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Decodes the luma-style residual of one plane p of the current macroblock.
// Plane 0 uses s->qscale; other planes use h->chroma_qp[p-1].
// Intra 16x16 macroblocks decode a DC block first and then up to sixteen
// 15-coefficient AC blocks; other macroblocks are walked per 8x8 quadrant,
// either as four interleaved parts of an 8x8 transform block or as four
// 4x4 blocks.
// NOTE(review): the return statements are not visible in this view; the
// caller treats a negative result as an error and shifts a non-negative
// result into h->cbp_table.
628 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
630 MpegEncContext * const s = &h->s;
631 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
632 if(IS_INTRA16x16(mb_type)){
// Clear the DC coefficient buffer for this plane before decoding into it.
633 AV_ZERO128(h->mb_luma_dc[p]+0);
634 AV_ZERO128(h->mb_luma_dc[p]+8);
635 AV_ZERO128(h->mb_luma_dc[p]+16);
636 AV_ZERO128(h->mb_luma_dc[p]+24);
// The DC block is read from intra_gb_ptr without dequantisation (qmul NULL).
637 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
638 return -1; //FIXME continue if partitioned and other return -1 too
641 av_assert2((cbp&15) == 0 || (cbp&15) == 15);
// AC blocks skip the first scan position (scan + 1) and hold 15 coefficients.
644 for(i8x8=0; i8x8<4; i8x8++){
645 for(i4x4=0; i4x4<4; i4x4++){
646 const int index= i4x4 + 4*i8x8 + p*16;
647 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
648 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Reset the cached coefficient counts of all sixteen blocks of this plane.
655 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Quantisation matrix index: intra uses 0..2, non-intra uses 3..5, by plane.
659 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
662 for(i8x8=0; i8x8<4; i8x8++){
664 if(IS_8x8DCT(mb_type)){
// With the 8x8 transform all four parts share one 64-coefficient buffer and
// each part uses its own 16-entry slice of scan8x8.
665 int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
667 for(i4x4=0; i4x4<4; i4x4++){
668 const int index= i4x4 + 4*i8x8 + p*16;
669 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
670 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Fold the four part counts into the first cache entry of the quadrant and
// set the quadrant bit in new_cbp when that sum is non-zero.
673 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
674 nnz[0] += nnz[1] + nnz[8] + nnz[9];
675 new_cbp |= !!nnz[0] << i8x8;
677 for(i4x4=0; i4x4<4; i4x4++){
678 const int index= i4x4 + 4*i8x8 + p*16;
679 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
680 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
683 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
// Zero the four cached counts of this quadrant.
687 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
688 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Decodes one CAVLC-coded macroblock at (s->mb_x, s->mb_y).
// Order of work visible here: skip-run handling for non-I slices, mb_type
// parsing, intra PCM samples, intra prediction modes or inter reference
// indices and motion vectors, coded block pattern, optional 8x8 transform
// flag, qscale delta, luma and chroma residuals, and finally write-back of
// qscale and coefficient counts for the macroblock.
// NOTE(review): many branch and return lines are not visible in this view.
695 int ff_h264_decode_mb_cavlc(H264Context *h){
696 MpegEncContext * const s = &h->s;
699 unsigned int mb_type, cbp;
700 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Chroma residuals are decoded only for chroma_format_idc 1 and 2.
701 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
702 const int pixel_shift = h->pixel_shift;
703 unsigned local_ref_count[2];
705 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
707 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
708 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-I slices carry a skip run; -1 means it has not been read yet.
710 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
711 if(s->mb_skip_run==-1)
712 s->mb_skip_run= get_ue_golomb(&s->gb);
// The run is tested and then decremented; a non-zero value before the
// decrement takes this branch.
714 if (s->mb_skip_run--) {
715 if(FRAME_MBAFF && (s->mb_y&1) == 0){
716 if(s->mb_skip_run==0)
717 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
// The field decoding flag is read on even macroblock rows.
724 if( (s->mb_y&1) == 0 )
725 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
728 h->prev_mb_skipped= 0;
// mb_type code number; it is mapped through the B, P or I info table below.
730 mb_type= get_ue_golomb(&s->gb);
731 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
733 partition_count= b_mb_type_info[mb_type].partition_count;
734 mb_type= b_mb_type_info[mb_type].type;
737 goto decode_intra_mb;
739 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
741 partition_count= p_mb_type_info[mb_type].partition_count;
742 mb_type= p_mb_type_info[mb_type].type;
745 goto decode_intra_mb;
748 av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
749 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
753 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra macroblock types carry their cbp and 16x16 prediction mode in the table.
757 cbp= i_mb_type_info[mb_type].cbp;
758 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
759 mb_type= i_mb_type_info[mb_type].type;
763 mb_type |= MB_TYPE_INTERLACED;
765 h->slice_table[ mb_xy ]= h->slice_num;
767 if(IS_INTRA_PCM(mb_type)){
// Multiplication binds tighter than the shift, so this is
// (size entry * luma bit depth) >> 3, i.e. a byte count.
769 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
770 h->sps.bit_depth_luma >> 3;
772 // We assume these blocks are very rare so we do not optimize it.
773 align_get_bits(&s->gb);
775 // The pixels are stored in the same order as levels in h->mb array.
776 for(x=0; x < mb_size; x++){
777 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
780 // In deblocking, the quantizer is 0
781 s->current_picture.f.qscale_table[mb_xy] = 0;
782 // All coeffs are present
783 memset(h->non_zero_count[mb_xy], 16, 48);
785 s->current_picture.f.mb_type[mb_xy] = mb_type;
// Per-list reference counts, shifted left by MB_MBAFF.
789 local_ref_count[0] = h->ref_count[0] << MB_MBAFF;
790 local_ref_count[1] = h->ref_count[1] << MB_MBAFF;
792 fill_decode_neighbors(h, mb_type);
793 fill_decode_caches(h, mb_type);
796 if(IS_INTRA(mb_type)){
798 // init_top_left_availability(h);
799 if(IS_INTRA4x4(mb_type)){
// One bit selects the 8x8 transform when the PPS allows it.
802 if(dct8x8_allowed && get_bits1(&s->gb)){
803 mb_type |= MB_TYPE_8x8DCT;
807 // fill_intra4x4_pred_table(h);
808 for(i=0; i<16; i+=di){
809 int mode= pred_intra_mode(h, i);
// A zero bit means the predicted mode is not used: read a 3-bit remainder
// and step over the predicted value.
811 if(!get_bits1(&s->gb)){
812 const int rem_mode= get_bits(&s->gb, 3);
813 mode = rem_mode + (rem_mode >= mode);
817 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
819 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
821 write_back_intra_pred_mode(h);
822 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
825 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
826 if(h->intra16x16_pred_mode < 0)
// Chroma prediction mode is read and validated, or forced to DC_128_PRED8x8.
830 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
833 h->chroma_pred_mode= pred_mode;
835 h->chroma_pred_mode = DC_128_PRED8x8;
// Four partitions: each 8x8 quadrant has its own sub macroblock type.
837 }else if(partition_count==4){
838 int i, j, sub_partition_count[4], list, ref[2][4];
840 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
842 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
843 if(h->sub_mb_type[i] >=13){
844 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
847 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
848 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any quadrant is direct, run direct prediction and mark two cache
// entries as not available.
850 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
851 ff_h264_pred_direct_motion(h, &mb_type);
852 h->ref_cache[0][scan8[4]] =
853 h->ref_cache[1][scan8[4]] =
854 h->ref_cache[0][scan8[12]] =
855 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
858 av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
860 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
861 if(h->sub_mb_type[i] >=4){
862 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
865 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
866 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and quadrant.
870 for(list=0; list<h->list_count; list++){
871 int ref_count= IS_REF0(mb_type) ? 1 : local_ref_count[list];
873 if(IS_DIRECT(h->sub_mb_type[i])) continue;
874 if(IS_DIR(h->sub_mb_type[i], 0, list)){
// With exactly two references the index is a single inverted bit.
878 }else if(ref_count == 2){
879 tmp= get_bits1(&s->gb)^1;
881 tmp= get_ue_golomb_31(&s->gb);
883 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
896 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per list, quadrant and sub partition.
898 for(list=0; list<h->list_count; list++){
900 if(IS_DIRECT(h->sub_mb_type[i])) {
901 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
904 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
905 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
907 if(IS_DIR(h->sub_mb_type[i], 0, list)){
908 const int sub_mb_type= h->sub_mb_type[i];
909 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
910 for(j=0; j<sub_partition_count[i]; j++){
912 const int index= 4*i + block_width*j;
913 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Predicted vector plus the signed delta read from the bitstream.
914 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
915 mx += get_se_golomb(&s->gb);
916 my += get_se_golomb(&s->gb);
917 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector over the cache cells covered by the sub partition.
919 if(IS_SUB_8X8(sub_mb_type)){
921 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
923 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
924 }else if(IS_SUB_8X4(sub_mb_type)){
925 mv_cache[ 1 ][0]= mx;
926 mv_cache[ 1 ][1]= my;
927 }else if(IS_SUB_4X8(sub_mb_type)){
928 mv_cache[ 8 ][0]= mx;
929 mv_cache[ 8 ][1]= my;
931 mv_cache[ 0 ][0]= mx;
932 mv_cache[ 0 ][1]= my;
935 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// Whole macroblock direct: the 8x8 transform additionally requires
// direct_8x8_inference_flag.
941 }else if(IS_DIRECT(mb_type)){
942 ff_h264_pred_direct_motion(h, &mb_type);
943 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
946 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference index and one vector per list.
947 if(IS_16X16(mb_type)){
948 for(list=0; list<h->list_count; list++){
950 if(IS_DIR(mb_type, 0, list)){
951 if(local_ref_count[list]==1){
953 }else if(local_ref_count[list]==2){
954 val= get_bits1(&s->gb)^1;
956 val= get_ue_golomb_31(&s->gb);
957 if(val >= local_ref_count[list]){
958 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
962 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
965 for(list=0; list<h->list_count; list++){
966 if(IS_DIR(mb_type, 0, list)){
967 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
968 mx += get_se_golomb(&s->gb);
969 my += get_se_golomb(&s->gb);
970 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
972 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
// 16x8: two partitions, each filling a 4 wide by 2 high cache area.
976 else if(IS_16X8(mb_type)){
977 for(list=0; list<h->list_count; list++){
980 if(IS_DIR(mb_type, i, list)){
981 if(local_ref_count[list] == 1){
983 }else if(local_ref_count[list] == 2){
984 val= get_bits1(&s->gb)^1;
986 val= get_ue_golomb_31(&s->gb);
987 if(val >= local_ref_count[list]){
988 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
993 val= LIST_NOT_USED&0xFF;
994 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
997 for(list=0; list<h->list_count; list++){
1000 if(IS_DIR(mb_type, i, list)){
1001 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1002 mx += get_se_golomb(&s->gb);
1003 my += get_se_golomb(&s->gb);
1004 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1006 val= pack16to32(mx,my);
1009 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions, each filling a 2 wide by 4 high cache area.
1013 av_assert2(IS_8X16(mb_type));
1014 for(list=0; list<h->list_count; list++){
1017 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1018 if(local_ref_count[list]==1){
1020 }else if(local_ref_count[list]==2){
1021 val= get_bits1(&s->gb)^1;
1023 val= get_ue_golomb_31(&s->gb);
1024 if(val >= local_ref_count[list]){
1025 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1030 val= LIST_NOT_USED&0xFF;
1031 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1034 for(list=0; list<h->list_count; list++){
1037 if(IS_DIR(mb_type, i, list)){
1038 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1039 mx += get_se_golomb(&s->gb);
1040 my += get_se_golomb(&s->gb);
1041 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1043 val= pack16to32(mx,my);
1046 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1052 if(IS_INTER(mb_type))
1053 write_back_motion(h, mb_type);
// Intra 16x16 took its cbp from the mb_type table; every other type reads a
// code number and maps it through a golomb_to_*_cbp table.
1055 if(!IS_INTRA16x16(mb_type)){
1056 cbp= get_ue_golomb(&s->gb);
1060 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1063 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1064 else cbp= golomb_to_inter_cbp [cbp];
1067 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1070 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1071 else cbp= golomb_to_inter_cbp_gray[cbp];
// A cbp above 15 is rejected when chroma is not decoded.
1074 if (!decode_chroma && cbp>15) {
1075 av_log(s->avctx, AV_LOG_ERROR, "gray chroma\n");
1076 return AVERROR_INVALIDDATA;
// Non-intra macroblocks with coded luma may signal the 8x8 transform.
1080 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1081 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1084 h->cbp_table[mb_xy]= cbp;
1085 s->current_picture.f.mb_type[mb_xy] = mb_type;
1087 if(cbp || IS_INTRA16x16(mb_type)){
1088 int i4x4, i8x8, chroma_idx;
// Intra and inter residuals may come from different bitstream readers.
1091 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1092 const uint8_t *scan, *scan8x8;
1093 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
// Field or zigzag scan tables; the _q0 variants are used when qscale is 0.
1095 if(IS_INTERLACED(mb_type)){
1096 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1097 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1099 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1100 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1103 dquant= get_se_golomb(&s->gb);
1105 s->qscale += dquant;
// qscale outside 0..max_qp is wrapped once by max_qp+1; if it is still out
// of range the delta is reported as invalid.
1107 if(((unsigned)s->qscale) > max_qp){
1108 if(s->qscale<0) s->qscale+= max_qp+1;
1109 else s->qscale-= max_qp+1;
1110 if(((unsigned)s->qscale) > max_qp){
1111 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1116 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1117 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Plane 0 residual; its non-negative result is stored in cbp_table bits 12 and up.
1119 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1122 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 decoded with the luma residual routine.
1124 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1127 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
// Subsampled chroma: num_c8x8 equals chroma_format_idc.
1131 const int num_c8x8 = h->sps.chroma_format_idc;
// Chroma DC blocks, one per chroma plane, with 4*num_c8x8 coefficients each.
1134 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1135 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1136 CHROMA_DC_BLOCK_INDEX+chroma_idx,
1137 CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1138 NULL, 4*num_c8x8) < 0) {
// Chroma AC blocks: 15 coefficients each, starting at scan + 1.
1144 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1145 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1146 int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1147 for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1148 for (i4x4=0; i4x4<4; i4x4++) {
1149 const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1150 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1152 mb += 16<<pixel_shift;
// Clear the cached chroma coefficient counts.
1157 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1158 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Clear the cached coefficient counts of all three planes.
1162 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1163 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1164 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Publish the final qscale and coefficient counts for this macroblock.
1166 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1167 write_back_non_zero_count(h);