2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
33 #include "mpegvideo.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
/* Two 16-entry lookup tables. Later in this file they are indexed with the
 * coded-block-pattern value read by get_ue_golomb(): the intra4x4 table when
 * IS_INTRA4x4(mb_type) holds, the inter table otherwise.
 * NOTE(review): the "_gray" suffix suggests these serve the case without
 * chroma; the condition that selects them is not visible in this view -
 * confirm. */
42 static const uint8_t golomb_to_inter_cbp_gray[16]={
43 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
46 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
47 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Source tables for the two chroma DC coeff_token VLCs. In
 * ff_h264_decode_init_vlc() the *_len table is handed to init_vlc() first and
 * the *_bits table second; the plain variant has 4*5 entries and the
 * "chroma422" variant has 4*9 entries.
 * NOTE(review): presumably *_len holds code lengths and *_bits the code
 * words; the initializer rows are not visible in this view. */
50 static const uint8_t chroma_dc_coeff_token_len[4*5]={
58 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
66 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
78 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Source tables for the four general coeff_token VLCs (4 tables of 4*17
 * entries). ff_h264_decode_init_vlc() builds coeff_token_vlc[i] from row i of
 * both tables. decode_residual() splits a decoded symbol as
 * total_coeff = symbol>>2 and trailing_ones = symbol&3, so an entry's index
 * is presumably 4*total_coeff + trailing_ones.
 * NOTE(review): some initializer lines (braces and the first entries of each
 * row) are not visible in this view. */
90 static const uint8_t coeff_token_len[4][4*17]={
93 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
94 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
95 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
96 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
100 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
101 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
102 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
103 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
107 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
108 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
109 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
110 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
114 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
117 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Second table passed to init_vlc() for the same four VLCs. */
121 static const uint8_t coeff_token_bits[4][4*17]={
124 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
125 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
126 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
127 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
131 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
132 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
133 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
134 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
138 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
139 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
140 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
141 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
145 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
146 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
147 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
148 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Source tables for the total_zeros VLCs. ff_h264_decode_init_vlc() builds
 * total_zeros_vlc[i] from row i (16 codes per row), and decode_residual()
 * looks a VLC up as (total_zeros_vlc-1)[total_coeff], i.e. row i serves
 * total_coeff == i+1. Rows shorter than 16 entries are zero-padded by the
 * usual C initializer rules.
 * NOTE(review): only the first rows are visible in this view. */
152 static const uint8_t total_zeros_len[16][16]= {
153 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
154 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
155 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
156 {5,3,4,4,3,3,3,4,3,4,5,5,5},
157 {4,4,4,3,3,3,3,3,4,5,4,5},
158 {6,5,3,3,3,3,3,3,4,3,6},
159 {6,5,3,3,3,2,3,4,3,6},
/* Second table passed to init_vlc() for the same VLCs. */
170 static const uint8_t total_zeros_bits[16][16]= {
171 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
172 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
173 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
174 {3,7,5,4,6,5,4,3,3,2,2,1,0},
175 {5,4,3,7,6,5,4,3,2,1,1,0},
176 {1,1,7,6,5,4,3,2,1,1,0},
177 {1,1,5,4,3,3,2,1,1,0},
/* Source tables for the chroma DC total_zeros VLCs: 3 rows of 4 codes for the
 * plain variant and 7 rows of 8 codes for the "chroma422" variant. Row i
 * feeds chroma_dc_total_zeros_vlc[i] / chroma422_dc_total_zeros_vlc[i] in
 * ff_h264_decode_init_vlc(); decode_residual() indexes those VLC arrays with
 * total_coeff-1.
 * NOTE(review): several initializer rows are not visible in this view. */
188 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
194 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
200 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
201 { 1, 3, 3, 4, 4, 4, 5, 5 },
202 { 3, 2, 3, 3, 3, 3, 3 },
203 { 3, 3, 2, 2, 3, 3 },
210 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
211 { 1, 2, 3, 2, 3, 1, 1, 0 },
212 { 0, 1, 1, 4, 5, 6, 7 },
213 { 0, 1, 1, 2, 6, 7 },
/* Source tables for the run_before VLCs. In ff_h264_decode_init_vlc() rows
 * 0..5 feed run_vlc[i], while row 6 feeds the separate run7_vlc (16 codes).
 * decode_residual() indexes run_vlc as (run_vlc-1)[zeros_left].
 * NOTE(review): only the last row of each table is visible in this view. */
220 static const uint8_t run_len[7][16]={
227 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
230 static const uint8_t run_bits[7][16]={
237 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* VLC objects and their statically allocated backing tables. For each VLC,
 * ff_h264_decode_init_vlc() points .table at the storage below, copies the
 * matching *_size constant into .table_allocated and then calls init_vlc()
 * with INIT_VLC_USE_NEW_STATIC. */

/* The four coeff_token tables are packed back to back in one array; the
 * per-table entry counts live in coeff_token_vlc_tables_size[] and their sum
 * is asserted against the array length during initialization. */
240 static VLC coeff_token_vlc[4];
241 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
242 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* 256 == 1 << CHROMA_DC_COEFF_TOKEN_VLC_BITS */
244 static VLC chroma_dc_coeff_token_vlc;
245 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
246 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* 8192 == 1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS */
248 static VLC chroma422_dc_coeff_token_vlc;
249 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
250 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
/* One VLC per total_coeff value 1..15; 512 == 1 << TOTAL_ZEROS_VLC_BITS */
252 static VLC total_zeros_vlc[15];
253 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
254 static const int total_zeros_vlc_tables_size = 512;
/* 8 == 1 << CHROMA_DC_TOTAL_ZEROS_VLC_BITS */
256 static VLC chroma_dc_total_zeros_vlc[3];
257 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
258 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* 32 == 1 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS */
260 static VLC chroma422_dc_total_zeros_vlc[7];
261 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
262 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
/* 8 == 1 << RUN_VLC_BITS */
264 static VLC run_vlc[6];
265 static VLC_TYPE run_vlc_tables[6][8][2];
266 static const int run_vlc_tables_size = 8;
/* Storage for run7_vlc, which is read with a maximum depth of 2 in
 * decode_residual() and therefore needs more than 1 << RUN7_VLC_BITS entries.
 * NOTE(review): the declaration of run7_vlc itself is not visible in this
 * view. */
269 static VLC_TYPE run7_vlc_table[96][2];
270 static const int run7_vlc_table_size = 96;
/* Number of bitstream bits peeked to index cavlc_level_tab. */
272 #define LEVEL_TAB_BITS 8
/* Level lookup table indexed as [suffix_length][next LEVEL_TAB_BITS bits].
 * Element [0] is either a decoded level or, when >= 100, an escape holding
 * prefix+100; element [1] is the number of bits to skip. Filled by
 * init_cavlc_level_tab(). */
273 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* First-stage lookup widths: each value is passed both to init_vlc() when the
 * corresponding VLC is built and to get_vlc2() when it is read. */
275 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
276 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
277 #define COEFF_TOKEN_VLC_BITS 8
278 #define TOTAL_ZEROS_VLC_BITS 9
279 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
280 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
281 #define RUN_VLC_BITS 3
282 #define RUN7_VLC_BITS 6
285 * Get the predicted number of non-zero coefficients.
286 * @param n block index
/**
 * Predict the number of non-zero coefficients of block n from the cached
 * counts of its left (index8 - 1) and top (index8 - 8) neighbours in
 * h->non_zero_count_cache.
 * NOTE(review): the declaration of i and the return statement are not visible
 * in this view; presumably i starts as left + top and i&31 (the value printed
 * as "P" by the trace line) is returned - confirm against the full file.
 */
288 static inline int pred_non_zero_count(H264Context *h, int n){
289 const int index8= scan8[n];
290 const int left= h->non_zero_count_cache[index8 - 1];
291 const int top = h->non_zero_count_cache[index8 - 8];
/* Below 64 the value is replaced by its half, rounded up. */
294 if(i<64) i= (i+1)>>1;
296 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fill cavlc_level_tab for every suffix_length (0..6) and every
 * LEVEL_TAB_BITS-bit pattern i.
 * Element [0] receives the decoded signed level when the whole code fits in
 * LEVEL_TAB_BITS bits, otherwise an escape value of prefix+100; element [1]
 * receives the number of bits the entry accounts for.
 */
301 static av_cold void init_cavlc_level_tab(void){
305 for(suffix_length=0; suffix_length<7; suffix_length++){
306 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* For non-zero i this is the count of leading zero bits of i seen as a
 * LEVEL_TAB_BITS-wide pattern (assuming av_log2() is floor(log2)). */
307 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
/* Case 1: prefix zeros, the terminating 1 bit and the suffix all fit. */
309 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
/* Combine the prefix with the suffix_length bits that follow the
 * leading 1 bit of i; subtracting (1 << suffix_length) removes that
 * leading 1 bit from the extracted field. */
310 int level_code = (prefix << suffix_length) +
311 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
/* mask is 0 for even codes and -1 for odd codes; the xor/subtract pair
 * negates the halved value for odd codes, giving a signed level. */
312 int mask = -(level_code&1);
313 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314 cavlc_level_tab[suffix_length][i][0]= level_code;
315 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* Case 2: only the prefix (with its terminating bit) fits; store the
 * escape prefix+100 and account for prefix+1 bits. */
316 }else if(prefix + 1 <= LEVEL_TAB_BITS){
317 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* NOTE(review): presumably the remaining else case (the branch line is not
 * visible): the prefix does not end inside the window, so the escape
 * LEVEL_TAB_BITS+100 is stored and LEVEL_TAB_BITS bits are accounted for. */
320 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * Build every static CAVLC lookup structure used by this file.
 * Each VLC is pointed at its statically allocated table, told its capacity
 * through table_allocated, and then filled by init_vlc() with
 * INIT_VLC_USE_NEW_STATIC. The level table is built last by
 * init_cavlc_level_tab().
 * NOTE(review): the loop headers, the local declarations (i, offset) and any
 * run-once guard are not visible in this view.
 */
327 av_cold void ff_h264_decode_init_vlc(void){
/* Chroma DC coeff_token VLC: 4*5 codes. */
335 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338 &chroma_dc_coeff_token_len [0], 1, 1,
339 &chroma_dc_coeff_token_bits[0], 1, 1,
340 INIT_VLC_USE_NEW_STATIC);
/* "chroma422" DC coeff_token VLC: 4*9 codes. */
342 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345 &chroma422_dc_coeff_token_len [0], 1, 1,
346 &chroma422_dc_coeff_token_bits[0], 1, 1,
347 INIT_VLC_USE_NEW_STATIC);
/* General coeff_token VLCs: table i starts at the running offset inside the
 * packed coeff_token_vlc_tables array and the offset then advances by that
 * table's size. */
351 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354 &coeff_token_len [i][0], 1, 1,
355 &coeff_token_bits[i][0], 1, 1,
356 INIT_VLC_USE_NEW_STATIC);
357 offset += coeff_token_vlc_tables_size[i];
360 * This is a one time safety check to make sure that
361 * the packed static coeff_token_vlc table sizes
362 * were initialized correctly.
364 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* Chroma DC total_zeros VLCs: 4 codes each, one per table row. */
367 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369 init_vlc(&chroma_dc_total_zeros_vlc[i],
370 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371 &chroma_dc_total_zeros_len [i][0], 1, 1,
372 &chroma_dc_total_zeros_bits[i][0], 1, 1,
373 INIT_VLC_USE_NEW_STATIC);
/* "chroma422" DC total_zeros VLCs: 8 codes each. */
377 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379 init_vlc(&chroma422_dc_total_zeros_vlc[i],
380 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381 &chroma422_dc_total_zeros_len [i][0], 1, 1,
382 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383 INIT_VLC_USE_NEW_STATIC);
/* General total_zeros VLCs: 16 codes each. */
387 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389 init_vlc(&total_zeros_vlc[i],
390 TOTAL_ZEROS_VLC_BITS, 16,
391 &total_zeros_len [i][0], 1, 1,
392 &total_zeros_bits[i][0], 1, 1,
393 INIT_VLC_USE_NEW_STATIC);
/* run_before VLCs built from rows 0..5 of run_len/run_bits.
 * NOTE(review): the line carrying the bit width and code count arguments of
 * this init_vlc() call is not visible in this view. */
397 run_vlc[i].table = run_vlc_tables[i];
398 run_vlc[i].table_allocated = run_vlc_tables_size;
399 init_vlc(&run_vlc[i],
401 &run_len [i][0], 1, 1,
402 &run_bits[i][0], 1, 1,
403 INIT_VLC_USE_NEW_STATIC);
/* The separate run7 VLC is built from row 6 with 16 codes. The first
 * assignment below ends with a comma operator rather than a semicolon; both
 * assignments still execute in order. */
405 run7_vlc.table = run7_vlc_table,
406 run7_vlc.table_allocated = run7_vlc_table_size;
407 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408 &run_len [6][0], 1, 1,
409 &run_bits[6][0], 1, 1,
410 INIT_VLC_USE_NEW_STATIC);
/* Finally precompute the level lookup table. */
412 init_cavlc_level_tab();
/**
 * Read a level prefix directly from the bit reader cache.
 * log = 32 - av_log2(buf) covers the leading zero bits of the cached word plus
 * the first set bit, and exactly log bits are consumed.
 * NOTE(review): the local declarations, OPEN_READER and the return statement
 * are not visible in this view; presumably log-1 (the value printed as "lpr"
 * by the debug line) is returned - confirm.
 */
419 static inline int get_level_prefix(GetBitContext *gb){
424 UPDATE_CACHE(re, gb);
425 buf=GET_CACHE(re, gb);
427 log= 32 - av_log2(buf);
/* Debug output of the consumed bits; NOTE(review): presumably guarded by a
 * trace/debug conditional that is not visible here. */
429 print_bin(buf>>(32-log), log);
430 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
433 LAST_SKIP_BITS(re, gb, log);
434 CLOSE_READER(re, gb);
440 * Decode a residual block.
441 * @param n block index
442 * @param scantable scantable
443 * @param max_coeff number of coefficients in the block
444 * @return <0 if an error occurred
/**
 * Parse the CAVLC residual of one block and store its coefficients in block.
 * Visible stages: read coeff_token (total_coeff and trailing_ones), read the
 * trailing-one signs, read the remaining levels with an adaptive
 * suffix_length, read total_zeros, then read run_before values while writing
 * the levels backwards along scantable (see STORE_BLOCK).
 * The non-zero count is written to h->non_zero_count_cache[scan8[n]].
 * NOTE(review): several branch lines, local declarations (level[], prefix,
 * mask) and the return statements are not visible in this view.
 */
446 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
447 MpegEncContext * const s = &h->s;
/* Maps a predicted coefficient count (0..16) to one of the four general
 * coeff_token VLC tables. */
448 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
450 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
452 //FIXME put trailing_ones into the context
/* Chroma DC blocks use dedicated coeff_token VLCs (plain and "chroma422"
 * variants). NOTE(review): the selecting conditions are not visible here. */
456 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
458 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
/* The token carries total_coeff in its upper bits and trailing_ones in its
 * low two bits. */
459 total_coeff= coeff_token>>2;
/* Luma DC block indices: the prediction is taken for block
 * (n - LUMA_DC_BLOCK_INDEX)*16 and only selects the VLC table; total_coeff
 * is then overwritten with the decoded value. */
461 if(n >= LUMA_DC_BLOCK_INDEX){
462 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
463 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
464 total_coeff= coeff_token>>2;
/* Other blocks: same scheme, predicting for block n itself. */
466 total_coeff= pred_non_zero_count(h, n);
467 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
468 total_coeff= coeff_token>>2;
/* Record the count so that later blocks can predict from it. */
471 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
473 //FIXME set last_non_zero?
/* The comparison is performed in unsigned arithmetic, so a negative
 * total_coeff is rejected here as well. */
477 if(total_coeff > (unsigned)max_coeff) {
478 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
482 trailing_ones= coeff_token&3;
483 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
484 assert(total_coeff<=16);
/* Peek three bits but consume only trailing_ones of them; each peeked bit
 * yields -1 when set and +1 when clear. */
486 i = show_bits(gb, 3);
487 skip_bits(gb, trailing_ones);
488 level[0] = 1-((i&4)>>1);
489 level[1] = 1-((i&2) );
490 level[2] = 1-((i&1)<<1);
/* Levels beyond the trailing ones. */
492 if(trailing_ones<total_coeff) {
/* Relational operators bind tighter than &, so this is
 * (total_coeff > 10) & (trailing_ones < 3): the initial suffix_length is 0 or 1. */
494 int suffix_length = total_coeff > 10 & trailing_ones < 3;
495 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
496 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
498 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Values >= 100 are escapes holding prefix+100: the level did not fit in the
 * table and is decoded explicitly below. */
499 if(level_code >= 100){
500 prefix= level_code - 100;
/* The table window held no terminating bit; keep counting the prefix
 * directly from the bitstream. */
501 if(prefix == LEVEL_TAB_BITS)
502 prefix += get_level_prefix(gb);
504 //first coefficient has suffix_length equal to 0 or 1
505 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
507 level_code= (prefix<<1) + get_bits1(gb); //part
509 level_code= prefix; //part
510 }else if(prefix==14){
512 level_code= (prefix<<1) + get_bits1(gb); //part
514 level_code= prefix + get_bits(gb, 4); //part
519 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
522 level_code += (1<<(prefix-3))-4096;
524 level_code += get_bits(gb, prefix-3); //part
/* With fewer than three trailing ones the first level is offset by 2 in
 * level_code before the sign is unfolded. */
527 if(trailing_ones < 3) level_code += 2;
/* Unfold the sign: even codes become positive, odd codes negative. */
530 mask= -(level_code&1);
531 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
/* Table hit path: level_code is already signed; when trailing_ones < 3 move
 * it one step away from zero (+1 if non-negative, -1 if negative). */
533 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* suffix_length becomes 2 when level_code lies outside [-3, 3], else 1
 * (single unsigned range check). */
535 suffix_length = 1 + (level_code + 3U > 6U);
536 level[trailing_ones]= level_code;
539 //remaining coefficients have suffix_length > 0
540 for(i=trailing_ones+1;i<total_coeff;i++) {
541 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
542 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
543 level_code= cavlc_level_tab[suffix_length][bitsi][0];
545 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Same escape handling as for the first level. */
546 if(level_code >= 100){
547 prefix= level_code - 100;
548 if(prefix == LEVEL_TAB_BITS){
549 prefix += get_level_prefix(gb);
552 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
554 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
556 level_code += (1<<(prefix-3))-4096;
558 mask= -(level_code&1);
559 level_code= (((2+level_code)>>1) ^ mask) - mask;
561 level[i]= level_code;
/* Grow suffix_length by one when level_code falls outside
 * [-suffix_limit, +suffix_limit] for the current suffix_length (one unsigned
 * comparison covers both signs). */
562 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* A full block leaves no room for zeros. NOTE(review): the statement taken in
 * this case is not visible; presumably zeros_left is set to 0. */
566 if(total_coeff == max_coeff)
/* Otherwise read total_zeros from the VLC for this total_coeff; the
 * (array-1)[total_coeff] form indexes element total_coeff-1. Blocks with
 * max_coeff <= 8 use the chroma DC variants. */
569 if (max_coeff <= 8) {
571 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
572 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
574 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
575 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
577 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK(type) writes the levels into block, viewed as an array of type.
 * scantable is first advanced to the position of the last coefficient
 * (zeros_left + total_coeff - 1) and then walked backwards by 1 + run_before
 * per level. While zeros remain, run_before is read from run_vlc (indexed by
 * zeros_left) or from run7_vlc. For n >= LUMA_DC_BLOCK_INDEX the raw level is
 * stored; otherwise (level * qmul[pos] + 32) >> 6 is stored.
 * NOTE(review): the conditions choosing between run_vlc and run7_vlc and some
 * loop-body lines are not visible in this view. */
581 #define STORE_BLOCK(type) \
582 scantable += zeros_left + total_coeff - 1; \
583 if(n >= LUMA_DC_BLOCK_INDEX){ \
584 ((type*)block)[*scantable] = level[0]; \
585 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
587 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
589 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
590 zeros_left -= run_before; \
591 scantable -= 1 + run_before; \
592 ((type*)block)[*scantable]= level[i]; \
594 for(;i<total_coeff;i++) { \
596 ((type*)block)[*scantable]= level[i]; \
599 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
600 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
602 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
604 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
605 zeros_left -= run_before; \
606 scantable -= 1 + run_before; \
607 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
609 for(;i<total_coeff;i++) { \
611 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
615 if (h->pixel_shift) {
/* NOTE(review): the element types used for the two pixel_shift cases and the
 * condition guarding the error below are not visible in this view. */
622 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Decode the residual of one 16x16 plane p of the current macroblock using
 * the luma block layout.
 * The quantizer is s->qscale for p == 0 and h->chroma_qp[p-1] otherwise.
 * Intra16x16 macroblocks read a DC block followed by 15-coefficient AC
 * blocks from h->intra_gb_ptr; other macroblocks read either four interleaved
 * 16-coefficient blocks per 8x8 (8x8 DCT) or four 4x4 blocks per 8x8 from gb.
 * Returns -1 when a decode_residual() call fails.
 * NOTE(review): the tests on cbp, the new_cbp declaration and the final
 * return are not visible in this view; the caller shifts the result into
 * h->cbp_table, so presumably new_cbp is returned - confirm.
 */
629 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
631 MpegEncContext * const s = &h->s;
632 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
633 if(IS_INTRA16x16(mb_type)){
/* Clear the DC buffer of this plane with four AV_ZERO128 stores, then decode
 * the DC block without dequantization (qmul == NULL). */
634 AV_ZERO128(h->mb_luma_dc[p]+0);
635 AV_ZERO128(h->mb_luma_dc[p]+8);
636 AV_ZERO128(h->mb_luma_dc[p]+16);
637 AV_ZERO128(h->mb_luma_dc[p]+24);
638 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
639 return -1; //FIXME continue if partitioned and other return -1 too
/* For this macroblock type the luma cbp bits are expected to be all clear or
 * all set. */
642 assert((cbp&15) == 0 || (cbp&15) == 15);
/* AC blocks: the scan starts one entry in (scan + 1) and holds at most 15
 * coefficients, since the DC coefficient was decoded separately. */
645 for(i8x8=0; i8x8<4; i8x8++){
646 for(i4x4=0; i4x4<4; i4x4++){
647 const int index= i4x4 + 4*i8x8 + p*16;
648 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
649 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* Zero the 4x4 region of the non-zero count cache for this plane.
 * NOTE(review): presumably the branch taken when no AC is coded. */
656 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Dequant matrix index: 0 for intra or 3 for inter, plus the plane number. */
660 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
661 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
663 for(i8x8=0; i8x8<4; i8x8++){
665 if(IS_8x8DCT(mb_type)){
/* All four passes write into the same 8x8 buffer, each with its own
 * 16-entry slice of the 8x8 scan table. */
666 DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
668 for(i4x4=0; i4x4<4; i4x4++){
669 const int index= i4x4 + 4*i8x8 + p*16;
670 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
671 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Sum the four per-pass counts into the first cache entry of the 8x8 block
 * and flag the block in new_cbp when the sum is non-zero. */
674 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
675 nnz[0] += nnz[1] + nnz[8] + nnz[9];
676 new_cbp |= !!nnz[0] << i8x8;
/* 4x4 transform: four independent 16-coefficient blocks per 8x8. */
678 for(i4x4=0; i4x4<4; i4x4++){
679 const int index= i4x4 + 4*i8x8 + p*16;
680 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
681 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
/* Note that the raw count (not a 0/1 flag) is shifted into new_cbp here. */
684 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
/* Clear the four cache entries of this 8x8 block.
 * NOTE(review): presumably the branch for an 8x8 block whose cbp bit is
 * clear; the condition is not visible. */
688 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
689 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/**
 * Decode one macroblock of a CAVLC coded slice.
 * Visible stages: skip-run handling for non-I slices, mb_type parsing, raw
 * PCM sample copy, intra prediction mode parsing, inter reference index and
 * motion vector parsing, coded block pattern and dquant parsing, and residual
 * decoding through decode_luma_residual() and decode_residual().
 * NOTE(review): many lines (error returns, else branches, loop headers and
 * the final return) are not visible in this view, so return values are not
 * documented here.
 */
696 int ff_h264_decode_mb_cavlc(H264Context *h){
697 MpegEncContext * const s = &h->s;
700 unsigned int mb_type, cbp;
701 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Chroma residual is decoded separately only for chroma_format_idc 1 or 2.
702 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
703 const int pixel_shift = h->pixel_shift;
// Linear macroblock index, also stored in the context.
705 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
707 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
708 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-I slices: a skip run of -1 means it has to be read from the bitstream;
// while the run is non-zero the macroblock is handled as skipped.
710 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
711 if(s->mb_skip_run==-1)
712 s->mb_skip_run= get_ue_golomb(&s->gb);
714 if (s->mb_skip_run--) {
// In MBAFF frames, on even macroblock rows, the field decoding flag is read
// once the skip run has reached zero.
715 if(FRAME_MBAFF && (s->mb_y&1) == 0){
716 if(s->mb_skip_run==0)
717 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
// Non-skipped macroblock on an even row: read the field decoding flag.
// NOTE(review): the enclosing condition is not visible in this view.
724 if( (s->mb_y&1) == 0 )
725 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
728 h->prev_mb_skipped= 0;
// mb_type is an unsigned Exp-Golomb code whose meaning depends on the slice type.
730 mb_type= get_ue_golomb(&s->gb);
731 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
733 partition_count= b_mb_type_info[mb_type].partition_count;
734 mb_type= b_mb_type_info[mb_type].type;
// NOTE(review): presumably reached when mb_type lies beyond the B inter
// types; the range test is not visible.
737 goto decode_intra_mb;
739 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
741 partition_count= p_mb_type_info[mb_type].partition_count;
742 mb_type= p_mb_type_info[mb_type].type;
745 goto decode_intra_mb;
748 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
// NOTE(review): the statement controlled by this SI-slice test is not visible.
749 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
753 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry their cbp and 16x16 prediction mode in the type table.
757 cbp= i_mb_type_info[mb_type].cbp;
758 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
759 mb_type= i_mb_type_info[mb_type].type;
763 mb_type |= MB_TYPE_INTERLACED;
765 h->slice_table[ mb_xy ]= h->slice_num;
767 if(IS_INTRA_PCM(mb_type)){
// Size of the raw macroblock in bytes: table value for the chroma format
// multiplied by the luma bit depth and divided by 8.
769 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
770 h->sps.bit_depth_luma >> 3;
772 // We assume these blocks are very rare so we do not optimize it.
773 align_get_bits(&s->gb);
775 // The pixels are stored in the same order as levels in h->mb array.
776 for(x=0; x < mb_size; x++){
777 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
780 // In deblocking, the quantizer is 0
781 s->current_picture.f.qscale_table[mb_xy] = 0;
782 // All coeffs are present
783 memset(h->non_zero_count[mb_xy], 16, 48);
785 s->current_picture.f.mb_type[mb_xy] = mb_type;
// The reference counts are doubled here and halved again at the end of the
// function. NOTE(review): presumably only for MBAFF field macroblocks; the
// guarding condition is not visible.
790 h->ref_count[0] <<= 1;
791 h->ref_count[1] <<= 1;
794 fill_decode_neighbors(h, mb_type);
795 fill_decode_caches(h, mb_type);
798 if(IS_INTRA(mb_type)){
800 // init_top_left_availability(h);
801 if(IS_INTRA4x4(mb_type)){
// One bit selects the 8x8 transform when the PPS allows it.
804 if(dct8x8_allowed && get_bits1(&s->gb)){
805 mb_type |= MB_TYPE_8x8DCT;
809 // fill_intra4x4_pred_table(h);
810 for(i=0; i<16; i+=di){
811 int mode= pred_intra_mode(h, i);
// A 0 bit means the predicted mode is not used: read a 3 bit remainder and
// skip over the predicted mode value.
813 if(!get_bits1(&s->gb)){
814 const int rem_mode= get_bits(&s->gb, 3);
815 mode = rem_mode + (rem_mode >= mode);
// Store the mode either for a 2x2 group of cache entries or for one entry.
// NOTE(review): the condition choosing between the two is not visible.
819 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
821 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
823 write_back_intra_pred_mode(h);
824 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
// Intra 16x16: validate the prediction mode taken from the type table.
827 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
828 if(h->intra16x16_pred_mode < 0)
// Chroma prediction mode is read from the bitstream and validated.
832 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
835 h->chroma_pred_mode= pred_mode;
837 h->chroma_pred_mode = DC_128_PRED8x8;
// Four partitions: each 8x8 has its own sub macroblock type.
839 }else if(partition_count==4){
840 int i, j, sub_partition_count[4], list, ref[2][4];
842 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
844 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
// B sub types are valid in 0..12.
845 if(h->sub_mb_type[i] >=13){
846 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
849 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
850 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any of the four sub types is direct, run direct prediction and mark
// four reference cache entries as not available.
852 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
853 ff_h264_pred_direct_motion(h, &mb_type);
854 h->ref_cache[0][scan8[4]] =
855 h->ref_cache[1][scan8[4]] =
856 h->ref_cache[0][scan8[12]] =
857 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
862 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
// P sub types are valid in 0..3.
863 if(h->sub_mb_type[i] >=4){
864 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
867 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
868 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and 8x8 block. With exactly two references a
// single inverted bit is read, otherwise an unsigned Exp-Golomb code.
872 for(list=0; list<h->list_count; list++){
873 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
875 if(IS_DIRECT(h->sub_mb_type[i])) continue;
876 if(IS_DIR(h->sub_mb_type[i], 0, list)){
880 }else if(ref_count == 2){
881 tmp= get_bits1(&s->gb)^1;
883 tmp= get_ue_golomb_31(&s->gb);
885 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
898 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per list and 8x8 block.
900 for(list=0; list<h->list_count; list++){
902 if(IS_DIRECT(h->sub_mb_type[i])) {
903 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// Replicate the reference index over the 2x2 cache entries of this 8x8 block.
906 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
907 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
909 if(IS_DIR(h->sub_mb_type[i], 0, list)){
910 const int sub_mb_type= h->sub_mb_type[i];
// Sub partitions flagged 16x16 or 16x8 are two cache entries wide, others one.
911 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
912 for(j=0; j<sub_partition_count[i]; j++){
914 const int index= 4*i + block_width*j;
915 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Final vector = prediction + signed Exp-Golomb difference, per component.
916 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
917 mx += get_se_golomb(&s->gb);
918 my += get_se_golomb(&s->gb);
919 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector over the cache entries covered by the sub partition shape.
921 if(IS_SUB_8X8(sub_mb_type)){
923 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
925 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
926 }else if(IS_SUB_8X4(sub_mb_type)){
927 mv_cache[ 1 ][0]= mx;
928 mv_cache[ 1 ][1]= my;
929 }else if(IS_SUB_4X8(sub_mb_type)){
930 mv_cache[ 8 ][0]= mx;
931 mv_cache[ 8 ][1]= my;
933 mv_cache[ 0 ][0]= mx;
934 mv_cache[ 0 ][1]= my;
// NOTE(review): the stores through this pointer are not visible; presumably
// the vectors of an unused list are cleared.
937 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// Direct macroblock: motion is derived by direct prediction, and the 8x8
// transform stays allowed only with direct_8x8_inference_flag.
943 }else if(IS_DIRECT(mb_type)){
944 ff_h264_pred_direct_motion(h, &mb_type);
945 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
948 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16 partition: one reference index and one vector per used list.
949 if(IS_16X16(mb_type)){
950 for(list=0; list<h->list_count; list++){
952 if(IS_DIR(mb_type, 0, list)){
953 if(h->ref_count[list]==1){
955 }else if(h->ref_count[list]==2){
956 val= get_bits1(&s->gb)^1;
958 val= get_ue_golomb_31(&s->gb);
959 if(val >= h->ref_count[list]){
960 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
964 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
967 for(list=0; list<h->list_count; list++){
968 if(IS_DIR(mb_type, 0, list)){
969 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
970 mx += get_se_golomb(&s->gb);
971 my += get_se_golomb(&s->gb);
972 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
974 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
// 16x8 partitions: two halves, each filling a 4x2 cache region at offset 16*i.
978 else if(IS_16X8(mb_type)){
979 for(list=0; list<h->list_count; list++){
982 if(IS_DIR(mb_type, i, list)){
983 if(h->ref_count[list] == 1){
985 }else if(h->ref_count[list] == 2){
986 val= get_bits1(&s->gb)^1;
988 val= get_ue_golomb_31(&s->gb);
989 if(val >= h->ref_count[list]){
990 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
995 val= LIST_NOT_USED&0xFF;
996 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
999 for(list=0; list<h->list_count; list++){
1002 if(IS_DIR(mb_type, i, list)){
1003 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1004 mx += get_se_golomb(&s->gb);
1005 my += get_se_golomb(&s->gb);
1006 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1008 val= pack16to32(mx,my);
1011 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16 partitions: two halves, each filling a 2x4 cache region at offset 2*i.
1015 assert(IS_8X16(mb_type));
1016 for(list=0; list<h->list_count; list++){
1019 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1020 if(h->ref_count[list]==1){
1022 }else if(h->ref_count[list]==2){
1023 val= get_bits1(&s->gb)^1;
1025 val= get_ue_golomb_31(&s->gb);
1026 if(val >= h->ref_count[list]){
1027 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1032 val= LIST_NOT_USED&0xFF;
1033 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1036 for(list=0; list<h->list_count; list++){
1039 if(IS_DIR(mb_type, i, list)){
1040 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1041 mx += get_se_golomb(&s->gb);
1042 my += get_se_golomb(&s->gb);
1043 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1045 val= pack16to32(mx,my);
1048 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1054 if(IS_INTER(mb_type))
1055 write_back_motion(h, mb_type);
// Coded block pattern: Intra16x16 already took it from the type table; other
// types read an Exp-Golomb code and map it through an intra4x4 or inter
// table. NOTE(review): the range tests and the condition choosing between the
// regular and the "_gray" tables are not visible in this view.
1057 if(!IS_INTRA16x16(mb_type)){
1058 cbp= get_ue_golomb(&s->gb);
1062 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1065 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1066 else cbp= golomb_to_inter_cbp [cbp];
1069 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1072 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1073 else cbp= golomb_to_inter_cbp_gray[cbp];
// Non-intra macroblocks with coded luma read one bit selecting the 8x8 transform.
1077 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1078 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1081 h->cbp_table[mb_xy]= cbp;
1082 s->current_picture.f.mb_type[mb_xy] = mb_type;
// Residual data is present when any cbp bit is set or for Intra16x16.
1084 if(cbp || IS_INTRA16x16(mb_type)){
1085 int i4x4, i8x8, chroma_idx;
// Intra and inter residuals may come from different bit readers.
1088 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1089 const uint8_t *scan, *scan8x8;
1090 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
// Pick field or zigzag scan tables; a separate "_q0" set is used when qscale is 0.
1092 if(IS_INTERLACED(mb_type)){
1093 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1094 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1096 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1097 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
// Quantizer delta: added to qscale, which is wrapped once into 0..max_qp; a
// value still out of range after wrapping is reported as an error.
1100 dquant= get_se_golomb(&s->gb);
1102 s->qscale += dquant;
1104 if(((unsigned)s->qscale) > max_qp){
1105 if(s->qscale<0) s->qscale+= max_qp+1;
1106 else s->qscale-= max_qp+1;
1107 if(((unsigned)s->qscale) > max_qp){
1108 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1113 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1114 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Plane 0 residual; its return value is merged into cbp_table above bit 12.
1116 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1119 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 decoded with the luma layout.
// NOTE(review): presumably the 4:4:4 case; the condition is not visible.
1121 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1124 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
// Separate chroma path: the number of chroma 8x8 blocks per plane equals
// chroma_format_idc.
1128 const int num_c8x8 = h->sps.chroma_format_idc;
// Chroma DC: one block per chroma plane, no dequantization (qmul == NULL),
// 4*num_c8x8 coefficients.
1131 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1132 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1133 CHROMA_DC_BLOCK_INDEX+chroma_idx,
1134 CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1135 NULL, 4*num_c8x8) < 0) {
// Chroma AC: 15 coefficient blocks starting at scan + 1, with the dequant
// matrix chosen per plane and by intra or inter type.
1141 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1142 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1143 DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1144 for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1145 for (i4x4=0; i4x4<4; i4x4++) {
1146 const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1147 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1149 mb += 16<<pixel_shift;
// No chroma AC coded: clear the chroma non-zero count cache regions.
// NOTE(review): the guarding else branch is not visible.
1154 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1155 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// No residual at all: clear the cache regions of all three planes.
1159 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1160 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1161 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1163 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1164 write_back_non_zero_count(h);
// Undo the earlier doubling of the reference counts.
1167 h->ref_count[0] >>= 1;
1168 h->ref_count[1] >>= 1;